1bf215546Sopenharmony_ci#include <float.h>
2bf215546Sopenharmony_ci#include "pipe/p_context.h"
3bf215546Sopenharmony_ci#include "pipe/p_defines.h"
4bf215546Sopenharmony_ci#include "pipe/p_state.h"
5bf215546Sopenharmony_ci#include "util/u_dynarray.h"
6bf215546Sopenharmony_ci#include "util/u_inlines.h"
7bf215546Sopenharmony_ci#include "util/u_debug.h"
8bf215546Sopenharmony_ci#include "util/u_memory.h"
9bf215546Sopenharmony_ci
10bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h"
11bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h"
12bf215546Sopenharmony_ci#include "tgsi/tgsi_util.h"
13bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h"
14bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h"
15bf215546Sopenharmony_ci
16bf215546Sopenharmony_ci#include "nouveau_debug.h"
17bf215546Sopenharmony_ci#include "nv_object.xml.h"
18bf215546Sopenharmony_ci#include "nv30/nv30-40_3d.xml.h"
19bf215546Sopenharmony_ci#include "nv30/nvfx_shader.h"
20bf215546Sopenharmony_ci#include "nv30/nv30_state.h"
21bf215546Sopenharmony_ci
22bf215546Sopenharmony_cistruct nvfx_fpc {
23bf215546Sopenharmony_ci   struct nv30_fragprog *fp;
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci   unsigned max_temps;
26bf215546Sopenharmony_ci   unsigned long long r_temps;
27bf215546Sopenharmony_ci   unsigned long long r_temps_discard;
28bf215546Sopenharmony_ci   struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
29bf215546Sopenharmony_ci   struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS];
30bf215546Sopenharmony_ci   struct nvfx_reg *r_temp;
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci   int num_regs;
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci   unsigned inst_offset;
35bf215546Sopenharmony_ci   unsigned have_const;
36bf215546Sopenharmony_ci   unsigned is_nv4x;
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci   struct util_dynarray imm_data;
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci   struct nvfx_reg* r_imm;
41bf215546Sopenharmony_ci   unsigned nr_imm;
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   struct util_dynarray if_stack;
44bf215546Sopenharmony_ci   //struct util_dynarray loop_stack;
45bf215546Sopenharmony_ci   struct util_dynarray label_relocs;
46bf215546Sopenharmony_ci};
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_cistatic inline struct nvfx_reg
49bf215546Sopenharmony_citemp(struct nvfx_fpc *fpc)
50bf215546Sopenharmony_ci{
51bf215546Sopenharmony_ci   int idx = __builtin_ctzll(~fpc->r_temps);
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   if (idx >= fpc->max_temps) {
54bf215546Sopenharmony_ci      NOUVEAU_ERR("out of temps!!\n");
55bf215546Sopenharmony_ci      return nvfx_reg(NVFXSR_TEMP, 0);
56bf215546Sopenharmony_ci   }
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   fpc->r_temps |= (1ULL << idx);
59bf215546Sopenharmony_ci   fpc->r_temps_discard |= (1ULL << idx);
60bf215546Sopenharmony_ci   return nvfx_reg(NVFXSR_TEMP, idx);
61bf215546Sopenharmony_ci}
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_cistatic inline void
64bf215546Sopenharmony_cirelease_temps(struct nvfx_fpc *fpc)
65bf215546Sopenharmony_ci{
66bf215546Sopenharmony_ci   fpc->r_temps &= ~fpc->r_temps_discard;
67bf215546Sopenharmony_ci   fpc->r_temps_discard = 0ULL;
68bf215546Sopenharmony_ci}
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_cistatic inline struct nvfx_reg
71bf215546Sopenharmony_cinvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   float v[4] = {a, b, c, d};
74bf215546Sopenharmony_ci   int idx = fpc->imm_data.size >> 4;
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci   memcpy(util_dynarray_grow(&fpc->imm_data, float, 4), v, 4 * sizeof(float));
77bf215546Sopenharmony_ci   return nvfx_reg(NVFXSR_IMM, idx);
78bf215546Sopenharmony_ci}
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_cistatic void
81bf215546Sopenharmony_cigrow_insns(struct nvfx_fpc *fpc, int size)
82bf215546Sopenharmony_ci{
83bf215546Sopenharmony_ci   struct nv30_fragprog *fp = fpc->fp;
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   fp->insn_len += size;
86bf215546Sopenharmony_ci   fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
87bf215546Sopenharmony_ci}
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_cistatic void
90bf215546Sopenharmony_ciemit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
91bf215546Sopenharmony_ci{
92bf215546Sopenharmony_ci   struct nv30_fragprog *fp = fpc->fp;
93bf215546Sopenharmony_ci   uint32_t *hw = &fp->insn[fpc->inst_offset];
94bf215546Sopenharmony_ci   uint32_t sr = 0;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci   switch (src.reg.type) {
97bf215546Sopenharmony_ci   case NVFXSR_INPUT:
98bf215546Sopenharmony_ci      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
99bf215546Sopenharmony_ci      hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
100bf215546Sopenharmony_ci      break;
101bf215546Sopenharmony_ci   case NVFXSR_OUTPUT:
102bf215546Sopenharmony_ci      sr |= NVFX_FP_REG_SRC_HALF;
103bf215546Sopenharmony_ci      FALLTHROUGH;
104bf215546Sopenharmony_ci   case NVFXSR_TEMP:
105bf215546Sopenharmony_ci      sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
106bf215546Sopenharmony_ci      sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
107bf215546Sopenharmony_ci      break;
108bf215546Sopenharmony_ci   case NVFXSR_IMM:
109bf215546Sopenharmony_ci      if (!fpc->have_const) {
110bf215546Sopenharmony_ci         grow_insns(fpc, 4);
111bf215546Sopenharmony_ci         hw = &fp->insn[fpc->inst_offset];
112bf215546Sopenharmony_ci         fpc->have_const = 1;
113bf215546Sopenharmony_ci      }
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci      memcpy(&fp->insn[fpc->inst_offset + 4],
116bf215546Sopenharmony_ci            (float*)fpc->imm_data.data + src.reg.index * 4,
117bf215546Sopenharmony_ci            sizeof(uint32_t) * 4);
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
120bf215546Sopenharmony_ci      break;
121bf215546Sopenharmony_ci   case NVFXSR_CONST:
122bf215546Sopenharmony_ci      if (!fpc->have_const) {
123bf215546Sopenharmony_ci         grow_insns(fpc, 4);
124bf215546Sopenharmony_ci         hw = &fp->insn[fpc->inst_offset];
125bf215546Sopenharmony_ci         fpc->have_const = 1;
126bf215546Sopenharmony_ci      }
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci      {
129bf215546Sopenharmony_ci         struct nv30_fragprog_data *fpd;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci         fp->consts = realloc(fp->consts, ++fp->nr_consts *
132bf215546Sopenharmony_ci                    sizeof(*fpd));
133bf215546Sopenharmony_ci         fpd = &fp->consts[fp->nr_consts - 1];
134bf215546Sopenharmony_ci         fpd->offset = fpc->inst_offset + 4;
135bf215546Sopenharmony_ci         fpd->index = src.reg.index;
136bf215546Sopenharmony_ci         memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
137bf215546Sopenharmony_ci      }
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
140bf215546Sopenharmony_ci      break;
141bf215546Sopenharmony_ci   case NVFXSR_NONE:
142bf215546Sopenharmony_ci      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
143bf215546Sopenharmony_ci      break;
144bf215546Sopenharmony_ci   default:
145bf215546Sopenharmony_ci      assert(0);
146bf215546Sopenharmony_ci   }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   if (src.negate)
149bf215546Sopenharmony_ci      sr |= NVFX_FP_REG_NEGATE;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci   if (src.abs)
152bf215546Sopenharmony_ci      hw[1] |= (1 << (29 + pos));
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
155bf215546Sopenharmony_ci          (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
156bf215546Sopenharmony_ci          (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
157bf215546Sopenharmony_ci          (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci   hw[pos + 1] |= sr;
160bf215546Sopenharmony_ci}
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_cistatic void
163bf215546Sopenharmony_ciemit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
164bf215546Sopenharmony_ci{
165bf215546Sopenharmony_ci   struct nv30_fragprog *fp = fpc->fp;
166bf215546Sopenharmony_ci   uint32_t *hw = &fp->insn[fpc->inst_offset];
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   switch (dst.type) {
169bf215546Sopenharmony_ci   case NVFXSR_OUTPUT:
170bf215546Sopenharmony_ci      if (dst.index == 1)
171bf215546Sopenharmony_ci         fp->fp_control |= 0x0000000e;
172bf215546Sopenharmony_ci      else {
173bf215546Sopenharmony_ci         hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
174bf215546Sopenharmony_ci         dst.index <<= 1;
175bf215546Sopenharmony_ci      }
176bf215546Sopenharmony_ci      FALLTHROUGH;
177bf215546Sopenharmony_ci   case NVFXSR_TEMP:
178bf215546Sopenharmony_ci      if (fpc->num_regs < (dst.index + 1))
179bf215546Sopenharmony_ci         fpc->num_regs = dst.index + 1;
180bf215546Sopenharmony_ci      break;
181bf215546Sopenharmony_ci   case NVFXSR_NONE:
182bf215546Sopenharmony_ci      hw[0] |= (1 << 30);
183bf215546Sopenharmony_ci      break;
184bf215546Sopenharmony_ci   default:
185bf215546Sopenharmony_ci      assert(0);
186bf215546Sopenharmony_ci   }
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
189bf215546Sopenharmony_ci}
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_cistatic void
192bf215546Sopenharmony_cinvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
193bf215546Sopenharmony_ci{
194bf215546Sopenharmony_ci   struct nv30_fragprog *fp = fpc->fp;
195bf215546Sopenharmony_ci   uint32_t *hw;
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   fpc->inst_offset = fp->insn_len;
198bf215546Sopenharmony_ci   fpc->have_const = 0;
199bf215546Sopenharmony_ci   grow_insns(fpc, 4);
200bf215546Sopenharmony_ci   hw = &fp->insn[fpc->inst_offset];
201bf215546Sopenharmony_ci   memset(hw, 0, sizeof(uint32_t) * 4);
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   if (insn.op == NVFX_FP_OP_OPCODE_KIL)
204bf215546Sopenharmony_ci      fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL;
205bf215546Sopenharmony_ci   hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
206bf215546Sopenharmony_ci   hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
207bf215546Sopenharmony_ci   hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   if (insn.sat)
210bf215546Sopenharmony_ci      hw[0] |= NVFX_FP_OP_OUT_SAT;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   if (insn.cc_update)
213bf215546Sopenharmony_ci      hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
214bf215546Sopenharmony_ci   hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
215bf215546Sopenharmony_ci   hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
216bf215546Sopenharmony_ci        (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
217bf215546Sopenharmony_ci        (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
218bf215546Sopenharmony_ci        (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   if(insn.unit >= 0)
221bf215546Sopenharmony_ci   {
222bf215546Sopenharmony_ci      hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
223bf215546Sopenharmony_ci   }
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci   emit_dst(fpc, insn.dst);
226bf215546Sopenharmony_ci   emit_src(fpc, 0, insn.src[0]);
227bf215546Sopenharmony_ci   emit_src(fpc, 1, insn.src[1]);
228bf215546Sopenharmony_ci   emit_src(fpc, 2, insn.src[2]);
229bf215546Sopenharmony_ci}
230bf215546Sopenharmony_ci
/*
 * arith(sat, OP, dst, mask, s0, s1, s2)
 *   Build an nvfx_insn for NVFX_FP_OP_OPCODE_<OP> with no texture unit (-1).
 */
#define arith(s,o,d,m,s0,s1,s2)                     \
   nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1,        \
             (d), (m), (s0), (s1), (s2))

/*
 * tex(sat, OP, unit, dst, mask, s0, s1, s2)
 *   Build an nvfx_insn for NVFX_FP_OP_OPCODE_<OP> on texture unit `unit`.
 *   Only s0 is used; s1 and s2 are ignored and replaced by `none`, which
 *   must be a variable in scope at the point of use.
 */
#define tex(s,o,u,d,m,s0,s1,s2)                     \
   nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u),       \
             (d), (m), (s0), none, none)
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci/* IF src.x != 0, as TGSI specifies */
240bf215546Sopenharmony_cistatic void
241bf215546Sopenharmony_cinv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
242bf215546Sopenharmony_ci{
243bf215546Sopenharmony_ci   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
244bf215546Sopenharmony_ci   struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
245bf215546Sopenharmony_ci   uint32_t *hw;
246bf215546Sopenharmony_ci   insn.cc_update = 1;
247bf215546Sopenharmony_ci   nvfx_fp_emit(fpc, insn);
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci   fpc->inst_offset = fpc->fp->insn_len;
250bf215546Sopenharmony_ci   grow_insns(fpc, 4);
251bf215546Sopenharmony_ci   hw = &fpc->fp->insn[fpc->inst_offset];
252bf215546Sopenharmony_ci   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
253bf215546Sopenharmony_ci   hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
254bf215546Sopenharmony_ci      NV40_FP_OP_OUT_NONE |
255bf215546Sopenharmony_ci      (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
256bf215546Sopenharmony_ci   /* Use .xxxx swizzle so that we check only src[0].x*/
257bf215546Sopenharmony_ci   hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
258bf215546Sopenharmony_ci         (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
259bf215546Sopenharmony_ci         (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
260bf215546Sopenharmony_ci         (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) |
261bf215546Sopenharmony_ci         (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT);
262bf215546Sopenharmony_ci   hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */
263bf215546Sopenharmony_ci   hw[3] = 0; /* | endif_offset */
264bf215546Sopenharmony_ci   util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
265bf215546Sopenharmony_ci}
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci/* IF src.x != 0, as TGSI specifies */
268bf215546Sopenharmony_cistatic void
269bf215546Sopenharmony_cinv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
270bf215546Sopenharmony_ci{
271bf215546Sopenharmony_ci        struct nvfx_relocation reloc;
272bf215546Sopenharmony_ci        uint32_t *hw;
273bf215546Sopenharmony_ci        fpc->inst_offset = fpc->fp->insn_len;
274bf215546Sopenharmony_ci        grow_insns(fpc, 4);
275bf215546Sopenharmony_ci        hw = &fpc->fp->insn[fpc->inst_offset];
276bf215546Sopenharmony_ci        /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
277bf215546Sopenharmony_ci        hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
278bf215546Sopenharmony_ci        /* Use .xxxx swizzle so that we check only src[0].x*/
279bf215546Sopenharmony_ci        hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
280bf215546Sopenharmony_ci                        (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
281bf215546Sopenharmony_ci        hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
282bf215546Sopenharmony_ci        hw[3] = 0;
283bf215546Sopenharmony_ci        reloc.target = target;
284bf215546Sopenharmony_ci        reloc.location = fpc->inst_offset + 2;
285bf215546Sopenharmony_ci        util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
286bf215546Sopenharmony_ci}
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_cistatic void
289bf215546Sopenharmony_cinv40_fp_ret(struct nvfx_fpc *fpc)
290bf215546Sopenharmony_ci{
291bf215546Sopenharmony_ci   uint32_t *hw;
292bf215546Sopenharmony_ci   fpc->inst_offset = fpc->fp->insn_len;
293bf215546Sopenharmony_ci   grow_insns(fpc, 4);
294bf215546Sopenharmony_ci   hw = &fpc->fp->insn[fpc->inst_offset];
295bf215546Sopenharmony_ci   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
296bf215546Sopenharmony_ci   hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
297bf215546Sopenharmony_ci   /* Use .xxxx swizzle so that we check only src[0].x*/
298bf215546Sopenharmony_ci   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
299bf215546Sopenharmony_ci         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
300bf215546Sopenharmony_ci   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
301bf215546Sopenharmony_ci   hw[3] = 0;
302bf215546Sopenharmony_ci}
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_cistatic void
305bf215546Sopenharmony_cinv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
306bf215546Sopenharmony_ci{
307bf215546Sopenharmony_ci        struct nvfx_relocation reloc;
308bf215546Sopenharmony_ci        uint32_t *hw;
309bf215546Sopenharmony_ci        fpc->inst_offset = fpc->fp->insn_len;
310bf215546Sopenharmony_ci        grow_insns(fpc, 4);
311bf215546Sopenharmony_ci        hw = &fpc->fp->insn[fpc->inst_offset];
312bf215546Sopenharmony_ci        /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
313bf215546Sopenharmony_ci        hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
314bf215546Sopenharmony_ci                        NV40_FP_OP_OUT_NONE |
315bf215546Sopenharmony_ci                        (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
316bf215546Sopenharmony_ci        /* Use .xxxx swizzle so that we check only src[0].x*/
317bf215546Sopenharmony_ci        hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
318bf215546Sopenharmony_ci                        (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
319bf215546Sopenharmony_ci        hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH |
320bf215546Sopenharmony_ci                        (count << NV40_FP_OP_REP_COUNT1_SHIFT) |
321bf215546Sopenharmony_ci                        (count << NV40_FP_OP_REP_COUNT2_SHIFT) |
322bf215546Sopenharmony_ci                        (count << NV40_FP_OP_REP_COUNT3_SHIFT);
323bf215546Sopenharmony_ci        hw[3] = 0; /* | end_offset */
324bf215546Sopenharmony_ci        reloc.target = target;
325bf215546Sopenharmony_ci        reloc.location = fpc->inst_offset + 3;
326bf215546Sopenharmony_ci        util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
327bf215546Sopenharmony_ci        //util_dynarray_append(&fpc->loop_stack, unsigned, target);
328bf215546Sopenharmony_ci}
329bf215546Sopenharmony_ci
#if 0
/* documentation only */
/* warning: this only works forward, and probably only if not inside any IF */
/*
 * Unconditional branch to label `target`, built as an IF whose condition is
 * always false and whose else and endif offsets (words 2 and 3) both get a
 * relocation to `target`.
 */
static void
nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
{
   struct nvfx_relocation reloc;
   uint32_t *bra;

   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   bra = &fpc->fp->insn[fpc->inst_offset];

   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   bra[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
            NV40_FP_OP_OUT_NONE |
            (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
   /* identity condition swizzle with the always-false condition */
   bra[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
            (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT);
   bra[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */
   bra[3] = 0; /* | endif_offset */

   reloc.target = target;
   reloc.location = fpc->inst_offset + 2;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);

   reloc.target = target;
   reloc.location = fpc->inst_offset + 3;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}
#endif
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_cistatic void
360bf215546Sopenharmony_cinv40_fp_brk(struct nvfx_fpc *fpc)
361bf215546Sopenharmony_ci{
362bf215546Sopenharmony_ci   uint32_t *hw;
363bf215546Sopenharmony_ci   fpc->inst_offset = fpc->fp->insn_len;
364bf215546Sopenharmony_ci   grow_insns(fpc, 4);
365bf215546Sopenharmony_ci   hw = &fpc->fp->insn[fpc->inst_offset];
366bf215546Sopenharmony_ci   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
367bf215546Sopenharmony_ci   hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
368bf215546Sopenharmony_ci      NV40_FP_OP_OUT_NONE;
369bf215546Sopenharmony_ci   /* Use .xxxx swizzle so that we check only src[0].x*/
370bf215546Sopenharmony_ci   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
371bf215546Sopenharmony_ci         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
372bf215546Sopenharmony_ci   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
373bf215546Sopenharmony_ci   hw[3] = 0;
374bf215546Sopenharmony_ci}
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_cistatic inline struct nvfx_src
377bf215546Sopenharmony_citgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
378bf215546Sopenharmony_ci{
379bf215546Sopenharmony_ci   struct nvfx_src src;
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   switch (fsrc->Register.File) {
382bf215546Sopenharmony_ci   case TGSI_FILE_INPUT:
383bf215546Sopenharmony_ci      src.reg = fpc->r_input[fsrc->Register.Index];
384bf215546Sopenharmony_ci      break;
385bf215546Sopenharmony_ci   case TGSI_FILE_CONSTANT:
386bf215546Sopenharmony_ci      src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index);
387bf215546Sopenharmony_ci      break;
388bf215546Sopenharmony_ci   case TGSI_FILE_IMMEDIATE:
389bf215546Sopenharmony_ci      assert(fsrc->Register.Index < fpc->nr_imm);
390bf215546Sopenharmony_ci      src.reg = fpc->r_imm[fsrc->Register.Index];
391bf215546Sopenharmony_ci      break;
392bf215546Sopenharmony_ci   case TGSI_FILE_TEMPORARY:
393bf215546Sopenharmony_ci      src.reg = fpc->r_temp[fsrc->Register.Index];
394bf215546Sopenharmony_ci      break;
395bf215546Sopenharmony_ci   /* NV40 fragprog result regs are just temps, so this is simple */
396bf215546Sopenharmony_ci   case TGSI_FILE_OUTPUT:
397bf215546Sopenharmony_ci      src.reg = fpc->r_result[fsrc->Register.Index];
398bf215546Sopenharmony_ci      break;
399bf215546Sopenharmony_ci   default:
400bf215546Sopenharmony_ci      NOUVEAU_ERR("bad src file\n");
401bf215546Sopenharmony_ci      src.reg.index = 0;
402bf215546Sopenharmony_ci      src.reg.type = 0;
403bf215546Sopenharmony_ci      break;
404bf215546Sopenharmony_ci   }
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   src.abs = fsrc->Register.Absolute;
407bf215546Sopenharmony_ci   src.negate = fsrc->Register.Negate;
408bf215546Sopenharmony_ci   src.swz[0] = fsrc->Register.SwizzleX;
409bf215546Sopenharmony_ci   src.swz[1] = fsrc->Register.SwizzleY;
410bf215546Sopenharmony_ci   src.swz[2] = fsrc->Register.SwizzleZ;
411bf215546Sopenharmony_ci   src.swz[3] = fsrc->Register.SwizzleW;
412bf215546Sopenharmony_ci   src.indirect = 0;
413bf215546Sopenharmony_ci   src.indirect_reg = 0;
414bf215546Sopenharmony_ci   src.indirect_swz = 0;
415bf215546Sopenharmony_ci   return src;
416bf215546Sopenharmony_ci}
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_cistatic inline struct nvfx_reg
419bf215546Sopenharmony_citgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
420bf215546Sopenharmony_ci   switch (fdst->Register.File) {
421bf215546Sopenharmony_ci   case TGSI_FILE_OUTPUT:
422bf215546Sopenharmony_ci      return fpc->r_result[fdst->Register.Index];
423bf215546Sopenharmony_ci   case TGSI_FILE_TEMPORARY:
424bf215546Sopenharmony_ci      return fpc->r_temp[fdst->Register.Index];
425bf215546Sopenharmony_ci   case TGSI_FILE_NULL:
426bf215546Sopenharmony_ci      return nvfx_reg(NVFXSR_NONE, 0);
427bf215546Sopenharmony_ci   default:
428bf215546Sopenharmony_ci      NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
429bf215546Sopenharmony_ci      return nvfx_reg(NVFXSR_NONE, 0);
430bf215546Sopenharmony_ci   }
431bf215546Sopenharmony_ci}
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_cistatic inline int
434bf215546Sopenharmony_citgsi_mask(uint tgsi)
435bf215546Sopenharmony_ci{
436bf215546Sopenharmony_ci   int mask = 0;
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci   if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
439bf215546Sopenharmony_ci   if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
440bf215546Sopenharmony_ci   if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
441bf215546Sopenharmony_ci   if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
442bf215546Sopenharmony_ci   return mask;
443bf215546Sopenharmony_ci}
444bf215546Sopenharmony_ci
445bf215546Sopenharmony_cistatic bool
446bf215546Sopenharmony_cinvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
447bf215546Sopenharmony_ci            const struct tgsi_full_instruction *finst)
448bf215546Sopenharmony_ci{
449bf215546Sopenharmony_ci   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
450bf215546Sopenharmony_ci   struct nvfx_insn insn;
451bf215546Sopenharmony_ci   struct nvfx_src src[3], tmp;
452bf215546Sopenharmony_ci   struct nvfx_reg dst;
453bf215546Sopenharmony_ci   int mask, sat, unit = 0;
454bf215546Sopenharmony_ci   int ai = -1, ci = -1, ii = -1;
455bf215546Sopenharmony_ci   int i;
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   if (finst->Instruction.Opcode == TGSI_OPCODE_END)
458bf215546Sopenharmony_ci      return true;
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
461bf215546Sopenharmony_ci      const struct tgsi_full_src_register *fsrc;
462bf215546Sopenharmony_ci
463bf215546Sopenharmony_ci      fsrc = &finst->Src[i];
464bf215546Sopenharmony_ci      if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
465bf215546Sopenharmony_ci         src[i] = tgsi_src(fpc, fsrc);
466bf215546Sopenharmony_ci      }
467bf215546Sopenharmony_ci   }
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
470bf215546Sopenharmony_ci      const struct tgsi_full_src_register *fsrc;
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci      fsrc = &finst->Src[i];
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci      switch (fsrc->Register.File) {
475bf215546Sopenharmony_ci      case TGSI_FILE_INPUT:
476bf215546Sopenharmony_ci         if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0
477bf215546Sopenharmony_ci               || fsrc->Register.SwizzleX == PIPE_SWIZZLE_W
478bf215546Sopenharmony_ci               || fsrc->Register.SwizzleY == PIPE_SWIZZLE_W
479bf215546Sopenharmony_ci               || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_W
480bf215546Sopenharmony_ci               || fsrc->Register.SwizzleW == PIPE_SWIZZLE_W
481bf215546Sopenharmony_ci               )) {
482bf215546Sopenharmony_ci            /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */
483bf215546Sopenharmony_ci            struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1));
484bf215546Sopenharmony_ci            addend.swz[0] = fsrc->Register.SwizzleX;
485bf215546Sopenharmony_ci            addend.swz[1] = fsrc->Register.SwizzleY;
486bf215546Sopenharmony_ci            addend.swz[2] = fsrc->Register.SwizzleZ;
487bf215546Sopenharmony_ci            addend.swz[3] = fsrc->Register.SwizzleW;
488bf215546Sopenharmony_ci            src[i] = nvfx_src(temp(fpc));
489bf215546Sopenharmony_ci            nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none));
490bf215546Sopenharmony_ci         } else if (ai == -1 || ai == fsrc->Register.Index) {
491bf215546Sopenharmony_ci            ai = fsrc->Register.Index;
492bf215546Sopenharmony_ci            src[i] = tgsi_src(fpc, fsrc);
493bf215546Sopenharmony_ci         } else {
494bf215546Sopenharmony_ci            src[i] = nvfx_src(temp(fpc));
495bf215546Sopenharmony_ci            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
496bf215546Sopenharmony_ci         }
497bf215546Sopenharmony_ci         break;
498bf215546Sopenharmony_ci      case TGSI_FILE_CONSTANT:
499bf215546Sopenharmony_ci         if ((ci == -1 && ii == -1) ||
500bf215546Sopenharmony_ci             ci == fsrc->Register.Index) {
501bf215546Sopenharmony_ci            ci = fsrc->Register.Index;
502bf215546Sopenharmony_ci            src[i] = tgsi_src(fpc, fsrc);
503bf215546Sopenharmony_ci         } else {
504bf215546Sopenharmony_ci            src[i] = nvfx_src(temp(fpc));
505bf215546Sopenharmony_ci            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
506bf215546Sopenharmony_ci         }
507bf215546Sopenharmony_ci         break;
508bf215546Sopenharmony_ci      case TGSI_FILE_IMMEDIATE:
509bf215546Sopenharmony_ci         if ((ci == -1 && ii == -1) ||
510bf215546Sopenharmony_ci             ii == fsrc->Register.Index) {
511bf215546Sopenharmony_ci            ii = fsrc->Register.Index;
512bf215546Sopenharmony_ci            src[i] = tgsi_src(fpc, fsrc);
513bf215546Sopenharmony_ci         } else {
514bf215546Sopenharmony_ci            src[i] = nvfx_src(temp(fpc));
515bf215546Sopenharmony_ci            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
516bf215546Sopenharmony_ci         }
517bf215546Sopenharmony_ci         break;
518bf215546Sopenharmony_ci      case TGSI_FILE_TEMPORARY:
519bf215546Sopenharmony_ci         /* handled above */
520bf215546Sopenharmony_ci         break;
521bf215546Sopenharmony_ci      case TGSI_FILE_SAMPLER:
522bf215546Sopenharmony_ci         unit = fsrc->Register.Index;
523bf215546Sopenharmony_ci         break;
524bf215546Sopenharmony_ci      case TGSI_FILE_OUTPUT:
525bf215546Sopenharmony_ci         break;
526bf215546Sopenharmony_ci      default:
527bf215546Sopenharmony_ci         NOUVEAU_ERR("bad src file\n");
528bf215546Sopenharmony_ci         return false;
529bf215546Sopenharmony_ci      }
530bf215546Sopenharmony_ci   }
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   dst  = tgsi_dst(fpc, &finst->Dst[0]);
533bf215546Sopenharmony_ci   mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
534bf215546Sopenharmony_ci   sat  = finst->Instruction.Saturate;
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci   switch (finst->Instruction.Opcode) {
537bf215546Sopenharmony_ci   case TGSI_OPCODE_ADD:
538bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
539bf215546Sopenharmony_ci      break;
540bf215546Sopenharmony_ci   case TGSI_OPCODE_CEIL:
541bf215546Sopenharmony_ci      tmp = nvfx_src(temp(fpc));
542bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none));
543bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none));
544bf215546Sopenharmony_ci      break;
545bf215546Sopenharmony_ci   case TGSI_OPCODE_CMP:
546bf215546Sopenharmony_ci      insn = arith(0, MOV, none.reg, mask, src[0], none, none);
547bf215546Sopenharmony_ci      insn.cc_update = 1;
548bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, insn);
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_ci      insn = arith(sat, MOV, dst, mask, src[2], none, none);
551bf215546Sopenharmony_ci      insn.cc_test = NVFX_COND_GE;
552bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, insn);
553bf215546Sopenharmony_ci
554bf215546Sopenharmony_ci      insn = arith(sat, MOV, dst, mask, src[1], none, none);
555bf215546Sopenharmony_ci      insn.cc_test = NVFX_COND_LT;
556bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, insn);
557bf215546Sopenharmony_ci      break;
558bf215546Sopenharmony_ci   case TGSI_OPCODE_COS:
559bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
560bf215546Sopenharmony_ci      break;
561bf215546Sopenharmony_ci   case TGSI_OPCODE_DDX:
562bf215546Sopenharmony_ci      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
563bf215546Sopenharmony_ci         tmp = nvfx_src(temp(fpc));
564bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
565bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
566bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
567bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
568bf215546Sopenharmony_ci      } else {
569bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
570bf215546Sopenharmony_ci      }
571bf215546Sopenharmony_ci      break;
572bf215546Sopenharmony_ci   case TGSI_OPCODE_DDY:
573bf215546Sopenharmony_ci      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
574bf215546Sopenharmony_ci         tmp = nvfx_src(temp(fpc));
575bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
576bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
577bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
578bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
579bf215546Sopenharmony_ci      } else {
580bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
581bf215546Sopenharmony_ci      }
582bf215546Sopenharmony_ci      break;
583bf215546Sopenharmony_ci   case TGSI_OPCODE_DP2:
584bf215546Sopenharmony_ci      tmp = nvfx_src(temp(fpc));
585bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
586bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
587bf215546Sopenharmony_ci      break;
588bf215546Sopenharmony_ci   case TGSI_OPCODE_DP3:
589bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
590bf215546Sopenharmony_ci      break;
591bf215546Sopenharmony_ci   case TGSI_OPCODE_DP4:
592bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
593bf215546Sopenharmony_ci      break;
594bf215546Sopenharmony_ci   case TGSI_OPCODE_DST:
595bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
596bf215546Sopenharmony_ci      break;
597bf215546Sopenharmony_ci   case TGSI_OPCODE_EX2:
598bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
599bf215546Sopenharmony_ci      break;
600bf215546Sopenharmony_ci   case TGSI_OPCODE_FLR:
601bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
602bf215546Sopenharmony_ci      break;
603bf215546Sopenharmony_ci   case TGSI_OPCODE_FRC:
604bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
605bf215546Sopenharmony_ci      break;
606bf215546Sopenharmony_ci   case TGSI_OPCODE_KILL:
607bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
608bf215546Sopenharmony_ci      break;
609bf215546Sopenharmony_ci   case TGSI_OPCODE_KILL_IF:
610bf215546Sopenharmony_ci      insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
611bf215546Sopenharmony_ci      insn.cc_update = 1;
612bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, insn);
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci      insn = arith(0, KIL, none.reg, 0, none, none, none);
615bf215546Sopenharmony_ci      insn.cc_test = NVFX_COND_LT;
616bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, insn);
617bf215546Sopenharmony_ci      break;
618bf215546Sopenharmony_ci   case TGSI_OPCODE_LG2:
619bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
620bf215546Sopenharmony_ci      break;
621bf215546Sopenharmony_ci   case TGSI_OPCODE_LIT:
622bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
623bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none));
624bf215546Sopenharmony_ci      else {
625bf215546Sopenharmony_ci         /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by
626bf215546Sopenharmony_ci          * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement
627bf215546Sopenharmony_ci          *
628bf215546Sopenharmony_ci          * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead
629bf215546Sopenharmony_ci          */
630bf215546Sopenharmony_ci         struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0));
631bf215546Sopenharmony_ci         tmp = nvfx_src(temp(fpc));
632bf215546Sopenharmony_ci         if (ci>= 0 || ii >= 0) {
633bf215546Sopenharmony_ci            nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
634bf215546Sopenharmony_ci            maxs = tmp;
635bf215546Sopenharmony_ci         }
636bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
637bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
638bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
639bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
640bf215546Sopenharmony_ci      }
641bf215546Sopenharmony_ci      break;
642bf215546Sopenharmony_ci   case TGSI_OPCODE_LRP:
643bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
644bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
645bf215546Sopenharmony_ci      else {
646bf215546Sopenharmony_ci         tmp = nvfx_src(temp(fpc));
647bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
648bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
649bf215546Sopenharmony_ci      }
650bf215546Sopenharmony_ci      break;
651bf215546Sopenharmony_ci   case TGSI_OPCODE_MAD:
652bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
653bf215546Sopenharmony_ci      break;
654bf215546Sopenharmony_ci   case TGSI_OPCODE_MAX:
655bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
656bf215546Sopenharmony_ci      break;
657bf215546Sopenharmony_ci   case TGSI_OPCODE_MIN:
658bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
659bf215546Sopenharmony_ci      break;
660bf215546Sopenharmony_ci   case TGSI_OPCODE_MOV:
661bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
662bf215546Sopenharmony_ci      break;
663bf215546Sopenharmony_ci   case TGSI_OPCODE_MUL:
664bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
665bf215546Sopenharmony_ci      break;
666bf215546Sopenharmony_ci   case TGSI_OPCODE_NOP:
667bf215546Sopenharmony_ci      break;
668bf215546Sopenharmony_ci   case TGSI_OPCODE_POW:
669bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
670bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
671bf215546Sopenharmony_ci      else {
672bf215546Sopenharmony_ci         tmp = nvfx_src(temp(fpc));
673bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
674bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
675bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
676bf215546Sopenharmony_ci      }
677bf215546Sopenharmony_ci      break;
678bf215546Sopenharmony_ci   case TGSI_OPCODE_RCP:
679bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
680bf215546Sopenharmony_ci      break;
681bf215546Sopenharmony_ci   case TGSI_OPCODE_RSQ:
682bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
683bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
684bf215546Sopenharmony_ci      else {
685bf215546Sopenharmony_ci         tmp = nvfx_src(temp(fpc));
686bf215546Sopenharmony_ci         insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
687bf215546Sopenharmony_ci         insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
688bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, insn);
689bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
690bf215546Sopenharmony_ci      }
691bf215546Sopenharmony_ci      break;
692bf215546Sopenharmony_ci   case TGSI_OPCODE_SEQ:
693bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
694bf215546Sopenharmony_ci      break;
695bf215546Sopenharmony_ci   case TGSI_OPCODE_SGE:
696bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
697bf215546Sopenharmony_ci      break;
698bf215546Sopenharmony_ci   case TGSI_OPCODE_SGT:
699bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
700bf215546Sopenharmony_ci      break;
701bf215546Sopenharmony_ci   case TGSI_OPCODE_SIN:
702bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
703bf215546Sopenharmony_ci      break;
704bf215546Sopenharmony_ci   case TGSI_OPCODE_SLE:
705bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
706bf215546Sopenharmony_ci      break;
707bf215546Sopenharmony_ci   case TGSI_OPCODE_SLT:
708bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
709bf215546Sopenharmony_ci      break;
710bf215546Sopenharmony_ci   case TGSI_OPCODE_SNE:
711bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
712bf215546Sopenharmony_ci      break;
713bf215546Sopenharmony_ci   case TGSI_OPCODE_SSG:
714bf215546Sopenharmony_ci   {
715bf215546Sopenharmony_ci      struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X);
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci      insn = arith(sat, MOV, dst, mask, src[0], none, none);
718bf215546Sopenharmony_ci      insn.cc_update = 1;
719bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, insn);
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci      insn = arith(0, STR, dst, mask, none, none, none);
722bf215546Sopenharmony_ci      insn.cc_test = NVFX_COND_GT;
723bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, insn);
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci      if(!sat) {
726bf215546Sopenharmony_ci         insn = arith(0, MOV, dst, mask, minones, none, none);
727bf215546Sopenharmony_ci         insn.cc_test = NVFX_COND_LT;
728bf215546Sopenharmony_ci         nvfx_fp_emit(fpc, insn);
729bf215546Sopenharmony_ci      }
730bf215546Sopenharmony_ci      break;
731bf215546Sopenharmony_ci   }
732bf215546Sopenharmony_ci   case TGSI_OPCODE_TEX:
733bf215546Sopenharmony_ci      nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
734bf215546Sopenharmony_ci      break;
735bf215546Sopenharmony_ci        case TGSI_OPCODE_TRUNC:
736bf215546Sopenharmony_ci                tmp = nvfx_src(temp(fpc));
737bf215546Sopenharmony_ci                insn = arith(0, MOV, none.reg, mask, src[0], none, none);
738bf215546Sopenharmony_ci                insn.cc_update = 1;
739bf215546Sopenharmony_ci                nvfx_fp_emit(fpc, insn);
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci                nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
742bf215546Sopenharmony_ci                nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci                insn = arith(sat, MOV, dst, mask, neg(tmp), none, none);
745bf215546Sopenharmony_ci                insn.cc_test = NVFX_COND_LT;
746bf215546Sopenharmony_ci                nvfx_fp_emit(fpc, insn);
747bf215546Sopenharmony_ci                break;
748bf215546Sopenharmony_ci        case TGSI_OPCODE_TXB:
749bf215546Sopenharmony_ci                nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
750bf215546Sopenharmony_ci                break;
751bf215546Sopenharmony_ci        case TGSI_OPCODE_TXL:
752bf215546Sopenharmony_ci                if(fpc->is_nv4x)
753bf215546Sopenharmony_ci                        nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
754bf215546Sopenharmony_ci                else /* unsupported on nv30, use TEX and hope they like it */
755bf215546Sopenharmony_ci                        nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
756bf215546Sopenharmony_ci                break;
757bf215546Sopenharmony_ci        case TGSI_OPCODE_TXP:
758bf215546Sopenharmony_ci                nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
759bf215546Sopenharmony_ci                break;
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   case TGSI_OPCODE_IF:
762bf215546Sopenharmony_ci      // MOVRC0 R31 (TR0.xyzw), R<src>:
763bf215546Sopenharmony_ci      // IF (NE.xxxx) ELSE <else> END <end>
764bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
765bf215546Sopenharmony_ci         goto nv3x_cflow;
766bf215546Sopenharmony_ci      nv40_fp_if(fpc, src[0]);
767bf215546Sopenharmony_ci      break;
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   case TGSI_OPCODE_ELSE:
770bf215546Sopenharmony_ci   {
771bf215546Sopenharmony_ci      uint32_t *hw;
772bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
773bf215546Sopenharmony_ci         goto nv3x_cflow;
774bf215546Sopenharmony_ci      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
775bf215546Sopenharmony_ci      hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
776bf215546Sopenharmony_ci      hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
777bf215546Sopenharmony_ci      break;
778bf215546Sopenharmony_ci   }
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci   case TGSI_OPCODE_ENDIF:
781bf215546Sopenharmony_ci   {
782bf215546Sopenharmony_ci      uint32_t *hw;
783bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
784bf215546Sopenharmony_ci         goto nv3x_cflow;
785bf215546Sopenharmony_ci      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
786bf215546Sopenharmony_ci      hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
787bf215546Sopenharmony_ci      if(!hw[2])
788bf215546Sopenharmony_ci         hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
789bf215546Sopenharmony_ci      hw[3] = fpc->fp->insn_len;
790bf215546Sopenharmony_ci      break;
791bf215546Sopenharmony_ci   }
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci   case TGSI_OPCODE_BGNSUB:
794bf215546Sopenharmony_ci   case TGSI_OPCODE_ENDSUB:
795bf215546Sopenharmony_ci      /* nothing to do here */
796bf215546Sopenharmony_ci      break;
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   case TGSI_OPCODE_CAL:
799bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
800bf215546Sopenharmony_ci         goto nv3x_cflow;
801bf215546Sopenharmony_ci      nv40_fp_cal(fpc, finst->Label.Label);
802bf215546Sopenharmony_ci      break;
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_ci   case TGSI_OPCODE_RET:
805bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
806bf215546Sopenharmony_ci         goto nv3x_cflow;
807bf215546Sopenharmony_ci      nv40_fp_ret(fpc);
808bf215546Sopenharmony_ci      break;
809bf215546Sopenharmony_ci
810bf215546Sopenharmony_ci   case TGSI_OPCODE_BGNLOOP:
811bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
812bf215546Sopenharmony_ci         goto nv3x_cflow;
813bf215546Sopenharmony_ci      /* TODO: we should support using two nested REPs to allow a > 255 iteration count */
814bf215546Sopenharmony_ci      nv40_fp_rep(fpc, 255, finst->Label.Label);
815bf215546Sopenharmony_ci      break;
816bf215546Sopenharmony_ci
817bf215546Sopenharmony_ci   case TGSI_OPCODE_ENDLOOP:
818bf215546Sopenharmony_ci      break;
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_ci   case TGSI_OPCODE_BRK:
821bf215546Sopenharmony_ci      if(!fpc->is_nv4x)
822bf215546Sopenharmony_ci         goto nv3x_cflow;
823bf215546Sopenharmony_ci      nv40_fp_brk(fpc);
824bf215546Sopenharmony_ci      break;
825bf215546Sopenharmony_ci
826bf215546Sopenharmony_ci   case TGSI_OPCODE_CONT:
827bf215546Sopenharmony_ci   {
828bf215546Sopenharmony_ci      static int warned = 0;
829bf215546Sopenharmony_ci      if(!warned) {
830bf215546Sopenharmony_ci         NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n");
831bf215546Sopenharmony_ci         warned = 1;
832bf215546Sopenharmony_ci      }
833bf215546Sopenharmony_ci      break;
834bf215546Sopenharmony_ci   }
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_ci        default:
837bf215546Sopenharmony_ci      NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
838bf215546Sopenharmony_ci      return false;
839bf215546Sopenharmony_ci   }
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ciout:
842bf215546Sopenharmony_ci   release_temps(fpc);
843bf215546Sopenharmony_ci   return true;
844bf215546Sopenharmony_cinv3x_cflow:
845bf215546Sopenharmony_ci   {
846bf215546Sopenharmony_ci      static int warned = 0;
847bf215546Sopenharmony_ci      if(!warned) {
848bf215546Sopenharmony_ci         NOUVEAU_ERR(
849bf215546Sopenharmony_ci               "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n"
850bf215546Sopenharmony_ci               "If rendering is incorrect, try to disable GLSL support in the application.\n");
851bf215546Sopenharmony_ci         warned = 1;
852bf215546Sopenharmony_ci      }
853bf215546Sopenharmony_ci   }
854bf215546Sopenharmony_ci   goto out;
855bf215546Sopenharmony_ci}
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_cistatic bool
858bf215546Sopenharmony_cinvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
859bf215546Sopenharmony_ci                               const struct tgsi_full_declaration *fdec)
860bf215546Sopenharmony_ci{
861bf215546Sopenharmony_ci   unsigned idx = fdec->Range.First;
862bf215546Sopenharmony_ci   unsigned hw;
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci   switch (fdec->Semantic.Name) {
865bf215546Sopenharmony_ci   case TGSI_SEMANTIC_POSITION:
866bf215546Sopenharmony_ci      hw = NVFX_FP_OP_INPUT_SRC_POSITION;
867bf215546Sopenharmony_ci      break;
868bf215546Sopenharmony_ci   case TGSI_SEMANTIC_COLOR:
869bf215546Sopenharmony_ci      hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index;
870bf215546Sopenharmony_ci      break;
871bf215546Sopenharmony_ci   case TGSI_SEMANTIC_FOG:
872bf215546Sopenharmony_ci      hw = NVFX_FP_OP_INPUT_SRC_FOGC;
873bf215546Sopenharmony_ci      break;
874bf215546Sopenharmony_ci   case TGSI_SEMANTIC_FACE:
875bf215546Sopenharmony_ci      hw = NV40_FP_OP_INPUT_SRC_FACING;
876bf215546Sopenharmony_ci      break;
877bf215546Sopenharmony_ci   case TGSI_SEMANTIC_TEXCOORD:
878bf215546Sopenharmony_ci      assert(fdec->Semantic.Index < 8);
879bf215546Sopenharmony_ci      fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index;
880bf215546Sopenharmony_ci      fpc->fp->texcoords |= (1 << fdec->Semantic.Index);
881bf215546Sopenharmony_ci      fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index);
882bf215546Sopenharmony_ci      hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index);
883bf215546Sopenharmony_ci      break;
884bf215546Sopenharmony_ci   case TGSI_SEMANTIC_GENERIC:
885bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PCOORD:
886bf215546Sopenharmony_ci      /* will be assigned to remaining TC slots later */
887bf215546Sopenharmony_ci      return true;
888bf215546Sopenharmony_ci   default:
889bf215546Sopenharmony_ci      assert(0);
890bf215546Sopenharmony_ci      return false;
891bf215546Sopenharmony_ci   }
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci   fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
894bf215546Sopenharmony_ci   return true;
895bf215546Sopenharmony_ci}
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_cistatic bool
898bf215546Sopenharmony_cinvfx_fragprog_assign_generic(struct nvfx_fpc *fpc,
899bf215546Sopenharmony_ci                             const struct tgsi_full_declaration *fdec)
900bf215546Sopenharmony_ci{
901bf215546Sopenharmony_ci   unsigned num_texcoords = fpc->is_nv4x ? 10 : 8;
902bf215546Sopenharmony_ci   unsigned idx = fdec->Range.First;
903bf215546Sopenharmony_ci   unsigned hw;
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_ci   switch (fdec->Semantic.Name) {
906bf215546Sopenharmony_ci   case TGSI_SEMANTIC_GENERIC:
907bf215546Sopenharmony_ci   case TGSI_SEMANTIC_PCOORD:
908bf215546Sopenharmony_ci      for (hw = 0; hw < num_texcoords; hw++) {
909bf215546Sopenharmony_ci         if (fpc->fp->texcoord[hw] == 0xffff) {
910bf215546Sopenharmony_ci            if (hw <= 7) {
911bf215546Sopenharmony_ci               fpc->fp->texcoords |= (0x1 << hw);
912bf215546Sopenharmony_ci               fpc->fp->vp_or |= (0x00004000 << hw);
913bf215546Sopenharmony_ci            } else {
914bf215546Sopenharmony_ci               fpc->fp->vp_or |= (0x00001000 << (hw - 8));
915bf215546Sopenharmony_ci            }
916bf215546Sopenharmony_ci            if (fdec->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
917bf215546Sopenharmony_ci               fpc->fp->texcoord[hw] = 0xfffe;
918bf215546Sopenharmony_ci               fpc->fp->point_sprite_control |= (0x00000100 << hw);
919bf215546Sopenharmony_ci            } else {
920bf215546Sopenharmony_ci               fpc->fp->texcoord[hw] = fdec->Semantic.Index + 8;
921bf215546Sopenharmony_ci            }
922bf215546Sopenharmony_ci            hw = NVFX_FP_OP_INPUT_SRC_TC(hw);
923bf215546Sopenharmony_ci            fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
924bf215546Sopenharmony_ci            return true;
925bf215546Sopenharmony_ci         }
926bf215546Sopenharmony_ci      }
927bf215546Sopenharmony_ci      return false;
928bf215546Sopenharmony_ci   default:
929bf215546Sopenharmony_ci      return true;
930bf215546Sopenharmony_ci   }
931bf215546Sopenharmony_ci}
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_cistatic bool
934bf215546Sopenharmony_cinvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc,
935bf215546Sopenharmony_ci            const struct tgsi_full_declaration *fdec)
936bf215546Sopenharmony_ci{
937bf215546Sopenharmony_ci   unsigned idx = fdec->Range.First;
938bf215546Sopenharmony_ci   unsigned hw;
939bf215546Sopenharmony_ci
940bf215546Sopenharmony_ci   switch (fdec->Semantic.Name) {
941bf215546Sopenharmony_ci   case TGSI_SEMANTIC_POSITION:
942bf215546Sopenharmony_ci      hw = 1;
943bf215546Sopenharmony_ci      break;
944bf215546Sopenharmony_ci   case TGSI_SEMANTIC_COLOR:
945bf215546Sopenharmony_ci      hw = ~0;
946bf215546Sopenharmony_ci      switch (fdec->Semantic.Index) {
947bf215546Sopenharmony_ci      case 0: hw = 0; break;
948bf215546Sopenharmony_ci      case 1: hw = 2; break;
949bf215546Sopenharmony_ci      case 2: hw = 3; break;
950bf215546Sopenharmony_ci      case 3: hw = 4; break;
951bf215546Sopenharmony_ci      }
952bf215546Sopenharmony_ci      if(hw > ((fpc->is_nv4x) ? 4 : 2)) {
953bf215546Sopenharmony_ci         NOUVEAU_ERR("bad rcol index\n");
954bf215546Sopenharmony_ci         return false;
955bf215546Sopenharmony_ci      }
956bf215546Sopenharmony_ci      break;
957bf215546Sopenharmony_ci   default:
958bf215546Sopenharmony_ci      NOUVEAU_ERR("bad output semantic\n");
959bf215546Sopenharmony_ci      return false;
960bf215546Sopenharmony_ci   }
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_ci   fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
963bf215546Sopenharmony_ci   fpc->r_temps |= (1ULL << hw);
964bf215546Sopenharmony_ci   return true;
965bf215546Sopenharmony_ci}
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_cistatic bool
968bf215546Sopenharmony_cinvfx_fragprog_prepare(struct nvfx_fpc *fpc)
969bf215546Sopenharmony_ci{
970bf215546Sopenharmony_ci   struct tgsi_parse_context p;
971bf215546Sopenharmony_ci   int high_temp = -1, i;
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci   fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg));
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci   tgsi_parse_init(&p, fpc->fp->pipe.tokens);
976bf215546Sopenharmony_ci   while (!tgsi_parse_end_of_tokens(&p)) {
977bf215546Sopenharmony_ci      const union tgsi_full_token *tok = &p.FullToken;
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci      tgsi_parse_token(&p);
980bf215546Sopenharmony_ci      switch(tok->Token.Type) {
981bf215546Sopenharmony_ci      case TGSI_TOKEN_TYPE_DECLARATION:
982bf215546Sopenharmony_ci      {
983bf215546Sopenharmony_ci         const struct tgsi_full_declaration *fdec;
984bf215546Sopenharmony_ci         fdec = &p.FullToken.FullDeclaration;
985bf215546Sopenharmony_ci         switch (fdec->Declaration.File) {
986bf215546Sopenharmony_ci         case TGSI_FILE_INPUT:
987bf215546Sopenharmony_ci            if (!nvfx_fragprog_parse_decl_input(fpc, fdec))
988bf215546Sopenharmony_ci               goto out_err;
989bf215546Sopenharmony_ci            break;
990bf215546Sopenharmony_ci         case TGSI_FILE_OUTPUT:
991bf215546Sopenharmony_ci            if (!nvfx_fragprog_parse_decl_output(fpc, fdec))
992bf215546Sopenharmony_ci               goto out_err;
993bf215546Sopenharmony_ci            break;
994bf215546Sopenharmony_ci         case TGSI_FILE_TEMPORARY:
995bf215546Sopenharmony_ci            if (fdec->Range.Last > high_temp) {
996bf215546Sopenharmony_ci               high_temp =
997bf215546Sopenharmony_ci                  fdec->Range.Last;
998bf215546Sopenharmony_ci            }
999bf215546Sopenharmony_ci            break;
1000bf215546Sopenharmony_ci         default:
1001bf215546Sopenharmony_ci            break;
1002bf215546Sopenharmony_ci         }
1003bf215546Sopenharmony_ci      }
1004bf215546Sopenharmony_ci         break;
1005bf215546Sopenharmony_ci      case TGSI_TOKEN_TYPE_IMMEDIATE:
1006bf215546Sopenharmony_ci      {
1007bf215546Sopenharmony_ci         struct tgsi_full_immediate *imm;
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci         imm = &p.FullToken.FullImmediate;
1010bf215546Sopenharmony_ci         assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
1011bf215546Sopenharmony_ci         assert(fpc->nr_imm < fpc->fp->info.immediate_count);
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_ci         fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float);
1014bf215546Sopenharmony_ci         break;
1015bf215546Sopenharmony_ci      }
1016bf215546Sopenharmony_ci      default:
1017bf215546Sopenharmony_ci         break;
1018bf215546Sopenharmony_ci      }
1019bf215546Sopenharmony_ci   }
1020bf215546Sopenharmony_ci   tgsi_parse_free(&p);
1021bf215546Sopenharmony_ci
1022bf215546Sopenharmony_ci   tgsi_parse_init(&p, fpc->fp->pipe.tokens);
1023bf215546Sopenharmony_ci   while (!tgsi_parse_end_of_tokens(&p)) {
1024bf215546Sopenharmony_ci      const struct tgsi_full_declaration *fdec;
1025bf215546Sopenharmony_ci      tgsi_parse_token(&p);
1026bf215546Sopenharmony_ci      switch(p.FullToken.Token.Type) {
1027bf215546Sopenharmony_ci      case TGSI_TOKEN_TYPE_DECLARATION:
1028bf215546Sopenharmony_ci         fdec = &p.FullToken.FullDeclaration;
1029bf215546Sopenharmony_ci         switch (fdec->Declaration.File) {
1030bf215546Sopenharmony_ci         case TGSI_FILE_INPUT:
1031bf215546Sopenharmony_ci            if (!nvfx_fragprog_assign_generic(fpc, fdec))
1032bf215546Sopenharmony_ci               goto out_err;
1033bf215546Sopenharmony_ci            break;
1034bf215546Sopenharmony_ci         default:
1035bf215546Sopenharmony_ci            break;
1036bf215546Sopenharmony_ci         }
1037bf215546Sopenharmony_ci         break;
1038bf215546Sopenharmony_ci      default:
1039bf215546Sopenharmony_ci         break;
1040bf215546Sopenharmony_ci      }
1041bf215546Sopenharmony_ci   }
1042bf215546Sopenharmony_ci   tgsi_parse_free(&p);
1043bf215546Sopenharmony_ci
1044bf215546Sopenharmony_ci   if (++high_temp) {
1045bf215546Sopenharmony_ci      fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
1046bf215546Sopenharmony_ci      for (i = 0; i < high_temp; i++)
1047bf215546Sopenharmony_ci         fpc->r_temp[i] = temp(fpc);
1048bf215546Sopenharmony_ci      fpc->r_temps_discard = 0ULL;
1049bf215546Sopenharmony_ci   }
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci   return true;
1052bf215546Sopenharmony_ci
1053bf215546Sopenharmony_ciout_err:
1054bf215546Sopenharmony_ci   FREE(fpc->r_temp);
1055bf215546Sopenharmony_ci   fpc->r_temp = NULL;
1056bf215546Sopenharmony_ci
1057bf215546Sopenharmony_ci   tgsi_parse_free(&p);
1058bf215546Sopenharmony_ci   return false;
1059bf215546Sopenharmony_ci}
1060bf215546Sopenharmony_ci
/* NOTE(review): DEBUG_GET_ONCE_BOOL_OPTION is not defined in this file
 * (presumably it comes from util/u_debug.h).  It appears to declare a
 * boolean debug option called nvfx_dump_fp, controlled by the "NVFX_DUMP_FP"
 * setting and defaulting to false -- confirm against the macro definition. */
1061bf215546Sopenharmony_ciDEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false)
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_civoid
1064bf215546Sopenharmony_ci_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
1065bf215546Sopenharmony_ci{
1066bf215546Sopenharmony_ci   struct tgsi_parse_context parse;
1067bf215546Sopenharmony_ci   struct nvfx_fpc *fpc = NULL;
1068bf215546Sopenharmony_ci   struct util_dynarray insns;
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci   fp->translated = false;
1071bf215546Sopenharmony_ci   fp->point_sprite_control = 0;
1072bf215546Sopenharmony_ci   fp->vp_or = 0;
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci   fpc = CALLOC_STRUCT(nvfx_fpc);
1075bf215546Sopenharmony_ci   if (!fpc)
1076bf215546Sopenharmony_ci      goto out_err;
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_ci   fpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0;
1079bf215546Sopenharmony_ci   fpc->max_temps = fpc->is_nv4x ? 48 : 32;
1080bf215546Sopenharmony_ci   fpc->fp = fp;
1081bf215546Sopenharmony_ci   fpc->num_regs = 2;
1082bf215546Sopenharmony_ci   memset(fp->texcoord, 0xff, sizeof(fp->texcoord));
1083bf215546Sopenharmony_ci
1084bf215546Sopenharmony_ci   if (fp->info.properties[TGSI_PROPERTY_FS_COORD_ORIGIN])
1085bf215546Sopenharmony_ci      fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED;
1086bf215546Sopenharmony_ci   if (fp->info.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER])
1087bf215546Sopenharmony_ci      fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER;
1088bf215546Sopenharmony_ci   if (fp->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
1089bf215546Sopenharmony_ci      fp->rt_enable |= NV30_3D_RT_ENABLE_MRT;
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci   if (!nvfx_fragprog_prepare(fpc))
1092bf215546Sopenharmony_ci      goto out_err;
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci   tgsi_parse_init(&parse, fp->pipe.tokens);
1095bf215546Sopenharmony_ci   util_dynarray_init(&insns, NULL);
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_ci   while (!tgsi_parse_end_of_tokens(&parse)) {
1098bf215546Sopenharmony_ci      tgsi_parse_token(&parse);
1099bf215546Sopenharmony_ci
1100bf215546Sopenharmony_ci      switch (parse.FullToken.Token.Type) {
1101bf215546Sopenharmony_ci      case TGSI_TOKEN_TYPE_INSTRUCTION:
1102bf215546Sopenharmony_ci      {
1103bf215546Sopenharmony_ci         const struct tgsi_full_instruction *finst;
1104bf215546Sopenharmony_ci
1105bf215546Sopenharmony_ci         util_dynarray_append(&insns, unsigned, fp->insn_len);
1106bf215546Sopenharmony_ci         finst = &parse.FullToken.FullInstruction;
1107bf215546Sopenharmony_ci         if (!nvfx_fragprog_parse_instruction(fpc, finst))
1108bf215546Sopenharmony_ci            goto out_err;
1109bf215546Sopenharmony_ci      }
1110bf215546Sopenharmony_ci         break;
1111bf215546Sopenharmony_ci      default:
1112bf215546Sopenharmony_ci         break;
1113bf215546Sopenharmony_ci      }
1114bf215546Sopenharmony_ci   }
1115bf215546Sopenharmony_ci   util_dynarray_append(&insns, unsigned, fp->insn_len);
1116bf215546Sopenharmony_ci
1117bf215546Sopenharmony_ci   for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
1118bf215546Sopenharmony_ci   {
1119bf215546Sopenharmony_ci      struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
1120bf215546Sopenharmony_ci      fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target];
1121bf215546Sopenharmony_ci   }
1122bf215546Sopenharmony_ci   util_dynarray_fini(&insns);
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci   if(!fpc->is_nv4x)
1125bf215546Sopenharmony_ci      fp->fp_control |= (fpc->num_regs-1)/2;
1126bf215546Sopenharmony_ci   else
1127bf215546Sopenharmony_ci      fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT;
1128bf215546Sopenharmony_ci
1129bf215546Sopenharmony_ci   /* Terminate final instruction */
1130bf215546Sopenharmony_ci   if(fp->insn)
1131bf215546Sopenharmony_ci      fp->insn[fpc->inst_offset] |= 0x00000001;
1132bf215546Sopenharmony_ci
1133bf215546Sopenharmony_ci   /* Append NOP + END instruction for branches to the end of the program */
1134bf215546Sopenharmony_ci   fpc->inst_offset = fp->insn_len;
1135bf215546Sopenharmony_ci   grow_insns(fpc, 4);
1136bf215546Sopenharmony_ci   fp->insn[fpc->inst_offset + 0] = 0x00000001;
1137bf215546Sopenharmony_ci   fp->insn[fpc->inst_offset + 1] = 0x00000000;
1138bf215546Sopenharmony_ci   fp->insn[fpc->inst_offset + 2] = 0x00000000;
1139bf215546Sopenharmony_ci   fp->insn[fpc->inst_offset + 3] = 0x00000000;
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_ci   if(debug_get_option_nvfx_dump_fp())
1142bf215546Sopenharmony_ci   {
1143bf215546Sopenharmony_ci      debug_printf("\n");
1144bf215546Sopenharmony_ci      tgsi_dump(fp->pipe.tokens, 0);
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci      debug_printf("\n%s fragment program:\n", fpc->is_nv4x ? "nv4x" : "nv3x");
1147bf215546Sopenharmony_ci      for (unsigned i = 0; i < fp->insn_len; i += 4)
1148bf215546Sopenharmony_ci         debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
1149bf215546Sopenharmony_ci      debug_printf("\n");
1150bf215546Sopenharmony_ci   }
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ci   fp->translated = true;
1153bf215546Sopenharmony_ci
1154bf215546Sopenharmony_ciout:
1155bf215546Sopenharmony_ci   tgsi_parse_free(&parse);
1156bf215546Sopenharmony_ci   if (fpc)
1157bf215546Sopenharmony_ci   {
1158bf215546Sopenharmony_ci      FREE(fpc->r_temp);
1159bf215546Sopenharmony_ci      FREE(fpc->r_imm);
1160bf215546Sopenharmony_ci      util_dynarray_fini(&fpc->if_stack);
1161bf215546Sopenharmony_ci      util_dynarray_fini(&fpc->label_relocs);
1162bf215546Sopenharmony_ci      util_dynarray_fini(&fpc->imm_data);
1163bf215546Sopenharmony_ci      //util_dynarray_fini(&fpc->loop_stack);
1164bf215546Sopenharmony_ci      FREE(fpc);
1165bf215546Sopenharmony_ci   }
1166bf215546Sopenharmony_ci
1167bf215546Sopenharmony_ci   return;
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ciout_err:
1170bf215546Sopenharmony_ci   _debug_printf("Error: failed to compile this fragment program:\n");
1171bf215546Sopenharmony_ci   tgsi_dump(fp->pipe.tokens, 0);
1172bf215546Sopenharmony_ci   goto out;
1173bf215546Sopenharmony_ci}
1174