#include <float.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_dynarray.h"
#include "util/u_inlines.h"
#include "util/u_debug.h"
#include "util/u_memory.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_ureg.h"

#include "nouveau_debug.h"
#include "nv_object.xml.h"
#include "nv30/nv30-40_3d.xml.h"
#include "nv30/nvfx_shader.h"
#include "nv30/nv30_state.h"

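/* Per-shader compilation context: tracks temporary register allocation,
 * input/output/immediate register assignments and control-flow bookkeeping
 * while the TGSI tokens are translated into NV30/NV40 fragment program code.
 */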
struct nvfx_fpc {
   struct nv30_fragprog *fp;

   unsigned max_temps;
   unsigned long long r_temps;
   unsigned long long r_temps_discard;
   struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
   struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS];
   struct nvfx_reg *r_temp;

   int num_regs;

   unsigned inst_offset;
   unsigned have_const;
   unsigned is_nv4x;

   struct util_dynarray imm_data;

   struct nvfx_reg* r_imm;
   unsigned nr_imm;

   struct util_dynarray if_stack;
   //struct util_dynarray loop_stack;
   struct util_dynarray label_relocs;
};

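/* Allocate the lowest free temporary register from the r_temps bitmask.
 * Registers grabbed here are also marked in r_temps_discard so that
 * release_temps() can free them again at the end of the current
 * TGSI instruction.
 */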
static inline struct nvfx_reg
temp(struct nvfx_fpc *fpc)
{
   int idx = __builtin_ctzll(~fpc->r_temps);

   if (idx >= fpc->max_temps) {
      NOUVEAU_ERR("out of temps!!\n");
      return nvfx_reg(NVFXSR_TEMP, 0);
   }

   fpc->r_temps |= (1ULL << idx);
   fpc->r_temps_discard |= (1ULL << idx);
   return nvfx_reg(NVFXSR_TEMP, idx);
}

static inline void
release_temps(struct nvfx_fpc *fpc)
{
   fpc->r_temps &= ~fpc->r_temps_discard;
   fpc->r_temps_discard = 0ULL;
}

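/* Append a vec4 immediate to the imm_data pool and return a register
 * referencing it; the index is the position of the 16-byte vector in
 * the pool.
 */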
static inline struct nvfx_reg
nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d)
{
   float v[4] = {a, b, c, d};
   int idx = fpc->imm_data.size >> 4;

   memcpy(util_dynarray_grow(&fpc->imm_data, float, 4), v, 4 * sizeof(float));
   return nvfx_reg(NVFXSR_IMM, idx);
}

static void
grow_insns(struct nvfx_fpc *fpc, int size)
{
   struct nv30_fragprog *fp = fpc->fp;

   fp->insn_len += size;
   fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
}

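/* Encode source operand 'pos' (0..2) of the instruction at inst_offset.
 * IMM and CONST sources allocate the extra 4-word slot that follows the
 * instruction; immediate data is copied there directly, while constants are
 * recorded in fp->consts so the slot can be filled from the constant buffer
 * later.
 */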
static void
emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
{
   struct nv30_fragprog *fp = fpc->fp;
   uint32_t *hw = &fp->insn[fpc->inst_offset];
   uint32_t sr = 0;

   switch (src.reg.type) {
   case NVFXSR_INPUT:
      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
      hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
      break;
   case NVFXSR_OUTPUT:
      sr |= NVFX_FP_REG_SRC_HALF;
      FALLTHROUGH;
   case NVFXSR_TEMP:
      sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
      sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
      break;
   case NVFXSR_IMM:
      if (!fpc->have_const) {
         grow_insns(fpc, 4);
         hw = &fp->insn[fpc->inst_offset];
         fpc->have_const = 1;
      }

      memcpy(&fp->insn[fpc->inst_offset + 4],
            (float*)fpc->imm_data.data + src.reg.index * 4,
            sizeof(uint32_t) * 4);

      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
      break;
   case NVFXSR_CONST:
      if (!fpc->have_const) {
         grow_insns(fpc, 4);
         hw = &fp->insn[fpc->inst_offset];
         fpc->have_const = 1;
      }

      {
         struct nv30_fragprog_data *fpd;

         fp->consts = realloc(fp->consts, ++fp->nr_consts *
                    sizeof(*fpd));
         fpd = &fp->consts[fp->nr_consts - 1];
         fpd->offset = fpc->inst_offset + 4;
         fpd->index = src.reg.index;
         memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
      }

      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
      break;
   case NVFXSR_NONE:
      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
      break;
   default:
      assert(0);
   }

   if (src.negate)
      sr |= NVFX_FP_REG_NEGATE;

   if (src.abs)
      hw[1] |= (1 << (29 + pos));

   sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
          (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
          (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
          (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));

   hw[pos + 1] |= sr;
}

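/* Encode the destination register and keep num_regs up to date with the
 * highest temporary register written by the program.
 */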
static void
emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
{
   struct nv30_fragprog *fp = fpc->fp;
   uint32_t *hw = &fp->insn[fpc->inst_offset];

   switch (dst.type) {
   case NVFXSR_OUTPUT:
      if (dst.index == 1)
         fp->fp_control |= 0x0000000e;
      else {
         hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
         dst.index <<= 1;
      }
      FALLTHROUGH;
   case NVFXSR_TEMP:
      if (fpc->num_regs < (dst.index + 1))
         fpc->num_regs = dst.index + 1;
      break;
   case NVFXSR_NONE:
      hw[0] |= (1 << 30);
      break;
   default:
      assert(0);
   }

   hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
}

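/* Emit one 4-word hardware instruction: opcode, write mask, scale,
 * saturation, condition-code test/update, texture unit and the three
 * source operands.
 */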
static void
nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
{
   struct nv30_fragprog *fp = fpc->fp;
   uint32_t *hw;

   fpc->inst_offset = fp->insn_len;
   fpc->have_const = 0;
   grow_insns(fpc, 4);
   hw = &fp->insn[fpc->inst_offset];
   memset(hw, 0, sizeof(uint32_t) * 4);

   if (insn.op == NVFX_FP_OP_OPCODE_KIL)
      fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL;
   hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
   hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
   hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);

   if (insn.sat)
      hw[0] |= NVFX_FP_OP_OUT_SAT;

   if (insn.cc_update)
      hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
   hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
   hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
        (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
        (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
        (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));

   if(insn.unit >= 0)
   {
      hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
   }

   emit_dst(fpc, insn.dst);
   emit_src(fpc, 0, insn.src[0]);
   emit_src(fpc, 1, insn.src[1]);
   emit_src(fpc, 2, insn.src[2]);
}

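/* Convenience wrappers around nvfx_insn(): arith() builds an ALU instruction
 * (no texture unit), tex() builds a texture instruction bound to unit 'u'
 * (its extra source operands are forced to none).
 */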
#define arith(s,o,d,m,s0,s1,s2) \
       nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \
                       (d), (m), (s0), (s1), (s2))

#define tex(s,o,u,d,m,s0,s1,s2) \
   nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \
                   (d), (m), (s0), none, none)

/* IF src.x != 0, as TGSI specifies */
static void
nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
{
   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
   struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
   uint32_t *hw;
   insn.cc_update = 1;
   nvfx_fp_emit(fpc, insn);

   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
      NV40_FP_OP_OUT_NONE |
      (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
   /* Use .xxxx swizzle so that we check only src[0].x */
   hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
         (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
         (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
         (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) |
         (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT);
   hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */
   hw[3] = 0; /* | endif_offset */
   util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
}

/* Unconditional subroutine call; the call target is patched in later via
 * label_relocs. */
static void
nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
{
   struct nvfx_relocation reloc;
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
   /* identity swizzle and COND_TR make the call unconditional */
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
   hw[3] = 0;
   reloc.target = target;
   reloc.location = fpc->inst_offset + 2;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}

static void
nv40_fp_ret(struct nvfx_fpc *fpc)
{
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
   /* identity swizzle and COND_TR make the return unconditional */
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
   hw[3] = 0;
}

static void
nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
{
   struct nvfx_relocation reloc;
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
         NV40_FP_OP_OUT_NONE |
         (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
   /* identity swizzle and COND_TR make the REP unconditional */
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH |
         (count << NV40_FP_OP_REP_COUNT1_SHIFT) |
         (count << NV40_FP_OP_REP_COUNT2_SHIFT) |
         (count << NV40_FP_OP_REP_COUNT3_SHIFT);
   hw[3] = 0; /* | end_offset */
   reloc.target = target;
   reloc.location = fpc->inst_offset + 3;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
   //util_dynarray_append(&fpc->loop_stack, unsigned, target);
}

#if 0
/* documentation only */
/* warning: this only works forward, and probably only if not inside any IF */
static void
nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
{
   struct nvfx_relocation reloc;
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
      NV40_FP_OP_OUT_NONE |
      (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
   /* condition is FL (always false), so the ELSE branch, i.e. the jump, is always taken */
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
         (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */
   hw[3] = 0; /* | endif_offset */
   reloc.target = target;
   reloc.location = fpc->inst_offset + 2;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
   reloc.target = target;
   reloc.location = fpc->inst_offset + 3;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}
#endif

static void
nv40_fp_brk(struct nvfx_fpc *fpc)
{
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
      NV40_FP_OP_OUT_NONE;
   /* identity swizzle and COND_TR make the break unconditional */
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
         (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
   hw[3] = 0;
}

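/* Translate a TGSI source operand into an nvfx_src, resolving the register
 * file to the previously assigned hardware register and copying swizzle,
 * negate and absolute-value modifiers.
 */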
static inline struct nvfx_src
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
   struct nvfx_src src;

   switch (fsrc->Register.File) {
   case TGSI_FILE_INPUT:
      src.reg = fpc->r_input[fsrc->Register.Index];
      break;
   case TGSI_FILE_CONSTANT:
      src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index);
      break;
   case TGSI_FILE_IMMEDIATE:
      assert(fsrc->Register.Index < fpc->nr_imm);
      src.reg = fpc->r_imm[fsrc->Register.Index];
      break;
   case TGSI_FILE_TEMPORARY:
      src.reg = fpc->r_temp[fsrc->Register.Index];
      break;
   /* NV40 fragprog result regs are just temps, so this is simple */
   case TGSI_FILE_OUTPUT:
      src.reg = fpc->r_result[fsrc->Register.Index];
      break;
   default:
      NOUVEAU_ERR("bad src file\n");
      src.reg.index = 0;
      src.reg.type = 0;
      break;
   }

   src.abs = fsrc->Register.Absolute;
   src.negate = fsrc->Register.Negate;
   src.swz[0] = fsrc->Register.SwizzleX;
   src.swz[1] = fsrc->Register.SwizzleY;
   src.swz[2] = fsrc->Register.SwizzleZ;
   src.swz[3] = fsrc->Register.SwizzleW;
   src.indirect = 0;
   src.indirect_reg = 0;
   src.indirect_swz = 0;
   return src;
}

static inline struct nvfx_reg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst)
{
   switch (fdst->Register.File) {
   case TGSI_FILE_OUTPUT:
      return fpc->r_result[fdst->Register.Index];
   case TGSI_FILE_TEMPORARY:
      return fpc->r_temp[fdst->Register.Index];
   case TGSI_FILE_NULL:
      return nvfx_reg(NVFXSR_NONE, 0);
   default:
      NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
      return nvfx_reg(NVFXSR_NONE, 0);
   }
}

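/* Convert a TGSI write mask into the corresponding NVFX_FP_MASK_* bits. */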
static inline int
tgsi_mask(uint tgsi)
{
   int mask = 0;

   if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
   if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
   if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
   if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
   return mask;
}

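/* Translate a single TGSI instruction. Sources are first copied into
 * temporaries where the hardware's operand restrictions require it (the code
 * below allows only one distinct input attribute and one distinct
 * constant/immediate per instruction), then the opcode is lowered to one or
 * more hardware instructions.
 */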
static bool
nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
            const struct tgsi_full_instruction *finst)
{
   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
   struct nvfx_insn insn;
   struct nvfx_src src[3], tmp;
   struct nvfx_reg dst;
   int mask, sat, unit = 0;
   int ai = -1, ci = -1, ii = -1;
   int i;

   if (finst->Instruction.Opcode == TGSI_OPCODE_END)
      return true;

   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
      const struct tgsi_full_src_register *fsrc;

      fsrc = &finst->Src[i];
      if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
         src[i] = tgsi_src(fpc, fsrc);
      }
   }

   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
      const struct tgsi_full_src_register *fsrc;

      fsrc = &finst->Src[i];

      switch (fsrc->Register.File) {
      case TGSI_FILE_INPUT:
         if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0
               || fsrc->Register.SwizzleX == PIPE_SWIZZLE_W
               || fsrc->Register.SwizzleY == PIPE_SWIZZLE_W
               || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_W
               || fsrc->Register.SwizzleW == PIPE_SWIZZLE_W
               )) {
            /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */
            struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1));
            addend.swz[0] = fsrc->Register.SwizzleX;
            addend.swz[1] = fsrc->Register.SwizzleY;
            addend.swz[2] = fsrc->Register.SwizzleZ;
            addend.swz[3] = fsrc->Register.SwizzleW;
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none));
         } else if (ai == -1 || ai == fsrc->Register.Index) {
            ai = fsrc->Register.Index;
            src[i] = tgsi_src(fpc, fsrc);
         } else {
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_CONSTANT:
         if ((ci == -1 && ii == -1) ||
             ci == fsrc->Register.Index) {
            ci = fsrc->Register.Index;
            src[i] = tgsi_src(fpc, fsrc);
         } else {
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_IMMEDIATE:
         if ((ci == -1 && ii == -1) ||
             ii == fsrc->Register.Index) {
            ii = fsrc->Register.Index;
            src[i] = tgsi_src(fpc, fsrc);
         } else {
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_TEMPORARY:
         /* handled above */
         break;
      case TGSI_FILE_SAMPLER:
         unit = fsrc->Register.Index;
         break;
      case TGSI_FILE_OUTPUT:
         break;
      default:
         NOUVEAU_ERR("bad src file\n");
         return false;
      }
   }

   dst  = tgsi_dst(fpc, &finst->Dst[0]);
   mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
   sat  = finst->Instruction.Saturate;

   switch (finst->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_CEIL:
      tmp = nvfx_src(temp(fpc));
      nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none));
      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none));
      break;
   case TGSI_OPCODE_CMP:
      insn = arith(0, MOV, none.reg, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      insn = arith(sat, MOV, dst, mask, src[2], none, none);
      insn.cc_test = NVFX_COND_GE;
      nvfx_fp_emit(fpc, insn);

      insn = arith(sat, MOV, dst, mask, src[1], none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_fp_emit(fpc, insn);
      break;
   case TGSI_OPCODE_COS:
      nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_DDX:
      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
      } else {
         nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
      }
      break;
   case TGSI_OPCODE_DDY:
      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
      } else {
         nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
      }
      break;
   case TGSI_OPCODE_DP2:
      tmp = nvfx_src(temp(fpc));
      nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
      nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
      break;
   case TGSI_OPCODE_DP3:
      nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_DP4:
      nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_DST:
      nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_EX2:
      nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_FLR:
      nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_FRC:
      nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_KILL:
      nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
      break;
   case TGSI_OPCODE_KILL_IF:
      insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      insn = arith(0, KIL, none.reg, 0, none, none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_fp_emit(fpc, insn);
      break;
   case TGSI_OPCODE_LG2:
      nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_LIT:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none));
      else {
         /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by
          * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement
          *
          * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead
          */
         struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0));
         tmp = nvfx_src(temp(fpc));
         if (ci >= 0 || ii >= 0) {
            nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
            maxs = tmp;
         }
         nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
         nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
      }
      break;
   case TGSI_OPCODE_LRP:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
      else {
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
         nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
      }
      break;
   case TGSI_OPCODE_MAD:
      nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
      break;
   case TGSI_OPCODE_MAX:
      nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_MIN:
      nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_MOV:
      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_MUL:
      nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_NOP:
      break;
   case TGSI_OPCODE_POW:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
      else {
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
      }
      break;
   case TGSI_OPCODE_RCP:
      nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_RSQ:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
      else {
         tmp = nvfx_src(temp(fpc));
         insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
         insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
         nvfx_fp_emit(fpc, insn);
         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
      }
      break;
   case TGSI_OPCODE_SEQ:
      nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SGE:
      nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SGT:
      nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SIN:
      nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_SLE:
      nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SLT:
      nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SNE:
      nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SSG:
   {
      struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X);

      insn = arith(sat, MOV, dst, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      insn = arith(0, STR, dst, mask, none, none, none);
      insn.cc_test = NVFX_COND_GT;
      nvfx_fp_emit(fpc, insn);

      if(!sat) {
         insn = arith(0, MOV, dst, mask, minones, none, none);
         insn.cc_test = NVFX_COND_LT;
         nvfx_fp_emit(fpc, insn);
      }
      break;
   }
   case TGSI_OPCODE_TEX:
      nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_TRUNC:
      tmp = nvfx_src(temp(fpc));
      insn = arith(0, MOV, none.reg, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));

      insn = arith(sat, MOV, dst, mask, neg(tmp), none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_fp_emit(fpc, insn);
      break;
   case TGSI_OPCODE_TXB:
      nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_TXL:
      if(fpc->is_nv4x)
         nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
      else /* unsupported on nv30, use TEX and hope they like it */
         nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_TXP:
      nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
      break;

   case TGSI_OPCODE_IF:
      // MOVRC0 R31 (TR0.xyzw), R<src>:
      // IF (NE.xxxx) ELSE <else> END <end>
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_if(fpc, src[0]);
      break;

   case TGSI_OPCODE_ELSE:
   {
      uint32_t *hw;
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
      hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
      hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
      break;
   }

   case TGSI_OPCODE_ENDIF:
   {
      uint32_t *hw;
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
      hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
      if(!hw[2])
         hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
      hw[3] = fpc->fp->insn_len;
      break;
   }

   case TGSI_OPCODE_BGNSUB:
   case TGSI_OPCODE_ENDSUB:
      /* nothing to do here */
      break;

   case TGSI_OPCODE_CAL:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_cal(fpc, finst->Label.Label);
      break;

   case TGSI_OPCODE_RET:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_ret(fpc);
      break;

   case TGSI_OPCODE_BGNLOOP:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      /* TODO: we should support using two nested REPs to allow a > 255 iteration count */
      nv40_fp_rep(fpc, 255, finst->Label.Label);
      break;

   case TGSI_OPCODE_ENDLOOP:
      break;

   case TGSI_OPCODE_BRK:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_brk(fpc);
      break;

   case TGSI_OPCODE_CONT:
   {
      static int warned = 0;
      if(!warned) {
         NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n");
         warned = 1;
      }
      break;
   }

   default:
      NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
      return false;
   }

out:
   release_temps(fpc);
   return true;
nv3x_cflow:
   {
      static int warned = 0;
      if(!warned) {
         NOUVEAU_ERR(
               "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n"
               "If rendering is incorrect, try to disable GLSL support in the application.\n");
         warned = 1;
      }
   }
   goto out;
}

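/* Map a TGSI input declaration to a hardware interpolant. GENERIC and PCOORD
 * inputs are deferred to nvfx_fragprog_assign_generic(), which assigns them
 * to the remaining texture coordinate slots.
 */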
static bool
nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
                               const struct tgsi_full_declaration *fdec)
{
   unsigned idx = fdec->Range.First;
   unsigned hw;

   switch (fdec->Semantic.Name) {
   case TGSI_SEMANTIC_POSITION:
      hw = NVFX_FP_OP_INPUT_SRC_POSITION;
      break;
   case TGSI_SEMANTIC_COLOR:
      hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index;
      break;
   case TGSI_SEMANTIC_FOG:
      hw = NVFX_FP_OP_INPUT_SRC_FOGC;
      break;
   case TGSI_SEMANTIC_FACE:
      hw = NV40_FP_OP_INPUT_SRC_FACING;
      break;
   case TGSI_SEMANTIC_TEXCOORD:
      assert(fdec->Semantic.Index < 8);
      fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index;
      fpc->fp->texcoords |= (1 << fdec->Semantic.Index);
      fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index);
      hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index);
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_PCOORD:
      /* will be assigned to remaining TC slots later */
      return true;
   default:
      assert(0);
      return false;
   }

   fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
   return true;
}

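/* Assign GENERIC/PCOORD inputs to free texture coordinate slots (8 on nv30,
 * 10 on nv4x), updating the corresponding texcoord/vp_or bits and, for
 * PCOORD, the point sprite control mask.
 */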
static bool
nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc,
                             const struct tgsi_full_declaration *fdec)
{
   unsigned num_texcoords = fpc->is_nv4x ? 10 : 8;
   unsigned idx = fdec->Range.First;
   unsigned hw;

   switch (fdec->Semantic.Name) {
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_PCOORD:
      for (hw = 0; hw < num_texcoords; hw++) {
         if (fpc->fp->texcoord[hw] == 0xffff) {
            if (hw <= 7) {
               fpc->fp->texcoords |= (0x1 << hw);
               fpc->fp->vp_or |= (0x00004000 << hw);
            } else {
               fpc->fp->vp_or |= (0x00001000 << (hw - 8));
            }
            if (fdec->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
               fpc->fp->texcoord[hw] = 0xfffe;
               fpc->fp->point_sprite_control |= (0x00000100 << hw);
            } else {
               fpc->fp->texcoord[hw] = fdec->Semantic.Index + 8;
            }
            hw = NVFX_FP_OP_INPUT_SRC_TC(hw);
            fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
            return true;
         }
      }
      return false;
   default:
      return true;
   }
}

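/* Map output declarations to hardware result registers: output 1 for the
 * position (depth) result and 0/2/3/4 for the colour outputs. The chosen
 * register is also reserved in r_temps, since results live in the temporary
 * register file.
 */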
static bool
nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc,
            const struct tgsi_full_declaration *fdec)
{
   unsigned idx = fdec->Range.First;
   unsigned hw;

   switch (fdec->Semantic.Name) {
   case TGSI_SEMANTIC_POSITION:
      hw = 1;
      break;
   case TGSI_SEMANTIC_COLOR:
      hw = ~0;
      switch (fdec->Semantic.Index) {
      case 0: hw = 0; break;
      case 1: hw = 2; break;
      case 2: hw = 3; break;
      case 3: hw = 4; break;
      }
      if(hw > ((fpc->is_nv4x) ? 4 : 2)) {
         NOUVEAU_ERR("bad rcol index\n");
         return false;
      }
      break;
   default:
      NOUVEAU_ERR("bad output semantic\n");
      return false;
   }

   fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
   fpc->r_temps |= (1ULL << hw);
   return true;
}

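/* First pass over the TGSI tokens: record input/output declarations and
 * immediates, and find the highest temporary index. A second pass assigns
 * GENERIC/PCOORD inputs, then one hardware temp is pre-allocated per TGSI
 * temporary; clearing r_temps_discard makes those allocations permanent.
 */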
static bool
nvfx_fragprog_prepare(struct nvfx_fpc *fpc)
{
   struct tgsi_parse_context p;
   int high_temp = -1, i;

   fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg));

   tgsi_parse_init(&p, fpc->fp->pipe.tokens);
   while (!tgsi_parse_end_of_tokens(&p)) {
      const union tgsi_full_token *tok = &p.FullToken;

      tgsi_parse_token(&p);
      switch(tok->Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
      {
         const struct tgsi_full_declaration *fdec;
         fdec = &p.FullToken.FullDeclaration;
         switch (fdec->Declaration.File) {
         case TGSI_FILE_INPUT:
            if (!nvfx_fragprog_parse_decl_input(fpc, fdec))
               goto out_err;
            break;
         case TGSI_FILE_OUTPUT:
            if (!nvfx_fragprog_parse_decl_output(fpc, fdec))
               goto out_err;
            break;
         case TGSI_FILE_TEMPORARY:
            if (fdec->Range.Last > high_temp)
               high_temp = fdec->Range.Last;
            break;
         default:
            break;
         }
      }
         break;
      case TGSI_TOKEN_TYPE_IMMEDIATE:
      {
         struct tgsi_full_immediate *imm;

         imm = &p.FullToken.FullImmediate;
         assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
         assert(fpc->nr_imm < fpc->fp->info.immediate_count);

         fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float);
         break;
      }
      default:
         break;
      }
   }
   tgsi_parse_free(&p);

   tgsi_parse_init(&p, fpc->fp->pipe.tokens);
   while (!tgsi_parse_end_of_tokens(&p)) {
      const struct tgsi_full_declaration *fdec;
      tgsi_parse_token(&p);
      switch(p.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         fdec = &p.FullToken.FullDeclaration;
         switch (fdec->Declaration.File) {
         case TGSI_FILE_INPUT:
            if (!nvfx_fragprog_assign_generic(fpc, fdec))
               goto out_err;
            break;
         default:
            break;
         }
         break;
      default:
         break;
      }
   }
   tgsi_parse_free(&p);

   if (++high_temp) {
      fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
      for (i = 0; i < high_temp; i++)
         fpc->r_temp[i] = temp(fpc);
      fpc->r_temps_discard = 0ULL;
   }

   return true;

out_err:
   FREE(fpc->r_temp);
   fpc->r_temp = NULL;

   tgsi_parse_free(&p);
   return false;
}

DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false)

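/* Main entry point: translate the TGSI fragment shader attached to 'fp' into
 * NV30/NV40 fragment program instructions. 'oclass' selects between the nv3x
 * and nv4x encodings; on failure fp->translated is left false.
 */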
void
_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
{
   struct tgsi_parse_context parse;
   struct nvfx_fpc *fpc = NULL;
   struct util_dynarray insns;

   fp->translated = false;
   fp->point_sprite_control = 0;
   fp->vp_or = 0;

   fpc = CALLOC_STRUCT(nvfx_fpc);
   if (!fpc)
      goto out_err;

   fpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0;
   fpc->max_temps = fpc->is_nv4x ? 48 : 32;
   fpc->fp = fp;
   fpc->num_regs = 2;
   memset(fp->texcoord, 0xff, sizeof(fp->texcoord));

   if (fp->info.properties[TGSI_PROPERTY_FS_COORD_ORIGIN])
      fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED;
   if (fp->info.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER])
      fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER;
   if (fp->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
      fp->rt_enable |= NV30_3D_RT_ENABLE_MRT;

   if (!nvfx_fragprog_prepare(fpc))
      goto out_err;

   tgsi_parse_init(&parse, fp->pipe.tokens);
   util_dynarray_init(&insns, NULL);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
      {
         const struct tgsi_full_instruction *finst;

         util_dynarray_append(&insns, unsigned, fp->insn_len);
         finst = &parse.FullToken.FullInstruction;
         if (!nvfx_fragprog_parse_instruction(fpc, finst))
            goto out_err;
      }
         break;
      default:
         break;
      }
   }
   util_dynarray_append(&insns, unsigned, fp->insn_len);

   for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
   {
      struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
      fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target];
   }
   util_dynarray_fini(&insns);

   if(!fpc->is_nv4x)
      fp->fp_control |= (fpc->num_regs-1)/2;
   else
      fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT;

   /* Terminate final instruction */
   if(fp->insn)
      fp->insn[fpc->inst_offset] |= 0x00000001;

   /* Append NOP + END instruction for branches to the end of the program */
   fpc->inst_offset = fp->insn_len;
   grow_insns(fpc, 4);
   fp->insn[fpc->inst_offset + 0] = 0x00000001;
   fp->insn[fpc->inst_offset + 1] = 0x00000000;
   fp->insn[fpc->inst_offset + 2] = 0x00000000;
   fp->insn[fpc->inst_offset + 3] = 0x00000000;

   if(debug_get_option_nvfx_dump_fp())
   {
      debug_printf("\n");
      tgsi_dump(fp->pipe.tokens, 0);

      debug_printf("\n%s fragment program:\n", fpc->is_nv4x ? "nv4x" : "nv3x");
      for (unsigned i = 0; i < fp->insn_len; i += 4)
         debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
      debug_printf("\n");
   }

   fp->translated = true;

out:
   tgsi_parse_free(&parse);
   if (fpc)
   {
      FREE(fpc->r_temp);
      FREE(fpc->r_imm);
      util_dynarray_fini(&fpc->if_stack);
      util_dynarray_fini(&fpc->label_relocs);
      util_dynarray_fini(&fpc->imm_data);
      //util_dynarray_fini(&fpc->loop_stack);
      FREE(fpc);
   }

   return;

out_err:
   _debug_printf("Error: failed to compile this fragment program:\n");
   tgsi_dump(fp->pipe.tokens, 0);
   goto out;
}