1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include <stdarg.h>
29
30#include "i915_context.h"
31#include "i915_debug.h"
32#include "i915_debug_private.h"
33#include "i915_fpc.h"
34#include "i915_reg.h"
35
36#include "pipe/p_shader_tokens.h"
37#include "tgsi/tgsi_dump.h"
38#include "tgsi/tgsi_from_mesa.h"
39#include "tgsi/tgsi_info.h"
40#include "tgsi/tgsi_parse.h"
41#include "util/log.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "util/u_string.h"
45
46#include "draw/draw_vertex.h"
47
48#ifndef M_PI
49#define M_PI 3.14159265358979323846
50#endif
51
/**
 * Simple pass-through fragment shader to use when we don't have
 * a real shader (or it fails to compile for some reason).
 *
 * The table is one packet header dword followed by the three dwords of a
 * single arithmetic (MOV) instruction.  i915_use_passthrough_shader()
 * copies it verbatim into a freshly allocated program buffer.
 */
static unsigned passthrough_program[] = {
   /* Packet header.  NOTE(review): the length field reads as
    * "1 instruction * 3 dwords, minus one" -- confirm against i915_reg.h.
    */
   _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
   /* move to output color:
    * destination is the output color register (all channels), source 0
    * is R0.
    */
   (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
    (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
   /* Per-channel source selects: X and W use SRC_ONE, Y and Z use
    * SRC_ZERO, i.e. no channel of R0 itself is selected.
    */
   ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
    (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
    (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
    (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
   /* Last instruction dword is left as zero. */
   0};
67
68/**
69 * component-wise negation of ureg
70 */
71static inline int
72negate(int reg, int x, int y, int z, int w)
73{
74   /* Another neat thing about the UREG representation */
75   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
76                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
77                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
78                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
79}
80
81/**
82 * In the event of a translation failure, we'll generate a simple color
83 * pass-through program.
84 */
85static void
86i915_use_passthrough_shader(struct i915_fragment_shader *fs)
87{
88   fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
89   if (fs->program) {
90      memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
91      fs->program_len = ARRAY_SIZE(passthrough_program);
92   }
93   fs->num_constants = 0;
94}
95
96void
97i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
98{
99   if (p->log_program_errors) {
100      va_list args;
101
102      va_start(args, msg);
103      mesa_loge_v(msg, args);
104      va_end(args);
105   }
106
107   p->error = 1;
108}
109
110static uint32_t
111get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
112            int index)
113{
114   int i;
115   for (i = 0; i < I915_TEX_UNITS; i++) {
116      if (fs->texcoords[i].semantic == -1) {
117         fs->texcoords[i].semantic = semantic;
118         fs->texcoords[i].index = index;
119         return i;
120      }
121      if (fs->texcoords[i].semantic == semantic &&
122          fs->texcoords[i].index == index)
123         return i;
124   }
125   debug_printf("Exceeded max generics\n");
126   return 0;
127}
128
129/**
130 * Construct a ureg for the given source register.  Will emit
131 * constants, apply swizzling and negation as needed.
132 */
133static uint32_t
134src_vector(struct i915_fp_compile *p,
135           const struct i915_full_src_register *source,
136           struct i915_fragment_shader *fs)
137{
138   uint32_t index = source->Register.Index;
139   uint32_t src = 0, sem_name, sem_ind;
140
141   switch (source->Register.File) {
142   case TGSI_FILE_TEMPORARY:
143      if (source->Register.Index >= I915_MAX_TEMPORARY) {
144         i915_program_error(p, "Exceeded max temporary reg");
145         return 0;
146      }
147      src = UREG(REG_TYPE_R, index);
148      break;
149   case TGSI_FILE_INPUT:
150      /* XXX: Packing COL1, FOGC into a single attribute works for
151       * texenv programs, but will fail for real fragment programs
152       * that use these attributes and expect them to be a full 4
153       * components wide.  Could use a texcoord to pass these
154       * attributes if necessary, but that won't work in the general
155       * case.
156       *
157       * We also use a texture coordinate to pass wpos when possible.
158       */
159
160      sem_name = p->shader->info.input_semantic_name[index];
161      sem_ind = p->shader->info.input_semantic_index[index];
162
163      switch (sem_name) {
164      case TGSI_SEMANTIC_GENERIC:
165      case TGSI_SEMANTIC_TEXCOORD:
166      case TGSI_SEMANTIC_PCOORD:
167      case TGSI_SEMANTIC_POSITION: {
168         if (sem_name == TGSI_SEMANTIC_PCOORD)
169            fs->reads_pntc = true;
170
171         int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
172         src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
173                              D0_CHANNEL_ALL);
174         break;
175      }
176      case TGSI_SEMANTIC_COLOR:
177         if (sem_ind == 0) {
178            src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
179         } else {
180            /* secondary color */
181            assert(sem_ind == 1);
182            src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
183            src = swizzle(src, X, Y, Z, ONE);
184         }
185         break;
186      case TGSI_SEMANTIC_FOG:
187         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
188         src = swizzle(src, W, W, W, W);
189         break;
190      case TGSI_SEMANTIC_FACE: {
191         /* for back/front faces */
192         int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
193         src =
194            i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
195         break;
196      }
197      default:
198         i915_program_error(p, "Bad source->Index");
199         return 0;
200      }
201      break;
202
203   case TGSI_FILE_IMMEDIATE: {
204      assert(index < p->num_immediates);
205
206      uint8_t swiz[4] = {
207         source->Register.SwizzleX,
208         source->Register.SwizzleY,
209         source->Register.SwizzleZ,
210         source->Register.SwizzleW
211      };
212
213      uint8_t neg[4] = {
214         source->Register.Negate,
215         source->Register.Negate,
216         source->Register.Negate,
217         source->Register.Negate
218      };
219
220      unsigned i;
221
222      for (i = 0; i < 4; i++) {
223         if (swiz[i] == TGSI_SWIZZLE_ZERO || swiz[i] == TGSI_SWIZZLE_ONE) {
224            continue;
225         } else if (p->immediates[index][swiz[i]] == 0.0) {
226            swiz[i] = TGSI_SWIZZLE_ZERO;
227         } else if (p->immediates[index][swiz[i]] == 1.0) {
228            swiz[i] = TGSI_SWIZZLE_ONE;
229         } else if (p->immediates[index][swiz[i]] == -1.0) {
230            swiz[i] = TGSI_SWIZZLE_ONE;
231            neg[i] ^= 1;
232         } else {
233            break;
234         }
235      }
236
237      if (i == 4) {
238         return negate(swizzle(UREG(REG_TYPE_R, 0),
239                               swiz[0], swiz[1], swiz[2], swiz[3]),
240                       neg[0], neg[1], neg[2], neg[3]);
241      }
242
243      index = p->immediates_map[index];
244      FALLTHROUGH;
245   }
246
247   case TGSI_FILE_CONSTANT:
248      src = UREG(REG_TYPE_CONST, index);
249      break;
250
251   default:
252      i915_program_error(p, "Bad source->File");
253      return 0;
254   }
255
256   src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
257                 source->Register.SwizzleZ, source->Register.SwizzleW);
258
259   /* No HW abs flag, so we have to max with the negation. */
260   if (source->Register.Absolute) {
261      uint32_t tmp = i915_get_utemp(p);
262      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
263                      negate(src, 1, 1, 1, 1), 0);
264      src = tmp;
265   }
266
267   /* There's both negate-all-components and per-component negation.
268    * Try to handle both here.
269    */
270   {
271      int n = source->Register.Negate;
272      src = negate(src, n, n, n, n);
273   }
274
275   return src;
276}
277
278/**
279 * Construct a ureg for a destination register.
280 */
281static uint32_t
282get_result_vector(struct i915_fp_compile *p,
283                  const struct i915_full_dst_register *dest)
284{
285   switch (dest->Register.File) {
286   case TGSI_FILE_OUTPUT: {
287      uint32_t sem_name =
288         p->shader->info.output_semantic_name[dest->Register.Index];
289      switch (sem_name) {
290      case TGSI_SEMANTIC_POSITION:
291         return UREG(REG_TYPE_OD, 0);
292      case TGSI_SEMANTIC_COLOR:
293         return UREG(REG_TYPE_OC, 0);
294      default:
295         i915_program_error(p, "Bad inst->DstReg.Index/semantics");
296         return 0;
297      }
298   }
299   case TGSI_FILE_TEMPORARY:
300      return UREG(REG_TYPE_R, dest->Register.Index);
301   default:
302      i915_program_error(p, "Bad inst->DstReg.File");
303      return 0;
304   }
305}
306
307/**
308 * Compute flags for saturation and writemask.
309 */
310static uint32_t
311get_result_flags(const struct i915_full_instruction *inst)
312{
313   const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
314   uint32_t flags = 0x0;
315
316   if (inst->Instruction.Saturate)
317      flags |= A0_DEST_SATURATE;
318
319   if (writeMask & TGSI_WRITEMASK_X)
320      flags |= A0_DEST_CHANNEL_X;
321   if (writeMask & TGSI_WRITEMASK_Y)
322      flags |= A0_DEST_CHANNEL_Y;
323   if (writeMask & TGSI_WRITEMASK_Z)
324      flags |= A0_DEST_CHANNEL_Z;
325   if (writeMask & TGSI_WRITEMASK_W)
326      flags |= A0_DEST_CHANNEL_W;
327
328   return flags;
329}
330
331/**
332 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
333 */
334static uint32_t
335translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
336{
337   switch (tex) {
338   case TGSI_TEXTURE_SHADOW1D:
339      FALLTHROUGH;
340   case TGSI_TEXTURE_1D:
341      return D0_SAMPLE_TYPE_2D;
342
343   case TGSI_TEXTURE_SHADOW2D:
344      FALLTHROUGH;
345   case TGSI_TEXTURE_2D:
346      return D0_SAMPLE_TYPE_2D;
347
348   case TGSI_TEXTURE_SHADOWRECT:
349      FALLTHROUGH;
350   case TGSI_TEXTURE_RECT:
351      return D0_SAMPLE_TYPE_2D;
352
353   case TGSI_TEXTURE_3D:
354      return D0_SAMPLE_TYPE_VOLUME;
355
356   case TGSI_TEXTURE_CUBE:
357      return D0_SAMPLE_TYPE_CUBE;
358
359   default:
360      i915_program_error(p, "TexSrc type");
361      return 0;
362   }
363}
364
365/**
366 * Return the number of coords needed to access a given TGSI_TEXTURE_*
367 */
368uint32_t
369i915_coord_mask(enum tgsi_opcode opcode, enum tgsi_texture_type tex)
370{
371   uint32_t coord_mask = 0;
372
373   if (opcode == TGSI_OPCODE_TXP || opcode == TGSI_OPCODE_TXB)
374      coord_mask |= TGSI_WRITEMASK_W;
375
376   switch (tex) {
377   case TGSI_TEXTURE_1D: /* See the 1D coord swizzle below. */
378   case TGSI_TEXTURE_2D:
379   case TGSI_TEXTURE_RECT:
380      return coord_mask | TGSI_WRITEMASK_XY;
381
382   case TGSI_TEXTURE_SHADOW1D:
383   case TGSI_TEXTURE_SHADOW2D:
384   case TGSI_TEXTURE_SHADOWRECT:
385   case TGSI_TEXTURE_3D:
386   case TGSI_TEXTURE_CUBE:
387      return coord_mask | TGSI_WRITEMASK_XYZ;
388
389   default:
390      unreachable("bad texture target");
391   }
392}
393
394/**
395 * Generate texel lookup instruction.
396 */
397static void
398emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
399         uint32_t opcode, struct i915_fragment_shader *fs)
400{
401   uint32_t texture = inst->Texture.Texture;
402   uint32_t unit = inst->Src[1].Register.Index;
403   uint32_t tex = translate_tex_src_target(p, texture);
404   uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
405   uint32_t coord = src_vector(p, &inst->Src[0], fs);
406
407   /* For 1D textures, set the Y coord to the same as X.  Otherwise, we could
408    * select the wrong LOD based on the uninitialized Y coord when we sample our
409    * 1D textures as 2D.
410    */
411   if (texture == TGSI_TEXTURE_1D || texture == TGSI_TEXTURE_SHADOW1D)
412      coord = swizzle(coord, X, X, Z, W);
413
414   i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
415                   get_result_flags(inst), sampler, coord, opcode,
416                   i915_coord_mask(inst->Instruction.Opcode, texture));
417}
418
419/**
420 * Generate a simple arithmetic instruction
421 * \param opcode  the i915 opcode
422 * \param numArgs  the number of input/src arguments
423 */
424static void
425emit_simple_arith(struct i915_fp_compile *p,
426                  const struct i915_full_instruction *inst, uint32_t opcode,
427                  uint32_t numArgs, struct i915_fragment_shader *fs)
428{
429   uint32_t arg1, arg2, arg3;
430
431   assert(numArgs <= 3);
432
433   arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
434   arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
435   arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
436
437   i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
438                   get_result_flags(inst), 0, arg1, arg2, arg3);
439}
440
441/** As above, but swap the first two src regs */
442static void
443emit_simple_arith_swap2(struct i915_fp_compile *p,
444                        const struct i915_full_instruction *inst,
445                        uint32_t opcode, uint32_t numArgs,
446                        struct i915_fragment_shader *fs)
447{
448   struct i915_full_instruction inst2;
449
450   assert(numArgs == 2);
451
452   /* transpose first two registers */
453   inst2 = *inst;
454   inst2.Src[0] = inst->Src[1];
455   inst2.Src[1] = inst->Src[0];
456
457   emit_simple_arith(p, &inst2, opcode, numArgs, fs);
458}
459
/*
 * Translate TGSI instruction to i915 instruction.
 *
 * One TGSI instruction may expand to several i915 instructions.  Scratch
 * registers come from i915_get_utemp() and are handed back through
 * i915_release_utemps() after every handled opcode; an unknown opcode
 * flags a program error and returns before that call.
 *
 * p     compile state that receives the emitted instructions
 * inst  TGSI instruction to translate
 * fs    fragment shader, forwarded to src_vector()/emit_tex()
 *
 * Possible concerns:
 *
 * DDX, DDY -- return 0
 * SIN, COS -- could use another taylor step?
 * LIT      -- results seem a little different to sw mesa
 * LOG      -- different to mesa on negative numbers, but this is conformant.
 */
static void
i915_translate_instruction(struct i915_fp_compile *p,
                           const struct i915_full_instruction *inst,
                           struct i915_fragment_shader *fs)
{
   uint32_t src0, src1, src2, flags;
   uint32_t tmp = 0;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
      emit_simple_arith(p, inst, A0_ADD, 2, fs);
      break;

   case TGSI_OPCODE_CEIL:
      /* ceil(x) is computed as -floor(-x): FLR of the negated source into
       * a temp, then MOV of the negated temp into the destination.
       */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);
      /* Only the channel bits are kept for the temp write, so saturate
       * applies to the final MOV alone.
       */
      i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                      negate(src0, 1, 1, 1, 1), 0, 0);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(tmp, 1, 1, 1, 1), 0, 0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, src0, src2,
                      src1); /* NOTE: order of src2, src1 */
      break;

   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      /* XXX We just output 0 here */
      debug_printf("Punting DDX/DDY\n");
      /* The destination ureg is reused as the source, but every channel is
       * swizzled to ZERO, so the register contents are never read.
       */
      src0 = get_result_vector(p, &inst->Dst[0]);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
      break;

   case TGSI_OPCODE_DP2:
      /* Emitted as DP3 with the Z (and W) channel of src0 forced to zero. */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
      break;

   case TGSI_OPCODE_DP3:
      emit_simple_arith(p, inst, A0_DP3, 2, fs);
      break;

   case TGSI_OPCODE_DP4:
      emit_simple_arith(p, inst, A0_DP4, 2, fs);
      break;

   case TGSI_OPCODE_DST:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */
      i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
                      swizzle(src1, ONE, Y, ONE, W), 0);
      break;

   case TGSI_OPCODE_END:
      /* no-op */
      break;

   case TGSI_OPCODE_EX2:
      /* Only the X channel of the source is used, replicated to all four. */
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_FLR:
      emit_simple_arith(p, inst, A0_FLR, 1, fs);
      break;

   case TGSI_OPCODE_FRC:
      emit_simple_arith(p, inst, A0_FRC, 1, fs);
      break;

   case TGSI_OPCODE_KILL_IF:
      /* kill if src[0].x < 0 || src[0].y < 0 ... */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      src0,                /* coord*/
                      T0_TEXKILL,          /* opcode */
                      TGSI_WRITEMASK_XYZW);/* coord_mask */
      break;

   case TGSI_OPCODE_KILL:
      /* unconditional kill: same TEXKILL as above, fed a coordinate built
       * from the ONE swizzle with every channel negated.
       */
      tmp = i915_get_utemp(p);

      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
                             1, 1, 1, 1), /* coord */
                      T0_TEXKILL,         /* opcode */
                      TGSI_WRITEMASK_X);  /* coord_mask */
      break;

   case TGSI_OPCODE_LG2:
      /* Only the X channel of the source is used, replicated to all four. */
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_LIT:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = max( a.xyzw, a.00zw )
       * XXX: Clamp tmp.w to -128..128
       * tmp.y = log(tmp.y)
       * tmp.y = tmp.w * tmp.y
       * tmp.y = exp(tmp.y)
       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
       */
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, Z, W), 0);

      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(
         p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
         swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));

      break;

   case TGSI_OPCODE_LRP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      flags = get_result_flags(inst);
      tmp = i915_get_utemp(p);

      /* b*a + c*(1-a)
       *
       * b*a + c - ca
       *
       * tmp = b*a + c,
       * result = (-c)*a + tmp
       */
      i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
                      src0, src2);

      i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(src2, 1, 1, 1, 1), src0, tmp);
      break;

   case TGSI_OPCODE_MAD:
      emit_simple_arith(p, inst, A0_MAD, 3, fs);
      break;

   case TGSI_OPCODE_MAX:
      emit_simple_arith(p, inst, A0_MAX, 2, fs);
      break;

   case TGSI_OPCODE_MIN:
      emit_simple_arith(p, inst, A0_MIN, 2, fs);
      break;

   case TGSI_OPCODE_MOV:
      emit_simple_arith(p, inst, A0_MOV, 1, fs);
      break;

   case TGSI_OPCODE_MUL:
      emit_simple_arith(p, inst, A0_MUL, 2, fs);
      break;

   case TGSI_OPCODE_NOP:
      break;

   case TGSI_OPCODE_POW:
      /* pow(a, b) is built as EXP(b * LOG(a.x)), staged through tmp.x. */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      /* XXX: masking on intermediate values, here and elsewhere.
       */
      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
                      swizzle(src0, X, X, X, X), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);

      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      swizzle(tmp, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RET:
      /* XXX: no-op? */
      break;

   case TGSI_OPCODE_RCP:
      /* Only the X channel of the source is used, replicated to all four. */
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_RSQ:
      /* Only the X channel of the source is used, replicated to all four. */
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_SEQ: {
      /* ureg value used to recognise an operand that is zero in every
       * channel.
       */
      const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
                                    SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);

      /* if we're both >= and <= then we're == */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      if (src0 == zero || src1 == zero) {
         /* Make src0 the operand that is compared against zero. */
         if (src0 == zero)
            src0 = src1;

         /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only
          * two instructions instead of three.
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                         negate(src0, 1, 1, 1, 1), 0);
         i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         negate(tmp, 1, 1, 1, 1), zero, 0);
      } else {
         /* tmp = (src0 >= src1), dst = (src1 >= src0), dst = dst * tmp */
         i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

         i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0, src1, src0, 0);

         i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         get_result_vector(p, &inst->Dst[0]), tmp, 0);
      }

      break;
   }

   case TGSI_OPCODE_SGE:
      emit_simple_arith(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLE:
      /* like SGE, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLT:
      emit_simple_arith(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SGT:
      /* like SLT, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SNE: {
      /* ureg value used to recognise an operand that is zero in every
       * channel.
       */
      const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
                                    SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);

      /* if we're < or > then we're != */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      if (src0 == zero || src1 == zero) {
         /* Make src0 the operand that is compared against zero. */
         if (src0 == zero)
            src0 = src1;

         /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only
          * two instructions instead of three.
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                         negate(src0, 1, 1, 1, 1), 0);
         i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         negate(tmp, 1, 1, 1, 1), zero, 0);
      } else {
         /* tmp = (src0 < src1), dst = (src1 < src0), dst = dst + tmp */
         i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

         i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0, src1, src0, 0);

         i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         get_result_vector(p, &inst->Dst[0]), tmp, 0);
      }
      break;
   }

   case TGSI_OPCODE_SSG:
      /* compute (src>0) - (src<0) */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = (src < 0) */
      i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);

      /* dst = (0 < src) */
      i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);

      /* dst = dst - tmp */
      i915_emit_arith(
         p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_TEX:
      emit_tex(p, inst, T0_TEXLD, fs);
      break;

   case TGSI_OPCODE_TRUNC:
      emit_simple_arith(p, inst, A0_TRC, 1, fs);
      break;

   case TGSI_OPCODE_TXB:
      emit_tex(p, inst, T0_TEXLDB, fs);
      break;

   case TGSI_OPCODE_TXP:
      emit_tex(p, inst, T0_TEXLDP, fs);
      break;

   default:
      /* Unknown opcode: flag the error and leave without releasing utemps
       * (none were requested on this path).
       */
      i915_program_error(p, "bad opcode %s (%d)",
                         tgsi_get_opcode_name(inst->Instruction.Opcode),
                         inst->Instruction.Opcode);
      return;
   }

   i915_release_utemps(p);
}
838
839static void
840i915_translate_token(struct i915_fp_compile *p,
841                     const union i915_full_token *token,
842                     struct i915_fragment_shader *fs)
843{
844   struct i915_fragment_shader *ifs = p->shader;
845   switch (token->Token.Type) {
846   case TGSI_TOKEN_TYPE_PROPERTY:
847      /* Ignore properties where we only support one value. */
848      assert(token->FullProperty.Property.PropertyName ==
849                TGSI_PROPERTY_FS_COORD_ORIGIN ||
850             token->FullProperty.Property.PropertyName ==
851                TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
852             token->FullProperty.Property.PropertyName ==
853                TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS ||
854             token->FullProperty.Property.PropertyName ==
855                TGSI_PROPERTY_SEPARABLE_PROGRAM);
856      break;
857
858   case TGSI_TOKEN_TYPE_DECLARATION:
859      if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
860         if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
861            i915_program_error(p, "Exceeded %d max uniforms",
862                               I915_MAX_CONSTANT);
863         } else {
864            uint32_t i;
865            for (i = token->FullDeclaration.Range.First;
866                 i <= token->FullDeclaration.Range.Last; i++) {
867               ifs->constant_flags[i] = I915_CONSTFLAG_USER;
868               ifs->num_constants = MAX2(ifs->num_constants, i + 1);
869            }
870         }
871      } else if (token->FullDeclaration.Declaration.File ==
872                 TGSI_FILE_TEMPORARY) {
873         if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
874            i915_program_error(p, "Exceeded %d max TGSI temps",
875                               I915_MAX_TEMPORARY);
876         } else {
877            uint32_t i;
878            for (i = token->FullDeclaration.Range.First;
879                 i <= token->FullDeclaration.Range.Last; i++) {
880               /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
881               p->temp_flag |= (1 << i); /* mark temp as used */
882            }
883         }
884      }
885      break;
886
887   case TGSI_TOKEN_TYPE_IMMEDIATE: {
888      const struct tgsi_full_immediate *imm = &token->FullImmediate;
889      const uint32_t pos = p->num_immediates++;
890      uint32_t j;
891      assert(imm->Immediate.NrTokens <= 4 + 1);
892      for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
893         p->immediates[pos][j] = imm->u[j].Float;
894      }
895   } break;
896
897   case TGSI_TOKEN_TYPE_INSTRUCTION:
898      if (p->first_instruction) {
899         /* resolve location of immediates */
900         uint32_t i, j;
901         for (i = 0; i < p->num_immediates; i++) {
902            /* find constant slot for this immediate */
903            for (j = 0; j < I915_MAX_CONSTANT; j++) {
904               if (ifs->constant_flags[j] == 0x0) {
905                  memcpy(ifs->constants[j], p->immediates[i],
906                         4 * sizeof(float));
907                  /*printf("immediate %d maps to const %d\n", i, j);*/
908                  ifs->constant_flags[j] = 0xf; /* all four comps used */
909                  p->immediates_map[i] = j;
910                  ifs->num_constants = MAX2(ifs->num_constants, j + 1);
911                  break;
912               }
913            }
914            if (j == I915_MAX_CONSTANT) {
915               i915_program_error(p, "Exceeded %d max uniforms and immediates.",
916                                  I915_MAX_CONSTANT);
917            }
918         }
919
920         p->first_instruction = false;
921      }
922
923      i915_translate_instruction(p, &token->FullInstruction, fs);
924      break;
925
926   default:
927      assert(0);
928   }
929}
930
931/**
932 * Translate TGSI fragment shader into i915 hardware instructions.
933 * \param p  the translation state
934 * \param tokens  the TGSI token array
935 */
936static void
937i915_translate_instructions(struct i915_fp_compile *p,
938                            const struct i915_token_list *tokens,
939                            struct i915_fragment_shader *fs)
940{
941   int i;
942   for (i = 0; i < tokens->NumTokens && !p->error; i++) {
943      i915_translate_token(p, &tokens->Tokens[i], fs);
944   }
945}
946
947static struct i915_fp_compile *
948i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs)
949{
950   struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
951   int i;
952
953   p->shader = ifs;
954
955   /* Put new constants at end of const buffer, growing downward.
956    * The problem is we don't know how many user-defined constants might
957    * be specified with pipe->set_constant_buffer().
958    * Should pre-scan the user's program to determine the highest-numbered
959    * constant referenced.
960    */
961   ifs->num_constants = 0;
962   memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
963
964   memset(&p->register_phases, 0, sizeof(p->register_phases));
965
966   for (i = 0; i < I915_TEX_UNITS; i++)
967      ifs->texcoords[i].semantic = -1;
968
969   p->log_program_errors = !i915->no_log_program_errors;
970
971   p->first_instruction = true;
972
973   p->nr_tex_indirect = 1; /* correct? */
974   p->nr_tex_insn = 0;
975   p->nr_alu_insn = 0;
976   p->nr_decl_insn = 0;
977
978   p->csr = p->program;
979   p->decl = p->declarations;
980   p->decl_s = 0;
981   p->decl_t = 0;
982   p->temp_flag = ~0x0U << I915_MAX_TEMPORARY;
983   p->utemp_flag = ~0x7;
984
985   /* initialize the first program word */
986   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
987
988   return p;
989}
990
991/* Copy compile results to the fragment program struct and destroy the
992 * compilation context.
993 */
994static void
995i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
996{
997   struct i915_fragment_shader *ifs = p->shader;
998   unsigned long program_size = (unsigned long)(p->csr - p->program);
999   unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
1000
1001   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
1002      debug_printf("Exceeded max nr indirect texture lookups\n");
1003
1004   if (p->nr_tex_insn > I915_MAX_TEX_INSN)
1005      i915_program_error(p, "Exceeded max TEX instructions");
1006
1007   if (p->nr_alu_insn > I915_MAX_ALU_INSN)
1008      i915_program_error(p, "Exceeded max ALU instructions");
1009
1010   if (p->nr_decl_insn > I915_MAX_DECL_INSN)
1011      i915_program_error(p, "Exceeded max DECL instructions");
1012
1013   /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
1014    * TGSI_END), even when the depth write fixup gets emitted below - maybe that
1015    * one is fishy, too?
1016    */
1017   if (ifs->info.num_instructions == 1)
1018      i915_program_error(p, "Empty fragment shader");
1019
1020   if (p->error) {
1021      p->NumNativeInstructions = 0;
1022      p->NumNativeAluInstructions = 0;
1023      p->NumNativeTexInstructions = 0;
1024      p->NumNativeTexIndirections = 0;
1025
1026      i915_use_passthrough_shader(ifs);
1027   } else {
1028      p->NumNativeInstructions =
1029         p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1030      p->NumNativeAluInstructions = p->nr_alu_insn;
1031      p->NumNativeTexInstructions = p->nr_tex_insn;
1032      p->NumNativeTexIndirections = p->nr_tex_indirect;
1033
1034      /* patch in the program length */
1035      p->declarations[0] |= program_size + decl_size - 2;
1036
1037      /* Copy compilation results to fragment program struct:
1038       */
1039      assert(!ifs->program);
1040
1041      ifs->program_len = decl_size + program_size;
1042      ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
1043      memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
1044      memcpy(&ifs->program[decl_size], p->program,
1045             program_size * sizeof(uint32_t));
1046
1047      util_debug_message(
1048         &i915->debug, SHADER_INFO,
1049         "%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const",
1050         _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT), (int)program_size,
1051         p->nr_tex_insn, p->nr_tex_indirect,
1052         p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
1053         ifs->num_constants);
1054   }
1055
1056   /* Release the compilation struct:
1057    */
1058   FREE(p);
1059}
1060
1061/**
1062 * Rather than trying to intercept and jiggle depth writes during
1063 * emit, just move the value into its correct position at the end of
1064 * the program:
1065 */
1066static void
1067i915_fixup_depth_write(struct i915_fp_compile *p)
1068{
1069   for (int i = 0; i < p->shader->info.num_outputs; i++) {
1070      if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
1071         continue;
1072
1073      const uint32_t depth = UREG(REG_TYPE_OD, 0);
1074
1075      i915_emit_arith(p, A0_MOV,                  /* opcode */
1076                      depth,                      /* dest reg */
1077                      A0_DEST_CHANNEL_W,          /* write mask */
1078                      0,                          /* saturate? */
1079                      swizzle(depth, X, Y, Z, Z), /* src0 */
1080                      0, 0 /* src1, src2 */);
1081   }
1082}
1083
1084void
1085i915_translate_fragment_program(struct i915_context *i915,
1086                                struct i915_fragment_shader *fs)
1087{
1088   struct i915_fp_compile *p;
1089   const struct tgsi_token *tokens = fs->state.tokens;
1090   struct i915_token_list *i_tokens;
1091
1092   if (I915_DBG_ON(DBG_FS)) {
1093      mesa_logi("TGSI fragment shader:");
1094      tgsi_dump(tokens, 0);
1095   }
1096
1097   p = i915_init_compile(i915, fs);
1098
1099   i_tokens = i915_optimize(tokens);
1100   i915_translate_instructions(p, i_tokens, fs);
1101   i915_fixup_depth_write(p);
1102
1103   i915_fini_compile(i915, p);
1104   i915_optimize_free(i_tokens);
1105
1106   if (I915_DBG_ON(DBG_FS)) {
1107      mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1108                fs->num_constants ? ":" : "");
1109
1110      for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1111         if (fs->constant_flags[i] &&
1112             fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1113            mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1114                      fs->constants[i][1], fs->constants[i][2],
1115                      fs->constants[i][3]);
1116         }
1117      }
1118      i915_disassemble_program(fs->program, fs->program_len);
1119   }
1120}
1121