1/*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26#include "compiler/nir/nir.h"
27#include "compiler/nir/nir_builder.h"
28#include "compiler/glsl/list.h"
29
30#include "main/mtypes.h"
31#include "main/shader_types.h"
32#include "util/ralloc.h"
33
34#include "prog_to_nir.h"
35#include "prog_instruction.h"
36#include "prog_parameter.h"
37#include "prog_print.h"
38#include "program.h"
39
40/**
41 * \file prog_to_nir.c
42 *
43 * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
44 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
45 * vertex processing.  Full GLSL support should use glsl_to_nir instead.
46 */
47
48struct ptn_compile {
49   const struct gl_context *ctx;
50   const struct gl_program *prog;
51   nir_builder build;
52   bool error;
53
54   nir_variable *parameters;
55   nir_variable *input_vars[VARYING_SLOT_MAX];
56   nir_variable *output_vars[VARYING_SLOT_MAX];
57   nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
58   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
59   nir_register **output_regs;
60   nir_register **temp_regs;
61
62   nir_register *addr_reg;
63};
64
/* Builds an anonymous 4-entry swizzle array from component names, e.g.
 * SWIZ(Y, Z, X, W) yields { SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W }.
 */
#define SWIZ(c0, c1, c2, c3) \
   (unsigned[4]){ SWIZZLE_##c0, SWIZZLE_##c1, SWIZZLE_##c2, SWIZZLE_##c3 }

/* Extracts the channel named by `comp` (X, Y, Z or W) from an SSA value. */
#define ptn_channel(bld, value, comp) nir_channel(bld, value, SWIZZLE_##comp)
68
69static nir_ssa_def *
70ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
71{
72   nir_builder *b = &c->build;
73
74   nir_alu_src src;
75   memset(&src, 0, sizeof(src));
76
77   if (dest->dest.is_ssa)
78      src.src = nir_src_for_ssa(&dest->dest.ssa);
79   else {
80      assert(!dest->dest.reg.indirect);
81      src.src = nir_src_for_reg(dest->dest.reg.reg);
82      src.src.reg.base_offset = dest->dest.reg.base_offset;
83   }
84
85   for (int i = 0; i < 4; i++)
86      src.swizzle[i] = i;
87
88   return nir_mov_alu(b, src, 4);
89}
90
91static nir_alu_dest
92ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
93{
94   nir_alu_dest dest;
95
96   memset(&dest, 0, sizeof(dest));
97
98   switch (prog_dst->File) {
99   case PROGRAM_TEMPORARY:
100      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
101      break;
102   case PROGRAM_OUTPUT:
103      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
104      break;
105   case PROGRAM_ADDRESS:
106      assert(prog_dst->Index == 0);
107      dest.dest.reg.reg = c->addr_reg;
108      break;
109   case PROGRAM_UNDEFINED:
110      break;
111   }
112
113   dest.write_mask = prog_dst->WriteMask;
114   dest.saturate = false;
115
116   assert(!prog_dst->RelAddr);
117
118   return dest;
119}
120
121static nir_ssa_def *
122ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
123{
124   nir_builder *b = &c->build;
125   nir_alu_src src;
126
127   memset(&src, 0, sizeof(src));
128
129   switch (prog_src->File) {
130   case PROGRAM_UNDEFINED:
131      return nir_imm_float(b, 0.0);
132   case PROGRAM_TEMPORARY:
133      assert(!prog_src->RelAddr && prog_src->Index >= 0);
134      src.src.reg.reg = c->temp_regs[prog_src->Index];
135      break;
136   case PROGRAM_INPUT: {
137      /* ARB_vertex_program doesn't allow relative addressing on vertex
138       * attributes; ARB_fragment_program has no relative addressing at all.
139       */
140      assert(!prog_src->RelAddr);
141
142      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
143
144      nir_variable *var = c->input_vars[prog_src->Index];
145      src.src = nir_src_for_ssa(nir_load_var(b, var));
146      break;
147   }
148   case PROGRAM_SYSTEM_VALUE: {
149      assert(!prog_src->RelAddr);
150
151      assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
152
153      nir_variable *var = c->sysval_vars[prog_src->Index];
154      src.src = nir_src_for_ssa(nir_load_var(b, var));
155      break;
156   }
157   case PROGRAM_STATE_VAR:
158   case PROGRAM_CONSTANT: {
159      /* We actually want to look at the type in the Parameters list for this,
160       * because it lets us upload constant builtin uniforms as actual
161       * constants.
162       */
163      struct gl_program_parameter_list *plist = c->prog->Parameters;
164      gl_register_file file = prog_src->RelAddr ? prog_src->File :
165         plist->Parameters[prog_src->Index].Type;
166
167      switch (file) {
168      case PROGRAM_CONSTANT:
169         if ((c->prog->arb.IndirectRegisterFiles &
170              (1 << PROGRAM_CONSTANT)) == 0) {
171            unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
172            float *v = (float *) plist->ParameterValues + pvo;
173            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
174            break;
175         }
176         FALLTHROUGH;
177      case PROGRAM_STATE_VAR: {
178         assert(c->parameters != NULL);
179
180         nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
181
182         nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
183         if (prog_src->RelAddr)
184            index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
185         deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
186
187         src.src = nir_src_for_ssa(nir_load_deref(b, deref));
188         break;
189      }
190      default:
191         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
192                 _mesa_register_file_name(file), file);
193         abort();
194      }
195      break;
196   }
197   default:
198      fprintf(stderr, "unknown src register file: %s (%d)\n",
199              _mesa_register_file_name(prog_src->File), prog_src->File);
200      abort();
201   }
202
203   nir_ssa_def *def;
204   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
205       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
206      /* The simple non-SWZ case. */
207      for (int i = 0; i < 4; i++)
208         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
209
210      def = nir_mov_alu(b, src, 4);
211
212      if (prog_src->Negate)
213         def = nir_fneg(b, def);
214   } else {
215      /* The SWZ instruction allows per-component zero/one swizzles, and also
216       * per-component negation.
217       */
218      nir_ssa_def *chans[4];
219      for (int i = 0; i < 4; i++) {
220         int swizzle = GET_SWZ(prog_src->Swizzle, i);
221         if (swizzle == SWIZZLE_ZERO) {
222            chans[i] = nir_imm_float(b, 0.0);
223         } else if (swizzle == SWIZZLE_ONE) {
224            chans[i] = nir_imm_float(b, 1.0);
225         } else {
226            assert(swizzle != SWIZZLE_NIL);
227            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
228            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
229            mov->dest.write_mask = 0x1;
230            mov->src[0] = src;
231            mov->src[0].swizzle[0] = swizzle;
232            nir_builder_instr_insert(b, &mov->instr);
233
234            chans[i] = &mov->dest.dest.ssa;
235         }
236
237         if (prog_src->Negate & (1 << i))
238            chans[i] = nir_fneg(b, chans[i]);
239      }
240      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
241   }
242
243   return def;
244}
245
246static void
247ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
248{
249   unsigned num_srcs = nir_op_infos[op].num_inputs;
250   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
251   unsigned i;
252
253   for (i = 0; i < num_srcs; i++)
254      instr->src[i].src = nir_src_for_ssa(src[i]);
255
256   instr->dest = dest;
257   nir_builder_instr_insert(b, &instr->instr);
258}
259
260static void
261ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
262                     nir_ssa_def *def, unsigned write_mask)
263{
264   if (!(dest.write_mask & write_mask))
265      return;
266
267   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
268   if (!mov)
269      return;
270
271   mov->dest = dest;
272   mov->dest.write_mask &= write_mask;
273   mov->src[0].src = nir_src_for_ssa(def);
274   for (unsigned i = def->num_components; i < 4; i++)
275      mov->src[0].swizzle[i] = def->num_components - 1;
276   nir_builder_instr_insert(b, &mov->instr);
277}
278
279static void
280ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
281{
282   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
283}
284
285static void
286ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
287{
288   ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
289}
290
291/* EXP - Approximate Exponential Base 2
292 *  dst.x = 2^{\lfloor src.x\rfloor}
293 *  dst.y = src.x - \lfloor src.x\rfloor
294 *  dst.z = 2^{src.x}
295 *  dst.w = 1.0
296 */
297static void
298ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
299{
300   nir_ssa_def *srcx = ptn_channel(b, src[0], X);
301
302   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
303   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
304   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
305   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
306}
307
308/* LOG - Approximate Logarithm Base 2
309 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
310 *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
311 *  dst.z = \log_2{|src.x|}
312 *  dst.w = 1.0
313 */
314static void
315ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
316{
317   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
318   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
319   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
320
321   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
322   ptn_move_dest_masked(b, dest,
323                        nir_fmul(b, abs_srcx,
324                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
325                        WRITEMASK_Y);
326   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
327   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
328}
329
330/* DST - Distance Vector
331 *   dst.x = 1.0
332 *   dst.y = src0.y \times src1.y
333 *   dst.z = src0.z
334 *   dst.w = src1.w
335 */
336static void
337ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
338{
339   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
340   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
341   ptn_move_dest_masked(b, dest, nir_mov(b, src[0]), WRITEMASK_Z);
342   ptn_move_dest_masked(b, dest, nir_mov(b, src[1]), WRITEMASK_W);
343}
344
345/* LIT - Light Coefficients
346 *  dst.x = 1.0
347 *  dst.y = max(src.x, 0.0)
348 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
349 *  dst.w = 1.0
350 */
351static void
352ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
353{
354   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
355
356   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
357                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);
358
359   if (dest.write_mask & WRITEMASK_Z) {
360      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
361      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
362                                                 nir_imm_float(b, 128.0)),
363                                     nir_imm_float(b, -128.0));
364      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
365                                  wclamp);
366
367      nir_ssa_def *z = nir_bcsel(b,
368                                 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
369                                 nir_imm_float(b, 0.0),
370                                 pow);
371
372      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
373   }
374}
375
376/* SCS - Sine Cosine
377 *   dst.x = \cos{src.x}
378 *   dst.y = \sin{src.x}
379 *   dst.z = 0.0
380 *   dst.w = 1.0
381 */
382static void
383ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
384{
385   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
386                        WRITEMASK_X);
387   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
388                        WRITEMASK_Y);
389   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
390   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
391}
392
393static void
394ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
395{
396   ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
397}
398
399static void
400ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
401{
402   ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
403}
404
405static void
406ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
407{
408   ptn_move_dest_masked(b, dest,
409                        nir_fsub(b,
410                                 nir_fmul(b,
411                                          nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
412                                          nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
413                                 nir_fmul(b,
414                                          nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
415                                          nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3))),
416                        WRITEMASK_XYZ);
417   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
418}
419
420static void
421ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
422{
423   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
424}
425
426static void
427ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
428{
429   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
430}
431
432static void
433ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
434{
435   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
436}
437
438static void
439ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
440{
441   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
442}
443
444static void
445ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
446{
447   ptn_move_dest(b, dest, nir_bcsel(b,
448                                    nir_flt(b, src[0], nir_imm_float(b, 0.0)),
449                                    src[1], src[2]));
450}
451
452static void
453ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
454{
455   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
456}
457
458static void
459ptn_kil(nir_builder *b, nir_ssa_def **src)
460{
461   /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
462   b->exact = true;
463   nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
464   b->exact = false;
465
466   nir_discard_if(b, cmp);
467}
468
469enum glsl_sampler_dim
470_mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
471{
472   *is_array = false;
473
474   switch (index) {
475   case TEXTURE_2D_MULTISAMPLE_INDEX:
476      return GLSL_SAMPLER_DIM_MS;
477   case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
478      *is_array = true;
479      return GLSL_SAMPLER_DIM_MS;
480   case TEXTURE_BUFFER_INDEX:
481      return GLSL_SAMPLER_DIM_BUF;
482   case TEXTURE_1D_INDEX:
483      return GLSL_SAMPLER_DIM_1D;
484   case TEXTURE_2D_INDEX:
485      return GLSL_SAMPLER_DIM_2D;
486   case TEXTURE_3D_INDEX:
487      return GLSL_SAMPLER_DIM_3D;
488   case TEXTURE_CUBE_INDEX:
489      return GLSL_SAMPLER_DIM_CUBE;
490   case TEXTURE_CUBE_ARRAY_INDEX:
491      *is_array = true;
492      return GLSL_SAMPLER_DIM_CUBE;
493   case TEXTURE_RECT_INDEX:
494      return GLSL_SAMPLER_DIM_RECT;
495   case TEXTURE_1D_ARRAY_INDEX:
496      *is_array = true;
497      return GLSL_SAMPLER_DIM_1D;
498   case TEXTURE_2D_ARRAY_INDEX:
499      *is_array = true;
500      return GLSL_SAMPLER_DIM_2D;
501   case TEXTURE_EXTERNAL_INDEX:
502      return GLSL_SAMPLER_DIM_EXTERNAL;
503   case NUM_TEXTURE_TARGETS:
504      break;
505   }
506   unreachable("unknown texture target");
507}
508
509static void
510ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
511        struct prog_instruction *prog_inst)
512{
513   nir_builder *b = &c->build;
514   nir_tex_instr *instr;
515   nir_texop op;
516   unsigned num_srcs;
517
518   switch (prog_inst->Opcode) {
519   case OPCODE_TEX:
520      op = nir_texop_tex;
521      num_srcs = 1;
522      break;
523   case OPCODE_TXB:
524      op = nir_texop_txb;
525      num_srcs = 2;
526      break;
527   case OPCODE_TXD:
528      op = nir_texop_txd;
529      num_srcs = 3;
530      break;
531   case OPCODE_TXL:
532      op = nir_texop_txl;
533      num_srcs = 2;
534      break;
535   case OPCODE_TXP:
536      op = nir_texop_tex;
537      num_srcs = 2;
538      break;
539   default:
540      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
541      abort();
542   }
543
544   /* Deref sources */
545   num_srcs += 2;
546
547   if (prog_inst->TexShadow)
548      num_srcs++;
549
550   instr = nir_tex_instr_create(b->shader, num_srcs);
551   instr->op = op;
552   instr->dest_type = nir_type_float32;
553   instr->is_shadow = prog_inst->TexShadow;
554
555   bool is_array;
556   instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);
557
558   instr->coord_components =
559      glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
560
561   nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
562   if (!var) {
563      const struct glsl_type *type =
564         glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
565      char samplerName[20];
566      snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
567      var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
568      var->data.binding = prog_inst->TexSrcUnit;
569      var->data.explicit_binding = true;
570      c->sampler_vars[prog_inst->TexSrcUnit] = var;
571   }
572
573   nir_deref_instr *deref = nir_build_deref_var(b, var);
574
575   unsigned src_number = 0;
576
577   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
578   instr->src[src_number].src_type = nir_tex_src_texture_deref;
579   src_number++;
580   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
581   instr->src[src_number].src_type = nir_tex_src_sampler_deref;
582   src_number++;
583
584   instr->src[src_number].src =
585      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
586                                  instr->coord_components));
587   instr->src[src_number].src_type = nir_tex_src_coord;
588   src_number++;
589
590   if (prog_inst->Opcode == OPCODE_TXP) {
591      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
592      instr->src[src_number].src_type = nir_tex_src_projector;
593      src_number++;
594   }
595
596   if (prog_inst->Opcode == OPCODE_TXB) {
597      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
598      instr->src[src_number].src_type = nir_tex_src_bias;
599      src_number++;
600   }
601
602   if (prog_inst->Opcode == OPCODE_TXL) {
603      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
604      instr->src[src_number].src_type = nir_tex_src_lod;
605      src_number++;
606   }
607
608   if (instr->is_shadow) {
609      if (instr->coord_components < 3)
610         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
611      else
612         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
613
614      instr->src[src_number].src_type = nir_tex_src_comparator;
615      src_number++;
616   }
617
618   assert(src_number == num_srcs);
619
620   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
621   nir_builder_instr_insert(b, &instr->instr);
622
623   /* Resolve the writemask on the texture op. */
624   ptn_move_dest(b, dest, &instr->dest.ssa);
625}
626
627static const nir_op op_trans[MAX_OPCODE] = {
628   [OPCODE_NOP] = 0,
629   [OPCODE_ABS] = nir_op_fabs,
630   [OPCODE_ADD] = nir_op_fadd,
631   [OPCODE_ARL] = 0,
632   [OPCODE_CMP] = 0,
633   [OPCODE_COS] = 0,
634   [OPCODE_DDX] = nir_op_fddx,
635   [OPCODE_DDY] = nir_op_fddy,
636   [OPCODE_DP2] = 0,
637   [OPCODE_DP3] = 0,
638   [OPCODE_DP4] = 0,
639   [OPCODE_DPH] = 0,
640   [OPCODE_DST] = 0,
641   [OPCODE_END] = 0,
642   [OPCODE_EX2] = 0,
643   [OPCODE_EXP] = 0,
644   [OPCODE_FLR] = nir_op_ffloor,
645   [OPCODE_FRC] = nir_op_ffract,
646   [OPCODE_LG2] = 0,
647   [OPCODE_LIT] = 0,
648   [OPCODE_LOG] = 0,
649   [OPCODE_LRP] = 0,
650   [OPCODE_MAD] = 0,
651   [OPCODE_MAX] = nir_op_fmax,
652   [OPCODE_MIN] = nir_op_fmin,
653   [OPCODE_MOV] = nir_op_mov,
654   [OPCODE_MUL] = nir_op_fmul,
655   [OPCODE_POW] = 0,
656   [OPCODE_RCP] = 0,
657
658   [OPCODE_RSQ] = 0,
659   [OPCODE_SCS] = 0,
660   [OPCODE_SGE] = 0,
661   [OPCODE_SIN] = 0,
662   [OPCODE_SLT] = 0,
663   [OPCODE_SSG] = nir_op_fsign,
664   [OPCODE_SUB] = nir_op_fsub,
665   [OPCODE_SWZ] = 0,
666   [OPCODE_TEX] = 0,
667   [OPCODE_TRUNC] = nir_op_ftrunc,
668   [OPCODE_TXB] = 0,
669   [OPCODE_TXD] = 0,
670   [OPCODE_TXL] = 0,
671   [OPCODE_TXP] = 0,
672   [OPCODE_XPD] = 0,
673};
674
675static void
676ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
677{
678   nir_builder *b = &c->build;
679   unsigned i;
680   const unsigned op = prog_inst->Opcode;
681
682   if (op == OPCODE_END)
683      return;
684
685   nir_ssa_def *src[3];
686   for (i = 0; i < 3; i++) {
687      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
688   }
689   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
690   if (c->error)
691      return;
692
693   switch (op) {
694   case OPCODE_RSQ:
695      ptn_move_dest(b, dest,
696                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
697      break;
698
699   case OPCODE_RCP:
700      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
701      break;
702
703   case OPCODE_EX2:
704      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
705      break;
706
707   case OPCODE_LG2:
708      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
709      break;
710
711   case OPCODE_POW:
712      ptn_move_dest(b, dest, nir_fpow(b,
713                                      ptn_channel(b, src[0], X),
714                                      ptn_channel(b, src[1], X)));
715      break;
716
717   case OPCODE_COS:
718      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
719      break;
720
721   case OPCODE_SIN:
722      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
723      break;
724
725   case OPCODE_ARL:
726      ptn_arl(b, dest, src);
727      break;
728
729   case OPCODE_EXP:
730      ptn_exp(b, dest, src);
731      break;
732
733   case OPCODE_LOG:
734      ptn_log(b, dest, src);
735      break;
736
737   case OPCODE_LRP:
738      ptn_lrp(b, dest, src);
739      break;
740
741   case OPCODE_MAD:
742      ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
743      break;
744
745   case OPCODE_DST:
746      ptn_dst(b, dest, src);
747      break;
748
749   case OPCODE_LIT:
750      ptn_lit(b, dest, src);
751      break;
752
753   case OPCODE_XPD:
754      ptn_xpd(b, dest, src);
755      break;
756
757   case OPCODE_DP2:
758      ptn_dp2(b, dest, src);
759      break;
760
761   case OPCODE_DP3:
762      ptn_dp3(b, dest, src);
763      break;
764
765   case OPCODE_DP4:
766      ptn_dp4(b, dest, src);
767      break;
768
769   case OPCODE_DPH:
770      ptn_dph(b, dest, src);
771      break;
772
773   case OPCODE_KIL:
774      ptn_kil(b, src);
775      break;
776
777   case OPCODE_CMP:
778      ptn_cmp(b, dest, src);
779      break;
780
781   case OPCODE_SCS:
782      ptn_scs(b, dest, src);
783      break;
784
785   case OPCODE_SLT:
786      ptn_slt(b, dest, src);
787      break;
788
789   case OPCODE_SGE:
790      ptn_sge(b, dest, src);
791      break;
792
793   case OPCODE_TEX:
794   case OPCODE_TXB:
795   case OPCODE_TXD:
796   case OPCODE_TXL:
797   case OPCODE_TXP:
798      ptn_tex(c, dest, src, prog_inst);
799      break;
800
801   case OPCODE_SWZ:
802      /* Extended swizzles were already handled in ptn_get_src(). */
803      ptn_alu(b, nir_op_mov, dest, src);
804      break;
805
806   case OPCODE_NOP:
807      break;
808
809   default:
810      if (op_trans[op] != 0) {
811         ptn_alu(b, op_trans[op], dest, src);
812      } else {
813         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
814         abort();
815      }
816      break;
817   }
818
819   if (prog_inst->Saturate) {
820      assert(prog_inst->Saturate);
821      assert(!dest.dest.is_ssa);
822      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
823   }
824}
825
826/**
827 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
828 * variables at the end of the shader.
829 *
830 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
831 * written, because there's no output load intrinsic, which means we couldn't
832 * handle writemasks.
833 */
834static void
835ptn_add_output_stores(struct ptn_compile *c)
836{
837   nir_builder *b = &c->build;
838
839   nir_foreach_shader_out_variable(var, b->shader) {
840      nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
841      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
842          var->data.location == FRAG_RESULT_DEPTH) {
843         /* result.depth has this strange convention of being the .z component of
844          * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
845          * match GLSL's gl_FragDepth and the expectations of most backends.
846          */
847         src = nir_channel(b, src, 2);
848      }
849      if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
850          (var->data.location == VARYING_SLOT_FOGC ||
851           var->data.location == VARYING_SLOT_PSIZ)) {
852         /* result.{fogcoord,psiz} is a single component value */
853         src = nir_channel(b, src, 0);
854      }
855      unsigned num_components = glsl_get_vector_elements(var->type);
856      nir_store_var(b, var, src, (1 << num_components) - 1);
857   }
858}
859
860static void
861setup_registers_and_variables(struct ptn_compile *c)
862{
863   nir_builder *b = &c->build;
864   struct nir_shader *shader = b->shader;
865
866   /* Create input variables. */
867   uint64_t inputs_read = c->prog->info.inputs_read;
868   while (inputs_read) {
869      const int i = u_bit_scan64(&inputs_read);
870
871      if (c->ctx->Const.GLSLFragCoordIsSysVal &&
872          shader->info.stage == MESA_SHADER_FRAGMENT &&
873          i == VARYING_SLOT_POS) {
874         nir_variable *var = nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
875                                                 "frag_coord");
876         var->data.location = SYSTEM_VALUE_FRAG_COORD;
877         c->input_vars[i] = var;
878         continue;
879      }
880
881      nir_variable *var =
882         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
883                             ralloc_asprintf(shader, "in_%d", i));
884      var->data.location = i;
885      var->data.index = 0;
886
887      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
888         if (i == VARYING_SLOT_FOGC) {
889            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
890             * input variable a float, and create a local containing the
891             * full vec4 value.
892             */
893            var->type = glsl_float_type();
894
895            nir_variable *fullvar =
896               nir_local_variable_create(b->impl, glsl_vec4_type(),
897                                         "fogcoord_tmp");
898
899            nir_store_var(b, fullvar,
900                          nir_vec4(b, nir_load_var(b, var),
901                                   nir_imm_float(b, 0.0),
902                                   nir_imm_float(b, 0.0),
903                                   nir_imm_float(b, 1.0)),
904                          WRITEMASK_XYZW);
905
906            /* We inserted the real input into the list so the driver has real
907             * inputs, but we set c->input_vars[i] to the temporary so we use
908             * the splatted value.
909             */
910            c->input_vars[i] = fullvar;
911            continue;
912         }
913      }
914
915      c->input_vars[i] = var;
916   }
917
918   /* Create system value variables */
919   int i;
920   BITSET_FOREACH_SET(i, c->prog->info.system_values_read, SYSTEM_VALUE_MAX) {
921      nir_variable *var =
922         nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
923                             ralloc_asprintf(shader, "sv_%d", i));
924      var->data.location = i;
925      var->data.index = 0;
926
927      c->sysval_vars[i] = var;
928   }
929
930   /* Create output registers and variables. */
931   int max_outputs = util_last_bit64(c->prog->info.outputs_written);
932   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
933
934   uint64_t outputs_written = c->prog->info.outputs_written;
935   while (outputs_written) {
936      const int i = u_bit_scan64(&outputs_written);
937
938      /* Since we can't load from outputs in the IR, we make temporaries
939       * for the outputs and emit stores to the real outputs at the end of
940       * the shader.
941       */
942      nir_register *reg = nir_local_reg_create(b->impl);
943      reg->num_components = 4;
944
945      const struct glsl_type *type;
946      if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
947          (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC) ||
948          (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_PSIZ))
949         type = glsl_float_type();
950      else
951         type = glsl_vec4_type();
952
953      nir_variable *var =
954         nir_variable_create(shader, nir_var_shader_out, type,
955                             ralloc_asprintf(shader, "out_%d", i));
956      var->data.location = i;
957      var->data.index = 0;
958
959      c->output_regs[i] = reg;
960      c->output_vars[i] = var;
961   }
962
963   /* Create temporary registers. */
964   c->temp_regs = rzalloc_array(c, nir_register *,
965                                c->prog->arb.NumTemporaries);
966
967   nir_register *reg;
968   for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
969      reg = nir_local_reg_create(b->impl);
970      if (!reg) {
971         c->error = true;
972         return;
973      }
974      reg->num_components = 4;
975      c->temp_regs[i] = reg;
976   }
977
978   /* Create the address register (for ARB_vertex_program). */
979   reg = nir_local_reg_create(b->impl);
980   if (!reg) {
981      c->error = true;
982      return;
983   }
984   reg->num_components = 1;
985   c->addr_reg = reg;
986}
987
988struct nir_shader *
989prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog,
990            const nir_shader_compiler_options *options)
991{
992   struct ptn_compile *c;
993   struct nir_shader *s;
994   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
995
996   c = rzalloc(NULL, struct ptn_compile);
997   if (!c)
998      return NULL;
999   c->prog = prog;
1000   c->ctx = ctx;
1001
1002   c->build = nir_builder_init_simple_shader(stage, options, NULL);
1003
1004   /* Copy the shader_info from the gl_program */
1005   c->build.shader->info = prog->info;
1006
1007   s = c->build.shader;
1008
1009   if (prog->Parameters->NumParameters > 0) {
1010      const struct glsl_type *type =
1011         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
1012      c->parameters =
1013         nir_variable_create(s, nir_var_uniform, type,
1014                             prog->Parameters->Parameters[0].Name);
1015   }
1016
1017   setup_registers_and_variables(c);
1018   if (unlikely(c->error))
1019      goto fail;
1020
1021   for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1022      ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1023
1024      if (unlikely(c->error))
1025         break;
1026   }
1027
1028   ptn_add_output_stores(c);
1029
1030   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1031   s->info.num_textures = util_last_bit(prog->SamplersUsed);
1032   s->info.num_ubos = 0;
1033   s->info.num_abos = 0;
1034   s->info.num_ssbos = 0;
1035   s->info.num_images = 0;
1036   s->info.uses_texture_gather = false;
1037   s->info.clip_distance_array_size = 0;
1038   s->info.cull_distance_array_size = 0;
1039   s->info.separate_shader = false;
1040   s->info.io_lowered = false;
1041   s->info.internal = false;
1042
1043fail:
1044   if (c->error) {
1045      ralloc_free(s);
1046      s = NULL;
1047   }
1048   ralloc_free(c);
1049   return s;
1050}
1051