1/*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25#include "compiler/nir/nir.h"
26
27#include "util/u_debug.h"
28#include "util/u_prim.h"
29
30#include "nv50_ir.h"
31#include "nv50_ir_from_common.h"
32#include "nv50_ir_lowering_helper.h"
33#include "nv50_ir_target.h"
34#include "nv50_ir_util.h"
35#include "tgsi/tgsi_from_mesa.h"
36
37#include <unordered_map>
38#include <cstring>
39#include <list>
40#include <vector>
41
42namespace {
43
44using namespace nv50_ir;
45
46int
47type_size(const struct glsl_type *type, bool bindless)
48{
49   return glsl_count_attribute_slots(type, false);
50}
51
52static void
53function_temp_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
54{
55   assert(glsl_type_is_vector_or_scalar(type));
56
57   if (glsl_type_is_scalar(type)) {
58      glsl_get_natural_size_align_bytes(type, size, align);
59   } else {
60      unsigned comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
61      unsigned length = glsl_get_vector_elements(type);
62
63      *size = comp_size * length;
64      *align = 0x10;
65   }
66}
67
68class Converter : public ConverterCommon
69{
70public:
71   Converter(Program *, nir_shader *, nv50_ir_prog_info *, nv50_ir_prog_info_out *);
72
73   bool run();
74private:
75   typedef std::vector<LValue*> LValues;
76   typedef std::unordered_map<unsigned, LValues> NirDefMap;
77   typedef std::unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
78   typedef std::unordered_map<unsigned, BasicBlock*> NirBlockMap;
79
80   CacheMode convert(enum gl_access_qualifier);
81   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
82   LValues& convert(nir_alu_dest *);
83   BasicBlock* convert(nir_block *);
84   LValues& convert(nir_dest *);
85   SVSemantic convert(nir_intrinsic_op);
86   Value* convert(nir_load_const_instr*, uint8_t);
87   LValues& convert(nir_register *);
88   LValues& convert(nir_ssa_def *);
89
90   Value* getSrc(nir_alu_src *, uint8_t component = 0);
91   Value* getSrc(nir_register *, uint8_t);
92   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
93   Value* getSrc(nir_ssa_def *, uint8_t);
94
95   // returned value is the constant part of the given source (either the
96   // nir_src or the selected source component of an intrinsic). Even though
97   // this is mostly an optimization to be able to skip indirects in a few
98   // cases, sometimes we require immediate values or set some fileds on
99   // instructions (e.g. tex) in order for codegen to consume those.
100   // If the found value has not a constant part, the Value gets returned
101   // through the Value parameter.
102   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
103   // isScalar indicates that the addressing is scalar, vec4 addressing is
104   // assumed otherwise
105   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
106                        bool isScalar = false);
107
108   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
109
110   void setInterpolate(nv50_ir_varying *,
111                       uint8_t,
112                       bool centroid,
113                       unsigned semantics);
114
115   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
116                         uint8_t c, Value *indirect0 = NULL,
117                         Value *indirect1 = NULL, bool patch = false);
118   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
119                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
120                Value *indirect1 = NULL);
121
122   bool isFloatType(nir_alu_type);
123   bool isSignedType(nir_alu_type);
124   bool isResultFloat(nir_op);
125   bool isResultSigned(nir_op);
126
127   DataType getDType(nir_alu_instr *);
128   DataType getDType(nir_intrinsic_instr *);
129   DataType getDType(nir_op, uint8_t);
130
131   DataFile getFile(nir_intrinsic_op);
132
133   std::vector<DataType> getSTypes(nir_alu_instr *);
134   DataType getSType(nir_src &, bool isFloat, bool isSigned);
135
136   operation getOperation(nir_intrinsic_op);
137   operation getOperation(nir_op);
138   operation getOperation(nir_texop);
139   operation preOperationNeeded(nir_op);
140
141   int getSubOp(nir_intrinsic_op);
142   int getSubOp(nir_op);
143
144   CondCode getCondCode(nir_op);
145
146   bool assignSlots();
147   bool parseNIR();
148
149   bool visit(nir_alu_instr *);
150   bool visit(nir_block *);
151   bool visit(nir_cf_node *);
152   bool visit(nir_function *);
153   bool visit(nir_if *);
154   bool visit(nir_instr *);
155   bool visit(nir_intrinsic_instr *);
156   bool visit(nir_jump_instr *);
157   bool visit(nir_load_const_instr*);
158   bool visit(nir_loop *);
159   bool visit(nir_ssa_undef_instr *);
160   bool visit(nir_tex_instr *);
161
162   // tex stuff
163   unsigned int getNIRArgCount(TexInstruction::Target&);
164
165   nir_shader *nir;
166
167   NirDefMap ssaDefs;
168   NirDefMap regDefs;
169   ImmediateMap immediates;
170   NirBlockMap blocks;
171   unsigned int curLoopDepth;
172   unsigned int curIfDepth;
173
174   BasicBlock *exit;
175   Value *zero;
176   Instruction *immInsertPos;
177
178   int clipVertexOutput;
179
180   union {
181      struct {
182         Value *position;
183      } fp;
184   };
185};
186
187Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info,
188                     nv50_ir_prog_info_out *info_out)
189   : ConverterCommon(prog, info, info_out),
190     nir(nir),
191     curLoopDepth(0),
192     curIfDepth(0),
193     exit(NULL),
194     immInsertPos(NULL),
195     clipVertexOutput(-1)
196{
197   zero = mkImm((uint32_t)0);
198}
199
200BasicBlock *
201Converter::convert(nir_block *block)
202{
203   NirBlockMap::iterator it = blocks.find(block->index);
204   if (it != blocks.end())
205      return it->second;
206
207   BasicBlock *bb = new BasicBlock(func);
208   blocks[block->index] = bb;
209   return bb;
210}
211
212bool
213Converter::isFloatType(nir_alu_type type)
214{
215   return nir_alu_type_get_base_type(type) == nir_type_float;
216}
217
218bool
219Converter::isSignedType(nir_alu_type type)
220{
221   return nir_alu_type_get_base_type(type) == nir_type_int;
222}
223
224bool
225Converter::isResultFloat(nir_op op)
226{
227   const nir_op_info &info = nir_op_infos[op];
228   if (info.output_type != nir_type_invalid)
229      return isFloatType(info.output_type);
230
231   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
232   assert(false);
233   return true;
234}
235
236bool
237Converter::isResultSigned(nir_op op)
238{
239   switch (op) {
240   // there is no umul and we get wrong results if we treat all muls as signed
241   case nir_op_imul:
242   case nir_op_inot:
243      return false;
244   default:
245      const nir_op_info &info = nir_op_infos[op];
246      if (info.output_type != nir_type_invalid)
247         return isSignedType(info.output_type);
248      ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
249      assert(false);
250      return true;
251   }
252}
253
254DataType
255Converter::getDType(nir_alu_instr *insn)
256{
257   if (insn->dest.dest.is_ssa)
258      return getDType(insn->op, insn->dest.dest.ssa.bit_size);
259   else
260      return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
261}
262
263DataType
264Converter::getDType(nir_intrinsic_instr *insn)
265{
266   bool isFloat, isSigned;
267   switch (insn->intrinsic) {
268   case nir_intrinsic_bindless_image_atomic_fadd:
269   case nir_intrinsic_global_atomic_fadd:
270   case nir_intrinsic_image_atomic_fadd:
271   case nir_intrinsic_shared_atomic_fadd:
272   case nir_intrinsic_ssbo_atomic_fadd:
273      isFloat = true;
274      isSigned = false;
275      break;
276   case nir_intrinsic_shared_atomic_imax:
277   case nir_intrinsic_shared_atomic_imin:
278   case nir_intrinsic_ssbo_atomic_imax:
279   case nir_intrinsic_ssbo_atomic_imin:
280      isFloat = false;
281      isSigned = true;
282      break;
283   default:
284      isFloat = false;
285      isSigned = false;
286      break;
287   }
288
289   if (insn->dest.is_ssa)
290      return typeOfSize(insn->dest.ssa.bit_size / 8, isFloat, isSigned);
291   else
292      return typeOfSize(insn->dest.reg.reg->bit_size / 8, isFloat, isSigned);
293}
294
295DataType
296Converter::getDType(nir_op op, uint8_t bitSize)
297{
298   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
299   if (ty == TYPE_NONE) {
300      ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
301      assert(false);
302   }
303   return ty;
304}
305
306std::vector<DataType>
307Converter::getSTypes(nir_alu_instr *insn)
308{
309   const nir_op_info &info = nir_op_infos[insn->op];
310   std::vector<DataType> res(info.num_inputs);
311
312   for (uint8_t i = 0; i < info.num_inputs; ++i) {
313      if (info.input_types[i] != nir_type_invalid) {
314         res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
315      } else {
316         ERROR("getSType not implemented for %s idx %u\n", info.name, i);
317         assert(false);
318         res[i] = TYPE_NONE;
319         break;
320      }
321   }
322
323   return res;
324}
325
326DataType
327Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
328{
329   uint8_t bitSize;
330   if (src.is_ssa)
331      bitSize = src.ssa->bit_size;
332   else
333      bitSize = src.reg.reg->bit_size;
334
335   DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
336   if (ty == TYPE_NONE) {
337      const char *str;
338      if (isFloat)
339         str = "float";
340      else if (isSigned)
341         str = "int";
342      else
343         str = "uint";
344      ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
345      assert(false);
346   }
347   return ty;
348}
349
350DataFile
351Converter::getFile(nir_intrinsic_op op)
352{
353   switch (op) {
354   case nir_intrinsic_load_global:
355   case nir_intrinsic_store_global:
356   case nir_intrinsic_load_global_constant:
357      return FILE_MEMORY_GLOBAL;
358   case nir_intrinsic_load_scratch:
359   case nir_intrinsic_store_scratch:
360      return FILE_MEMORY_LOCAL;
361   case nir_intrinsic_load_shared:
362   case nir_intrinsic_store_shared:
363      return FILE_MEMORY_SHARED;
364   case nir_intrinsic_load_kernel_input:
365      return FILE_SHADER_INPUT;
366   default:
367      ERROR("couldn't get DateFile for op %s\n", nir_intrinsic_infos[op].name);
368      assert(false);
369   }
370   return FILE_NULL;
371}
372
373operation
374Converter::getOperation(nir_op op)
375{
376   switch (op) {
377   // basic ops with float and int variants
378   case nir_op_fabs:
379   case nir_op_iabs:
380      return OP_ABS;
381   case nir_op_fadd:
382   case nir_op_iadd:
383      return OP_ADD;
384   case nir_op_iand:
385      return OP_AND;
386   case nir_op_ifind_msb:
387   case nir_op_ufind_msb:
388      return OP_BFIND;
389   case nir_op_fceil:
390      return OP_CEIL;
391   case nir_op_fcos:
392      return OP_COS;
393   case nir_op_f2f32:
394   case nir_op_f2f64:
395   case nir_op_f2i32:
396   case nir_op_f2i64:
397   case nir_op_f2u32:
398   case nir_op_f2u64:
399   case nir_op_i2f32:
400   case nir_op_i2f64:
401   case nir_op_i2i32:
402   case nir_op_i2i64:
403   case nir_op_u2f32:
404   case nir_op_u2f64:
405   case nir_op_u2u32:
406   case nir_op_u2u64:
407      return OP_CVT;
408   case nir_op_fddx:
409   case nir_op_fddx_coarse:
410   case nir_op_fddx_fine:
411      return OP_DFDX;
412   case nir_op_fddy:
413   case nir_op_fddy_coarse:
414   case nir_op_fddy_fine:
415      return OP_DFDY;
416   case nir_op_fdiv:
417   case nir_op_idiv:
418   case nir_op_udiv:
419      return OP_DIV;
420   case nir_op_fexp2:
421      return OP_EX2;
422   case nir_op_ffloor:
423      return OP_FLOOR;
424   case nir_op_ffma:
425      /* No FMA op pre-nvc0 */
426      if (info->target < 0xc0)
427         return OP_MAD;
428      return OP_FMA;
429   case nir_op_flog2:
430      return OP_LG2;
431   case nir_op_fmax:
432   case nir_op_imax:
433   case nir_op_umax:
434      return OP_MAX;
435   case nir_op_pack_64_2x32_split:
436      return OP_MERGE;
437   case nir_op_fmin:
438   case nir_op_imin:
439   case nir_op_umin:
440      return OP_MIN;
441   case nir_op_fmod:
442   case nir_op_imod:
443   case nir_op_umod:
444   case nir_op_frem:
445   case nir_op_irem:
446      return OP_MOD;
447   case nir_op_fmul:
448   case nir_op_imul:
449   case nir_op_imul_high:
450   case nir_op_umul_high:
451      return OP_MUL;
452   case nir_op_fneg:
453   case nir_op_ineg:
454      return OP_NEG;
455   case nir_op_inot:
456      return OP_NOT;
457   case nir_op_ior:
458      return OP_OR;
459   case nir_op_fpow:
460      return OP_POW;
461   case nir_op_frcp:
462      return OP_RCP;
463   case nir_op_frsq:
464      return OP_RSQ;
465   case nir_op_fsat:
466      return OP_SAT;
467   case nir_op_feq32:
468   case nir_op_ieq32:
469   case nir_op_fge32:
470   case nir_op_ige32:
471   case nir_op_uge32:
472   case nir_op_flt32:
473   case nir_op_ilt32:
474   case nir_op_ult32:
475   case nir_op_fneu32:
476   case nir_op_ine32:
477      return OP_SET;
478   case nir_op_ishl:
479      return OP_SHL;
480   case nir_op_ishr:
481   case nir_op_ushr:
482      return OP_SHR;
483   case nir_op_fsin:
484      return OP_SIN;
485   case nir_op_fsqrt:
486      return OP_SQRT;
487   case nir_op_ftrunc:
488      return OP_TRUNC;
489   case nir_op_ixor:
490      return OP_XOR;
491   default:
492      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
493      assert(false);
494      return OP_NOP;
495   }
496}
497
498operation
499Converter::getOperation(nir_texop op)
500{
501   switch (op) {
502   case nir_texop_tex:
503      return OP_TEX;
504   case nir_texop_lod:
505      return OP_TXLQ;
506   case nir_texop_txb:
507      return OP_TXB;
508   case nir_texop_txd:
509      return OP_TXD;
510   case nir_texop_txf:
511   case nir_texop_txf_ms:
512      return OP_TXF;
513   case nir_texop_tg4:
514      return OP_TXG;
515   case nir_texop_txl:
516      return OP_TXL;
517   case nir_texop_query_levels:
518   case nir_texop_texture_samples:
519   case nir_texop_txs:
520      return OP_TXQ;
521   default:
522      ERROR("couldn't get operation for nir_texop %u\n", op);
523      assert(false);
524      return OP_NOP;
525   }
526}
527
528operation
529Converter::getOperation(nir_intrinsic_op op)
530{
531   switch (op) {
532   case nir_intrinsic_emit_vertex:
533      return OP_EMIT;
534   case nir_intrinsic_end_primitive:
535      return OP_RESTART;
536   case nir_intrinsic_bindless_image_atomic_add:
537   case nir_intrinsic_image_atomic_add:
538   case nir_intrinsic_bindless_image_atomic_and:
539   case nir_intrinsic_image_atomic_and:
540   case nir_intrinsic_bindless_image_atomic_comp_swap:
541   case nir_intrinsic_image_atomic_comp_swap:
542   case nir_intrinsic_bindless_image_atomic_exchange:
543   case nir_intrinsic_image_atomic_exchange:
544   case nir_intrinsic_bindless_image_atomic_imax:
545   case nir_intrinsic_image_atomic_imax:
546   case nir_intrinsic_bindless_image_atomic_umax:
547   case nir_intrinsic_image_atomic_umax:
548   case nir_intrinsic_bindless_image_atomic_imin:
549   case nir_intrinsic_image_atomic_imin:
550   case nir_intrinsic_bindless_image_atomic_umin:
551   case nir_intrinsic_image_atomic_umin:
552   case nir_intrinsic_bindless_image_atomic_or:
553   case nir_intrinsic_image_atomic_or:
554   case nir_intrinsic_bindless_image_atomic_xor:
555   case nir_intrinsic_image_atomic_xor:
556   case nir_intrinsic_bindless_image_atomic_inc_wrap:
557   case nir_intrinsic_image_atomic_inc_wrap:
558   case nir_intrinsic_bindless_image_atomic_dec_wrap:
559   case nir_intrinsic_image_atomic_dec_wrap:
560      return OP_SUREDP;
561   case nir_intrinsic_bindless_image_load:
562   case nir_intrinsic_image_load:
563      return OP_SULDP;
564   case nir_intrinsic_bindless_image_samples:
565   case nir_intrinsic_image_samples:
566   case nir_intrinsic_bindless_image_size:
567   case nir_intrinsic_image_size:
568      return OP_SUQ;
569   case nir_intrinsic_bindless_image_store:
570   case nir_intrinsic_image_store:
571      return OP_SUSTP;
572   default:
573      ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
574      assert(false);
575      return OP_NOP;
576   }
577}
578
579operation
580Converter::preOperationNeeded(nir_op op)
581{
582   switch (op) {
583   case nir_op_fcos:
584   case nir_op_fsin:
585      return OP_PRESIN;
586   default:
587      return OP_NOP;
588   }
589}
590
591int
592Converter::getSubOp(nir_op op)
593{
594   switch (op) {
595   case nir_op_imul_high:
596   case nir_op_umul_high:
597      return NV50_IR_SUBOP_MUL_HIGH;
598   case nir_op_ishl:
599   case nir_op_ishr:
600   case nir_op_ushr:
601      return NV50_IR_SUBOP_SHIFT_WRAP;
602   default:
603      return 0;
604   }
605}
606
607int
608Converter::getSubOp(nir_intrinsic_op op)
609{
610   switch (op) {
611   case nir_intrinsic_bindless_image_atomic_add:
612   case nir_intrinsic_global_atomic_add:
613   case nir_intrinsic_image_atomic_add:
614   case nir_intrinsic_shared_atomic_add:
615   case nir_intrinsic_ssbo_atomic_add:
616      return  NV50_IR_SUBOP_ATOM_ADD;
617   case nir_intrinsic_bindless_image_atomic_fadd:
618   case nir_intrinsic_global_atomic_fadd:
619   case nir_intrinsic_image_atomic_fadd:
620   case nir_intrinsic_shared_atomic_fadd:
621   case nir_intrinsic_ssbo_atomic_fadd:
622      return  NV50_IR_SUBOP_ATOM_ADD;
623   case nir_intrinsic_bindless_image_atomic_and:
624   case nir_intrinsic_global_atomic_and:
625   case nir_intrinsic_image_atomic_and:
626   case nir_intrinsic_shared_atomic_and:
627   case nir_intrinsic_ssbo_atomic_and:
628      return  NV50_IR_SUBOP_ATOM_AND;
629   case nir_intrinsic_bindless_image_atomic_comp_swap:
630   case nir_intrinsic_global_atomic_comp_swap:
631   case nir_intrinsic_image_atomic_comp_swap:
632   case nir_intrinsic_shared_atomic_comp_swap:
633   case nir_intrinsic_ssbo_atomic_comp_swap:
634      return  NV50_IR_SUBOP_ATOM_CAS;
635   case nir_intrinsic_bindless_image_atomic_exchange:
636   case nir_intrinsic_global_atomic_exchange:
637   case nir_intrinsic_image_atomic_exchange:
638   case nir_intrinsic_shared_atomic_exchange:
639   case nir_intrinsic_ssbo_atomic_exchange:
640      return  NV50_IR_SUBOP_ATOM_EXCH;
641   case nir_intrinsic_bindless_image_atomic_or:
642   case nir_intrinsic_global_atomic_or:
643   case nir_intrinsic_image_atomic_or:
644   case nir_intrinsic_shared_atomic_or:
645   case nir_intrinsic_ssbo_atomic_or:
646      return  NV50_IR_SUBOP_ATOM_OR;
647   case nir_intrinsic_bindless_image_atomic_imax:
648   case nir_intrinsic_bindless_image_atomic_umax:
649   case nir_intrinsic_global_atomic_imax:
650   case nir_intrinsic_global_atomic_umax:
651   case nir_intrinsic_image_atomic_imax:
652   case nir_intrinsic_image_atomic_umax:
653   case nir_intrinsic_shared_atomic_imax:
654   case nir_intrinsic_shared_atomic_umax:
655   case nir_intrinsic_ssbo_atomic_imax:
656   case nir_intrinsic_ssbo_atomic_umax:
657      return  NV50_IR_SUBOP_ATOM_MAX;
658   case nir_intrinsic_bindless_image_atomic_imin:
659   case nir_intrinsic_bindless_image_atomic_umin:
660   case nir_intrinsic_global_atomic_imin:
661   case nir_intrinsic_global_atomic_umin:
662   case nir_intrinsic_image_atomic_imin:
663   case nir_intrinsic_image_atomic_umin:
664   case nir_intrinsic_shared_atomic_imin:
665   case nir_intrinsic_shared_atomic_umin:
666   case nir_intrinsic_ssbo_atomic_imin:
667   case nir_intrinsic_ssbo_atomic_umin:
668      return  NV50_IR_SUBOP_ATOM_MIN;
669   case nir_intrinsic_bindless_image_atomic_xor:
670   case nir_intrinsic_global_atomic_xor:
671   case nir_intrinsic_image_atomic_xor:
672   case nir_intrinsic_shared_atomic_xor:
673   case nir_intrinsic_ssbo_atomic_xor:
674      return  NV50_IR_SUBOP_ATOM_XOR;
675   case nir_intrinsic_bindless_image_atomic_inc_wrap:
676   case nir_intrinsic_image_atomic_inc_wrap:
677      return NV50_IR_SUBOP_ATOM_INC;
678   case nir_intrinsic_bindless_image_atomic_dec_wrap:
679   case nir_intrinsic_image_atomic_dec_wrap:
680      return NV50_IR_SUBOP_ATOM_DEC;
681
682   case nir_intrinsic_group_memory_barrier:
683   case nir_intrinsic_memory_barrier:
684   case nir_intrinsic_memory_barrier_buffer:
685   case nir_intrinsic_memory_barrier_image:
686      return NV50_IR_SUBOP_MEMBAR(M, GL);
687   case nir_intrinsic_memory_barrier_shared:
688      return NV50_IR_SUBOP_MEMBAR(M, CTA);
689
690   case nir_intrinsic_vote_all:
691      return NV50_IR_SUBOP_VOTE_ALL;
692   case nir_intrinsic_vote_any:
693      return NV50_IR_SUBOP_VOTE_ANY;
694   case nir_intrinsic_vote_ieq:
695      return NV50_IR_SUBOP_VOTE_UNI;
696   default:
697      return 0;
698   }
699}
700
701CondCode
702Converter::getCondCode(nir_op op)
703{
704   switch (op) {
705   case nir_op_feq32:
706   case nir_op_ieq32:
707      return CC_EQ;
708   case nir_op_fge32:
709   case nir_op_ige32:
710   case nir_op_uge32:
711      return CC_GE;
712   case nir_op_flt32:
713   case nir_op_ilt32:
714   case nir_op_ult32:
715      return CC_LT;
716   case nir_op_fneu32:
717      return CC_NEU;
718   case nir_op_ine32:
719      return CC_NE;
720   default:
721      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
722      assert(false);
723      return CC_FL;
724   }
725}
726
727Converter::LValues&
728Converter::convert(nir_alu_dest *dest)
729{
730   return convert(&dest->dest);
731}
732
733Converter::LValues&
734Converter::convert(nir_dest *dest)
735{
736   if (dest->is_ssa)
737      return convert(&dest->ssa);
738   if (dest->reg.indirect) {
739      ERROR("no support for indirects.");
740      assert(false);
741   }
742   return convert(dest->reg.reg);
743}
744
745Converter::LValues&
746Converter::convert(nir_register *reg)
747{
748   assert(!reg->num_array_elems);
749
750   NirDefMap::iterator it = regDefs.find(reg->index);
751   if (it != regDefs.end())
752      return it->second;
753
754   LValues newDef(reg->num_components);
755   for (uint8_t i = 0; i < reg->num_components; i++)
756      newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
757   return regDefs[reg->index] = newDef;
758}
759
760Converter::LValues&
761Converter::convert(nir_ssa_def *def)
762{
763   NirDefMap::iterator it = ssaDefs.find(def->index);
764   if (it != ssaDefs.end())
765      return it->second;
766
767   LValues newDef(def->num_components);
768   for (uint8_t i = 0; i < def->num_components; i++)
769      newDef[i] = getSSA(std::max(4, def->bit_size / 8));
770   return ssaDefs[def->index] = newDef;
771}
772
773Value*
774Converter::getSrc(nir_alu_src *src, uint8_t component)
775{
776   if (src->abs || src->negate) {
777      ERROR("modifiers currently not supported on nir_alu_src\n");
778      assert(false);
779   }
780   return getSrc(&src->src, src->swizzle[component]);
781}
782
783Value*
784Converter::getSrc(nir_register *reg, uint8_t idx)
785{
786   NirDefMap::iterator it = regDefs.find(reg->index);
787   if (it == regDefs.end())
788      return convert(reg)[idx];
789   return it->second[idx];
790}
791
792Value*
793Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
794{
795   if (src->is_ssa)
796      return getSrc(src->ssa, idx);
797
798   if (src->reg.indirect) {
799      if (indirect)
800         return getSrc(src->reg.indirect, idx);
801      ERROR("no support for indirects.");
802      assert(false);
803      return NULL;
804   }
805
806   return getSrc(src->reg.reg, idx);
807}
808
809Value*
810Converter::getSrc(nir_ssa_def *src, uint8_t idx)
811{
812   ImmediateMap::iterator iit = immediates.find(src->index);
813   if (iit != immediates.end())
814      return convert((*iit).second, idx);
815
816   NirDefMap::iterator it = ssaDefs.find(src->index);
817   if (it == ssaDefs.end()) {
818      ERROR("SSA value %u not found\n", src->index);
819      assert(false);
820      return NULL;
821   }
822   return it->second[idx];
823}
824
825uint32_t
826Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
827{
828   nir_const_value *offset = nir_src_as_const_value(*src);
829
830   if (offset) {
831      indirect = NULL;
832      return offset[0].u32;
833   }
834
835   indirect = getSrc(src, idx, true);
836   return 0;
837}
838
839uint32_t
840Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
841{
842   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
843
844   if (indirect && !isScalar)
845      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
846   return idx;
847}
848
849static void
850vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
851{
852   assert(name && index);
853
854   if (slot >= VERT_ATTRIB_MAX) {
855      ERROR("invalid varying slot %u\n", slot);
856      assert(false);
857      return;
858   }
859
860   if (slot >= VERT_ATTRIB_GENERIC0 &&
861       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
862      *name = TGSI_SEMANTIC_GENERIC;
863      *index = slot - VERT_ATTRIB_GENERIC0;
864      return;
865   }
866
867   if (slot >= VERT_ATTRIB_TEX0 &&
868       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
869      *name = TGSI_SEMANTIC_TEXCOORD;
870      *index = slot - VERT_ATTRIB_TEX0;
871      return;
872   }
873
874   switch (slot) {
875   case VERT_ATTRIB_COLOR0:
876      *name = TGSI_SEMANTIC_COLOR;
877      *index = 0;
878      break;
879   case VERT_ATTRIB_COLOR1:
880      *name = TGSI_SEMANTIC_COLOR;
881      *index = 1;
882      break;
883   case VERT_ATTRIB_EDGEFLAG:
884      *name = TGSI_SEMANTIC_EDGEFLAG;
885      *index = 0;
886      break;
887   case VERT_ATTRIB_FOG:
888      *name = TGSI_SEMANTIC_FOG;
889      *index = 0;
890      break;
891   case VERT_ATTRIB_NORMAL:
892      *name = TGSI_SEMANTIC_NORMAL;
893      *index = 0;
894      break;
895   case VERT_ATTRIB_POS:
896      *name = TGSI_SEMANTIC_POSITION;
897      *index = 0;
898      break;
899   case VERT_ATTRIB_POINT_SIZE:
900      *name = TGSI_SEMANTIC_PSIZE;
901      *index = 0;
902      break;
903   default:
904      ERROR("unknown vert attrib slot %u\n", slot);
905      assert(false);
906      break;
907   }
908}
909
910void
911Converter::setInterpolate(nv50_ir_varying *var,
912                          uint8_t mode,
913                          bool centroid,
914                          unsigned semantic)
915{
916   switch (mode) {
917   case INTERP_MODE_FLAT:
918      var->flat = 1;
919      break;
920   case INTERP_MODE_NONE:
921      if (semantic == TGSI_SEMANTIC_COLOR)
922         var->sc = 1;
923      else if (semantic == TGSI_SEMANTIC_POSITION)
924         var->linear = 1;
925      break;
926   case INTERP_MODE_NOPERSPECTIVE:
927      var->linear = 1;
928      break;
929   case INTERP_MODE_SMOOTH:
930      break;
931   }
932   var->centroid = centroid;
933}
934
935static uint16_t
936calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
937          bool input, const nir_variable *var)
938{
939   if (!type->is_array())
940      return type->count_attribute_slots(false);
941
942   uint16_t slots;
943   switch (stage) {
944   case Program::TYPE_GEOMETRY:
945      slots = type->count_attribute_slots(false);
946      if (input)
947         slots /= info.gs.vertices_in;
948      break;
949   case Program::TYPE_TESSELLATION_CONTROL:
950   case Program::TYPE_TESSELLATION_EVAL:
951      // remove first dimension
952      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
953         slots = type->count_attribute_slots(false);
954      else
955         slots = type->fields.array->count_attribute_slots(false);
956      break;
957   default:
958      slots = type->count_attribute_slots(false);
959      break;
960   }
961
962   return slots;
963}
964
965static uint8_t
966getMaskForType(const glsl_type *type, uint8_t slot) {
967   uint16_t comp = type->without_array()->components();
968   comp = comp ? comp : 4;
969
970   if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
971      comp *= 2;
972      if (comp > 4) {
973         if (slot % 2)
974            comp -= 4;
975         else
976            comp = 4;
977      }
978   }
979
980   return (1 << comp) - 1;
981}
982
983bool Converter::assignSlots() {
984   unsigned name;
985   unsigned index;
986
987   info->io.viewportId = -1;
988   info->io.mul_zero_wins = nir->info.use_legacy_math_rules;
989   info_out->numInputs = 0;
990   info_out->numOutputs = 0;
991   info_out->numSysVals = 0;
992
993   uint8_t i;
994   BITSET_FOREACH_SET(i, nir->info.system_values_read, SYSTEM_VALUE_MAX) {
995      info_out->sv[info_out->numSysVals].sn = tgsi_get_sysval_semantic(i);
996      info_out->sv[info_out->numSysVals].si = 0;
997      info_out->sv[info_out->numSysVals].input = 0;
998
999      switch (i) {
1000      case SYSTEM_VALUE_VERTEX_ID:
1001         info_out->sv[info_out->numSysVals].input = 1;
1002         info_out->io.vertexId = info_out->numSysVals;
1003         break;
1004      case SYSTEM_VALUE_INSTANCE_ID:
1005         info_out->sv[info_out->numSysVals].input = 1;
1006         info_out->io.instanceId = info_out->numSysVals;
1007         break;
1008      case SYSTEM_VALUE_TESS_LEVEL_INNER:
1009      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1010         info_out->sv[info_out->numSysVals].patch = 1;
1011         break;
1012      default:
1013         break;
1014      }
1015
1016      info_out->numSysVals += 1;
1017   }
1018
1019   if (prog->getType() == Program::TYPE_COMPUTE)
1020      return true;
1021
1022   nir_foreach_shader_in_variable(var, nir) {
1023      const glsl_type *type = var->type;
1024      int slot = var->data.location;
1025      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1026      uint32_t vary = var->data.driver_location;
1027      assert(vary + slots <= NV50_CODEGEN_MAX_VARYINGS);
1028
1029      switch(prog->getType()) {
1030      case Program::TYPE_FRAGMENT:
1031         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1032                                      &name, &index);
1033         for (uint16_t i = 0; i < slots; ++i) {
1034            setInterpolate(&info_out->in[vary + i], var->data.interpolation,
1035                           var->data.centroid | var->data.sample, name);
1036         }
1037         break;
1038      case Program::TYPE_GEOMETRY:
1039         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1040                                      &name, &index);
1041         break;
1042      case Program::TYPE_TESSELLATION_CONTROL:
1043      case Program::TYPE_TESSELLATION_EVAL:
1044         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1045                                      &name, &index);
1046         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1047            info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);
1048         break;
1049      case Program::TYPE_VERTEX:
1050         if (slot >= VERT_ATTRIB_GENERIC0 && slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX)
1051            slot = VERT_ATTRIB_GENERIC0 + vary;
1052         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1053         switch (name) {
1054         case TGSI_SEMANTIC_EDGEFLAG:
1055            info_out->io.edgeFlagIn = vary;
1056            break;
1057         default:
1058            break;
1059         }
1060         break;
1061      default:
1062         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1063         return false;
1064      }
1065
1066      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1067         nv50_ir_varying *v = &info_out->in[vary];
1068
1069         v->patch = var->data.patch;
1070         v->sn = name;
1071         v->si = index + i;
1072         v->mask |= getMaskForType(type, i) << var->data.location_frac;
1073      }
1074      info_out->numInputs = std::max<uint8_t>(info_out->numInputs, vary);
1075   }
1076
1077   nir_foreach_shader_out_variable(var, nir) {
1078      const glsl_type *type = var->type;
1079      int slot = var->data.location;
1080      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1081      uint32_t vary = var->data.driver_location;
1082
1083      assert(vary < NV50_CODEGEN_MAX_VARYINGS);
1084
1085      switch(prog->getType()) {
1086      case Program::TYPE_FRAGMENT:
1087         tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
1088         switch (name) {
1089         case TGSI_SEMANTIC_COLOR:
1090            if (!var->data.fb_fetch_output)
1091               info_out->prop.fp.numColourResults++;
1092            if (var->data.location == FRAG_RESULT_COLOR &&
1093                nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
1094            info_out->prop.fp.separateFragData = true;
1095            // sometimes we get FRAG_RESULT_DATAX with data.index 0
1096            // sometimes we get FRAG_RESULT_DATA0 with data.index X
1097            index = index == 0 ? var->data.index : index;
1098            break;
1099         case TGSI_SEMANTIC_POSITION:
1100            info_out->io.fragDepth = vary;
1101            info_out->prop.fp.writesDepth = true;
1102            break;
1103         case TGSI_SEMANTIC_SAMPLEMASK:
1104            info_out->io.sampleMask = vary;
1105            break;
1106         default:
1107            break;
1108         }
1109         break;
1110      case Program::TYPE_GEOMETRY:
1111      case Program::TYPE_TESSELLATION_CONTROL:
1112      case Program::TYPE_TESSELLATION_EVAL:
1113      case Program::TYPE_VERTEX:
1114         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1115                                      &name, &index);
1116
1117         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1118             name != TGSI_SEMANTIC_TESSOUTER)
1119            info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);
1120
1121         switch (name) {
1122         case TGSI_SEMANTIC_CLIPDIST:
1123            info_out->io.genUserClip = -1;
1124            break;
1125         case TGSI_SEMANTIC_CLIPVERTEX:
1126            clipVertexOutput = vary;
1127            break;
1128         case TGSI_SEMANTIC_EDGEFLAG:
1129            info_out->io.edgeFlagOut = vary;
1130            break;
1131         case TGSI_SEMANTIC_POSITION:
1132            if (clipVertexOutput < 0)
1133               clipVertexOutput = vary;
1134            break;
1135         default:
1136            break;
1137         }
1138         break;
1139      default:
1140         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1141         return false;
1142      }
1143
1144      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1145         nv50_ir_varying *v = &info_out->out[vary];
1146         v->patch = var->data.patch;
1147         v->sn = name;
1148         v->si = index + i;
1149         v->mask |= getMaskForType(type, i) << var->data.location_frac;
1150
1151         if (nir->info.outputs_read & 1ull << slot)
1152            v->oread = 1;
1153      }
1154      info_out->numOutputs = std::max<uint8_t>(info_out->numOutputs, vary);
1155   }
1156
1157   if (info_out->io.genUserClip > 0) {
1158      info_out->io.clipDistances = info_out->io.genUserClip;
1159
1160      const unsigned int nOut = (info_out->io.genUserClip + 3) / 4;
1161
1162      for (unsigned int n = 0; n < nOut; ++n) {
1163         unsigned int i = info_out->numOutputs++;
1164         info_out->out[i].id = i;
1165         info_out->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1166         info_out->out[i].si = n;
1167         info_out->out[i].mask = ((1 << info_out->io.clipDistances) - 1) >> (n * 4);
1168      }
1169   }
1170
1171   return info->assignSlots(info_out) == 0;
1172}
1173
1174uint32_t
1175Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1176{
1177   DataType ty;
1178   int offset = nir_intrinsic_component(insn);
1179   bool input;
1180
1181   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1182      ty = getDType(insn);
1183   else
1184      ty = getSType(insn->src[0], false, false);
1185
1186   switch (insn->intrinsic) {
1187   case nir_intrinsic_load_input:
1188   case nir_intrinsic_load_interpolated_input:
1189   case nir_intrinsic_load_per_vertex_input:
1190      input = true;
1191      break;
1192   case nir_intrinsic_load_output:
1193   case nir_intrinsic_load_per_vertex_output:
1194   case nir_intrinsic_store_output:
1195   case nir_intrinsic_store_per_vertex_output:
1196      input = false;
1197      break;
1198   default:
1199      ERROR("unknown intrinsic in getSlotAddress %s",
1200            nir_intrinsic_infos[insn->intrinsic].name);
1201      input = false;
1202      assert(false);
1203      break;
1204   }
1205
1206   if (typeSizeof(ty) == 8) {
1207      slot *= 2;
1208      slot += offset;
1209      if (slot >= 4) {
1210         idx += 1;
1211         slot -= 4;
1212      }
1213   } else {
1214      slot += offset;
1215   }
1216
1217   assert(slot < 4);
1218   assert(!input || idx < NV50_CODEGEN_MAX_VARYINGS);
1219   assert(input || idx < NV50_CODEGEN_MAX_VARYINGS);
1220
1221   const nv50_ir_varying *vary = input ? info_out->in : info_out->out;
1222   return vary[idx].slot[slot] * 4;
1223}
1224
1225Instruction *
1226Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1227                    uint32_t base, uint8_t c, Value *indirect0,
1228                    Value *indirect1, bool patch)
1229{
1230   unsigned int tySize = typeSizeof(ty);
1231
1232   if (tySize == 8 &&
1233       (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) {
1234      Value *lo = getSSA();
1235      Value *hi = getSSA();
1236
1237      Instruction *loi =
1238         mkLoad(TYPE_U32, lo,
1239                mkSymbol(file, i, TYPE_U32, base + c * tySize),
1240                indirect0);
1241      loi->setIndirect(0, 1, indirect1);
1242      loi->perPatch = patch;
1243
1244      Instruction *hii =
1245         mkLoad(TYPE_U32, hi,
1246                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1247                indirect0);
1248      hii->setIndirect(0, 1, indirect1);
1249      hii->perPatch = patch;
1250
1251      return mkOp2(OP_MERGE, ty, def, lo, hi);
1252   } else {
1253      Instruction *ld =
1254         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1255      ld->setIndirect(0, 1, indirect1);
1256      ld->perPatch = patch;
1257      return ld;
1258   }
1259}
1260
1261void
1262Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1263                   DataType ty, Value *src, uint8_t idx, uint8_t c,
1264                   Value *indirect0, Value *indirect1)
1265{
1266   uint8_t size = typeSizeof(ty);
1267   uint32_t address = getSlotAddress(insn, idx, c);
1268
1269   if (size == 8 && indirect0) {
1270      Value *split[2];
1271      mkSplit(split, 4, src);
1272
1273      if (op == OP_EXPORT) {
1274         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1275         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1276      }
1277
1278      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1279              split[0])->perPatch = info_out->out[idx].patch;
1280      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1281              split[1])->perPatch = info_out->out[idx].patch;
1282   } else {
1283      if (op == OP_EXPORT)
1284         src = mkMov(getSSA(size), src, ty)->getDef(0);
1285      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1286              src)->perPatch = info_out->out[idx].patch;
1287   }
1288}
1289
1290bool
1291Converter::parseNIR()
1292{
1293   info_out->bin.tlsSpace = nir->scratch_size;
1294   info_out->io.clipDistances = nir->info.clip_distance_array_size;
1295   info_out->io.cullDistances = nir->info.cull_distance_array_size;
1296   info_out->io.layer_viewport_relative = nir->info.layer_viewport_relative;
1297
1298   switch(prog->getType()) {
1299   case Program::TYPE_COMPUTE:
1300      info->prop.cp.numThreads[0] = nir->info.workgroup_size[0];
1301      info->prop.cp.numThreads[1] = nir->info.workgroup_size[1];
1302      info->prop.cp.numThreads[2] = nir->info.workgroup_size[2];
1303      info_out->bin.smemSize = std::max(info_out->bin.smemSize, nir->info.shared_size);
1304
1305      if (info->target < NVISA_GF100_CHIPSET) {
1306         int gmemSlot = 0;
1307
1308         for (unsigned i = 0; i < nir->info.num_ssbos; i++) {
1309            info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 0, .slot = i};
1310            assert(gmemSlot < 16);
1311         }
1312         nir_foreach_image_variable(var, nir) {
1313            int image_count = glsl_type_get_image_count(var->type);
1314            for (int i = 0; i < image_count; i++) {
1315               info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = var->data.binding + i};
1316               assert(gmemSlot < 16);
1317            }
1318         }
1319      }
1320
1321      break;
1322   case Program::TYPE_FRAGMENT:
1323      info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1324      prog->persampleInvocation =
1325         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
1326         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
1327      info_out->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1328      info_out->prop.fp.readsSampleLocations =
1329         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
1330      info_out->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote;
1331      info_out->prop.fp.usesSampleMaskIn =
1332         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
1333      break;
1334   case Program::TYPE_GEOMETRY:
1335      info_out->prop.gp.instanceCount = nir->info.gs.invocations;
1336      info_out->prop.gp.maxVertices = nir->info.gs.vertices_out;
1337      info_out->prop.gp.outputPrim = nir->info.gs.output_primitive;
1338      break;
1339   case Program::TYPE_TESSELLATION_CONTROL:
1340   case Program::TYPE_TESSELLATION_EVAL:
1341      info_out->prop.tp.domain = u_tess_prim_from_shader(nir->info.tess._primitive_mode);
1342      info_out->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1343      info_out->prop.tp.outputPrim =
1344         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1345      info_out->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1346      info_out->prop.tp.winding = !nir->info.tess.ccw;
1347      break;
1348   case Program::TYPE_VERTEX:
1349      info_out->prop.vp.usesDrawParameters =
1350         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX) ||
1351         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||
1352         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1353      break;
1354   default:
1355      break;
1356   }
1357
1358   return true;
1359}
1360
1361bool
1362Converter::visit(nir_function *function)
1363{
1364   assert(function->impl);
1365
1366   // usually the blocks will set everything up, but main is special
1367   BasicBlock *entry = new BasicBlock(prog->main);
1368   exit = new BasicBlock(prog->main);
1369   blocks[nir_start_block(function->impl)->index] = entry;
1370   prog->main->setEntry(entry);
1371   prog->main->setExit(exit);
1372
1373   setPosition(entry, true);
1374
1375   if (info_out->io.genUserClip > 0) {
1376      for (int c = 0; c < 4; ++c)
1377         clipVtx[c] = getScratch();
1378   }
1379
1380   switch (prog->getType()) {
1381   case Program::TYPE_TESSELLATION_CONTROL:
1382      outBase = mkOp2v(
1383         OP_SUB, TYPE_U32, getSSA(),
1384         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1385         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1386      break;
1387   case Program::TYPE_FRAGMENT: {
1388      Symbol *sv = mkSysVal(SV_POSITION, 3);
1389      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1390      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1391      break;
1392   }
1393   default:
1394      break;
1395   }
1396
1397   nir_index_ssa_defs(function->impl);
1398   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1399      if (!visit(node))
1400         return false;
1401   }
1402
1403   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1404   setPosition(exit, true);
1405
1406   if ((prog->getType() == Program::TYPE_VERTEX ||
1407        prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1408       && info_out->io.genUserClip > 0)
1409      handleUserClipPlanes();
1410
1411   // TODO: for non main function this needs to be a OP_RETURN
1412   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1413   return true;
1414}
1415
1416bool
1417Converter::visit(nir_cf_node *node)
1418{
1419   switch (node->type) {
1420   case nir_cf_node_block:
1421      return visit(nir_cf_node_as_block(node));
1422   case nir_cf_node_if:
1423      return visit(nir_cf_node_as_if(node));
1424   case nir_cf_node_loop:
1425      return visit(nir_cf_node_as_loop(node));
1426   default:
1427      ERROR("unknown nir_cf_node type %u\n", node->type);
1428      return false;
1429   }
1430}
1431
1432bool
1433Converter::visit(nir_block *block)
1434{
1435   if (!block->predecessors->entries && block->instr_list.is_empty())
1436      return true;
1437
1438   BasicBlock *bb = convert(block);
1439
1440   setPosition(bb, true);
1441   nir_foreach_instr(insn, block) {
1442      if (!visit(insn))
1443         return false;
1444   }
1445   return true;
1446}
1447
1448bool
1449Converter::visit(nir_if *nif)
1450{
1451   curIfDepth++;
1452
1453   DataType sType = getSType(nif->condition, false, false);
1454   Value *src = getSrc(&nif->condition, 0);
1455
1456   nir_block *lastThen = nir_if_last_then_block(nif);
1457   nir_block *lastElse = nir_if_last_else_block(nif);
1458
1459   BasicBlock *headBB = bb;
1460   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1461   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1462
1463   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1464   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1465
1466   bool insertJoins = lastThen->successors[0] == lastElse->successors[0];
1467   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1468
1469   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1470      if (!visit(node))
1471         return false;
1472   }
1473
1474   setPosition(convert(lastThen), true);
1475   if (!bb->isTerminated()) {
1476      BasicBlock *tailBB = convert(lastThen->successors[0]);
1477      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1478      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1479   } else {
1480      insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
1481   }
1482
1483   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1484      if (!visit(node))
1485         return false;
1486   }
1487
1488   setPosition(convert(lastElse), true);
1489   if (!bb->isTerminated()) {
1490      BasicBlock *tailBB = convert(lastElse->successors[0]);
1491      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1492      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1493   } else {
1494      insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
1495   }
1496
1497   if (curIfDepth > 6) {
1498      insertJoins = false;
1499   }
1500
1501   /* we made sure that all threads would converge at the same block */
1502   if (insertJoins) {
1503      BasicBlock *conv = convert(lastThen->successors[0]);
1504      setPosition(headBB->getExit(), false);
1505      headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL);
1506      setPosition(conv, false);
1507      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1508   }
1509
1510   curIfDepth--;
1511
1512   return true;
1513}
1514
1515// TODO: add convergency
1516bool
1517Converter::visit(nir_loop *loop)
1518{
1519   curLoopDepth += 1;
1520   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1521
1522   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1523   BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1524
1525   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1526
1527   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1528   setPosition(loopBB, false);
1529   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1530
1531   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1532      if (!visit(node))
1533         return false;
1534   }
1535
1536   if (!bb->isTerminated()) {
1537      mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1538      bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1539   }
1540
1541   if (tailBB->cfg.incidentCount() == 0)
1542      loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1543
1544   curLoopDepth -= 1;
1545
1546   info_out->loops++;
1547
1548   return true;
1549}
1550
1551bool
1552Converter::visit(nir_instr *insn)
1553{
1554   // we need an insertion point for on the fly generated immediate loads
1555   immInsertPos = bb->getExit();
1556   switch (insn->type) {
1557   case nir_instr_type_alu:
1558      return visit(nir_instr_as_alu(insn));
1559   case nir_instr_type_intrinsic:
1560      return visit(nir_instr_as_intrinsic(insn));
1561   case nir_instr_type_jump:
1562      return visit(nir_instr_as_jump(insn));
1563   case nir_instr_type_load_const:
1564      return visit(nir_instr_as_load_const(insn));
1565   case nir_instr_type_ssa_undef:
1566      return visit(nir_instr_as_ssa_undef(insn));
1567   case nir_instr_type_tex:
1568      return visit(nir_instr_as_tex(insn));
1569   default:
1570      ERROR("unknown nir_instr type %u\n", insn->type);
1571      return false;
1572   }
1573   return true;
1574}
1575
1576SVSemantic
1577Converter::convert(nir_intrinsic_op intr)
1578{
1579   switch (intr) {
1580   case nir_intrinsic_load_base_vertex:
1581      return SV_BASEVERTEX;
1582   case nir_intrinsic_load_base_instance:
1583      return SV_BASEINSTANCE;
1584   case nir_intrinsic_load_draw_id:
1585      return SV_DRAWID;
1586   case nir_intrinsic_load_front_face:
1587      return SV_FACE;
1588   case nir_intrinsic_is_helper_invocation:
1589   case nir_intrinsic_load_helper_invocation:
1590      return SV_THREAD_KILL;
1591   case nir_intrinsic_load_instance_id:
1592      return SV_INSTANCE_ID;
1593   case nir_intrinsic_load_invocation_id:
1594      return SV_INVOCATION_ID;
1595   case nir_intrinsic_load_workgroup_size:
1596      return SV_NTID;
1597   case nir_intrinsic_load_local_invocation_id:
1598      return SV_TID;
1599   case nir_intrinsic_load_num_workgroups:
1600      return SV_NCTAID;
1601   case nir_intrinsic_load_patch_vertices_in:
1602      return SV_VERTEX_COUNT;
1603   case nir_intrinsic_load_primitive_id:
1604      return SV_PRIMITIVE_ID;
1605   case nir_intrinsic_load_sample_id:
1606      return SV_SAMPLE_INDEX;
1607   case nir_intrinsic_load_sample_mask_in:
1608      return SV_SAMPLE_MASK;
1609   case nir_intrinsic_load_sample_pos:
1610      return SV_SAMPLE_POS;
1611   case nir_intrinsic_load_subgroup_eq_mask:
1612      return SV_LANEMASK_EQ;
1613   case nir_intrinsic_load_subgroup_ge_mask:
1614      return SV_LANEMASK_GE;
1615   case nir_intrinsic_load_subgroup_gt_mask:
1616      return SV_LANEMASK_GT;
1617   case nir_intrinsic_load_subgroup_le_mask:
1618      return SV_LANEMASK_LE;
1619   case nir_intrinsic_load_subgroup_lt_mask:
1620      return SV_LANEMASK_LT;
1621   case nir_intrinsic_load_subgroup_invocation:
1622      return SV_LANEID;
1623   case nir_intrinsic_load_tess_coord:
1624      return SV_TESS_COORD;
1625   case nir_intrinsic_load_tess_level_inner:
1626      return SV_TESS_INNER;
1627   case nir_intrinsic_load_tess_level_outer:
1628      return SV_TESS_OUTER;
1629   case nir_intrinsic_load_vertex_id:
1630      return SV_VERTEX_ID;
1631   case nir_intrinsic_load_workgroup_id:
1632      return SV_CTAID;
1633   case nir_intrinsic_load_work_dim:
1634      return SV_WORK_DIM;
1635   default:
1636      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1637            nir_intrinsic_infos[intr].name);
1638      assert(false);
1639      return SV_LAST;
1640   }
1641}
1642
1643bool
1644Converter::visit(nir_intrinsic_instr *insn)
1645{
1646   nir_intrinsic_op op = insn->intrinsic;
1647   const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1648   unsigned dest_components = nir_intrinsic_dest_components(insn);
1649
1650   switch (op) {
1651   case nir_intrinsic_load_uniform: {
1652      LValues &newDefs = convert(&insn->dest);
1653      const DataType dType = getDType(insn);
1654      Value *indirect;
1655      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1656      for (uint8_t i = 0; i < dest_components; ++i) {
1657         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1658      }
1659      break;
1660   }
1661   case nir_intrinsic_store_output:
1662   case nir_intrinsic_store_per_vertex_output: {
1663      Value *indirect;
1664      DataType dType = getSType(insn->src[0], false, false);
1665      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1666
1667      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
1668         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1669            continue;
1670
1671         uint8_t offset = 0;
1672         Value *src = getSrc(&insn->src[0], i);
1673         switch (prog->getType()) {
1674         case Program::TYPE_FRAGMENT: {
1675            if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1676               // TGSI uses a different interface than NIR, TGSI stores that
1677               // value in the z component, NIR in X
1678               offset += 2;
1679               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1680            }
1681            break;
1682         }
1683         case Program::TYPE_GEOMETRY:
1684         case Program::TYPE_TESSELLATION_EVAL:
1685         case Program::TYPE_VERTEX: {
1686            if (info_out->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1687               mkMov(clipVtx[i], src);
1688               src = clipVtx[i];
1689            }
1690            break;
1691         }
1692         default:
1693            break;
1694         }
1695
1696         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1697      }
1698      break;
1699   }
1700   case nir_intrinsic_load_input:
1701   case nir_intrinsic_load_interpolated_input:
1702   case nir_intrinsic_load_output: {
1703      LValues &newDefs = convert(&insn->dest);
1704
1705      // FBFetch
1706      if (prog->getType() == Program::TYPE_FRAGMENT &&
1707          op == nir_intrinsic_load_output) {
1708         std::vector<Value*> defs, srcs;
1709         uint8_t mask = 0;
1710
1711         srcs.push_back(getSSA());
1712         srcs.push_back(getSSA());
1713         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1714         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1715         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1716         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1717
1718         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1719         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1720
1721         for (uint8_t i = 0u; i < dest_components; ++i) {
1722            defs.push_back(newDefs[i]);
1723            mask |= 1 << i;
1724         }
1725
1726         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1727         texi->tex.levelZero = true;
1728         texi->tex.mask = mask;
1729         texi->tex.useOffsets = 0;
1730         texi->tex.r = 0xffff;
1731         texi->tex.s = 0xffff;
1732
1733         info_out->prop.fp.readsFramebuffer = true;
1734         break;
1735      }
1736
1737      const DataType dType = getDType(insn);
1738      Value *indirect;
1739      bool input = op != nir_intrinsic_load_output;
1740      operation nvirOp;
1741      uint32_t mode = 0;
1742
1743      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1744      nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx];
1745
1746      // see load_barycentric_* handling
1747      if (prog->getType() == Program::TYPE_FRAGMENT) {
1748         if (op == nir_intrinsic_load_interpolated_input) {
1749            ImmediateValue immMode;
1750            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1751               mode = immMode.reg.data.u32;
1752         }
1753         if (mode == NV50_IR_INTERP_DEFAULT)
1754            mode |= translateInterpMode(&vary, nvirOp);
1755         else {
1756            if (vary.linear) {
1757               nvirOp = OP_LINTERP;
1758               mode |= NV50_IR_INTERP_LINEAR;
1759            } else {
1760               nvirOp = OP_PINTERP;
1761               mode |= NV50_IR_INTERP_PERSPECTIVE;
1762            }
1763         }
1764      }
1765
1766      for (uint8_t i = 0u; i < dest_components; ++i) {
1767         uint32_t address = getSlotAddress(insn, idx, i);
1768         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1769         if (prog->getType() == Program::TYPE_FRAGMENT) {
1770            int s = 1;
1771            if (typeSizeof(dType) == 8) {
1772               Value *lo = getSSA();
1773               Value *hi = getSSA();
1774               Instruction *interp;
1775
1776               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1777               if (nvirOp == OP_PINTERP)
1778                  interp->setSrc(s++, fp.position);
1779               if (mode & NV50_IR_INTERP_OFFSET)
1780                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
1781               interp->setInterpolate(mode);
1782               interp->setIndirect(0, 0, indirect);
1783
1784               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1785               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1786               if (nvirOp == OP_PINTERP)
1787                  interp->setSrc(s++, fp.position);
1788               if (mode & NV50_IR_INTERP_OFFSET)
1789                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
1790               interp->setInterpolate(mode);
1791               interp->setIndirect(0, 0, indirect);
1792
1793               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1794            } else {
1795               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1796               if (nvirOp == OP_PINTERP)
1797                  interp->setSrc(s++, fp.position);
1798               if (mode & NV50_IR_INTERP_OFFSET)
1799                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
1800               interp->setInterpolate(mode);
1801               interp->setIndirect(0, 0, indirect);
1802            }
1803         } else {
1804            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1805         }
1806      }
1807      break;
1808   }
1809   case nir_intrinsic_load_barycentric_at_offset:
1810   case nir_intrinsic_load_barycentric_at_sample:
1811   case nir_intrinsic_load_barycentric_centroid:
1812   case nir_intrinsic_load_barycentric_pixel:
1813   case nir_intrinsic_load_barycentric_sample: {
1814      LValues &newDefs = convert(&insn->dest);
1815      uint32_t mode;
1816
1817      if (op == nir_intrinsic_load_barycentric_centroid ||
1818          op == nir_intrinsic_load_barycentric_sample) {
1819         mode = NV50_IR_INTERP_CENTROID;
1820      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1821         Value *offs[2];
1822         for (uint8_t c = 0; c < 2; c++) {
1823            offs[c] = getScratch();
1824            mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1825            mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1826            mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1827            mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1828         }
1829         mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1830
1831         mode = NV50_IR_INTERP_OFFSET;
1832      } else if (op == nir_intrinsic_load_barycentric_pixel) {
1833         mode = NV50_IR_INTERP_DEFAULT;
1834      } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1835         info_out->prop.fp.readsSampleLocations = true;
1836         Value *sample = getSSA();
1837         mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), getSrc(&insn->src[0], 0), mkImm(0))
1838            ->subOp = 2;
1839         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], sample)->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1840         mode = NV50_IR_INTERP_OFFSET;
1841      } else {
1842         unreachable("all intrinsics already handled above");
1843      }
1844
1845      loadImm(newDefs[1], mode);
1846      break;
1847   }
1848   case nir_intrinsic_demote:
1849   case nir_intrinsic_discard:
1850      mkOp(OP_DISCARD, TYPE_NONE, NULL);
1851      break;
1852   case nir_intrinsic_demote_if:
1853   case nir_intrinsic_discard_if: {
1854      Value *pred = getSSA(1, FILE_PREDICATE);
1855      if (insn->num_components > 1) {
1856         ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1857         assert(false);
1858         return false;
1859      }
1860      mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1861      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1862      break;
1863   }
1864   case nir_intrinsic_load_base_vertex:
1865   case nir_intrinsic_load_base_instance:
1866   case nir_intrinsic_load_draw_id:
1867   case nir_intrinsic_load_front_face:
1868   case nir_intrinsic_is_helper_invocation:
1869   case nir_intrinsic_load_helper_invocation:
1870   case nir_intrinsic_load_instance_id:
1871   case nir_intrinsic_load_invocation_id:
1872   case nir_intrinsic_load_workgroup_size:
1873   case nir_intrinsic_load_local_invocation_id:
1874   case nir_intrinsic_load_num_workgroups:
1875   case nir_intrinsic_load_patch_vertices_in:
1876   case nir_intrinsic_load_primitive_id:
1877   case nir_intrinsic_load_sample_id:
1878   case nir_intrinsic_load_sample_mask_in:
1879   case nir_intrinsic_load_sample_pos:
1880   case nir_intrinsic_load_subgroup_eq_mask:
1881   case nir_intrinsic_load_subgroup_ge_mask:
1882   case nir_intrinsic_load_subgroup_gt_mask:
1883   case nir_intrinsic_load_subgroup_le_mask:
1884   case nir_intrinsic_load_subgroup_lt_mask:
1885   case nir_intrinsic_load_subgroup_invocation:
1886   case nir_intrinsic_load_tess_coord:
1887   case nir_intrinsic_load_tess_level_inner:
1888   case nir_intrinsic_load_tess_level_outer:
1889   case nir_intrinsic_load_vertex_id:
1890   case nir_intrinsic_load_workgroup_id:
1891   case nir_intrinsic_load_work_dim: {
1892      const DataType dType = getDType(insn);
1893      SVSemantic sv = convert(op);
1894      LValues &newDefs = convert(&insn->dest);
1895
1896      for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
1897         Value *def;
1898         if (typeSizeof(dType) == 8)
1899            def = getSSA();
1900         else
1901            def = newDefs[i];
1902
1903         if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1904            loadImm(def, 0u);
1905         } else {
1906            Symbol *sym = mkSysVal(sv, i);
1907            Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1908            if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1909               rdsv->perPatch = 1;
1910         }
1911
1912         if (typeSizeof(dType) == 8)
1913            mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1914      }
1915      break;
1916   }
1917   // constants
1918   case nir_intrinsic_load_subgroup_size: {
1919      LValues &newDefs = convert(&insn->dest);
1920      loadImm(newDefs[0], 32u);
1921      break;
1922   }
1923   case nir_intrinsic_vote_all:
1924   case nir_intrinsic_vote_any:
1925   case nir_intrinsic_vote_ieq: {
1926      LValues &newDefs = convert(&insn->dest);
1927      Value *pred = getScratch(1, FILE_PREDICATE);
1928      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1929      mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1930      mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1931      break;
1932   }
1933   case nir_intrinsic_ballot: {
1934      LValues &newDefs = convert(&insn->dest);
1935      Value *pred = getSSA(1, FILE_PREDICATE);
1936      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1937      mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1938      break;
1939   }
1940   case nir_intrinsic_read_first_invocation:
1941   case nir_intrinsic_read_invocation: {
1942      LValues &newDefs = convert(&insn->dest);
1943      const DataType dType = getDType(insn);
1944      Value *tmp = getScratch();
1945
1946      if (op == nir_intrinsic_read_first_invocation) {
1947         mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1948         mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
1949         mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1950      } else
1951         tmp = getSrc(&insn->src[1], 0);
1952
1953      for (uint8_t i = 0; i < dest_components; ++i) {
1954         mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1955            ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1956      }
1957      break;
1958   }
1959   case nir_intrinsic_load_per_vertex_input: {
1960      const DataType dType = getDType(insn);
1961      LValues &newDefs = convert(&insn->dest);
1962      Value *indirectVertex;
1963      Value *indirectOffset;
1964      uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1965      uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1966
1967      Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1968                              mkImm(baseVertex), indirectVertex);
1969      for (uint8_t i = 0u; i < dest_components; ++i) {
1970         uint32_t address = getSlotAddress(insn, idx, i);
1971         loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
1972                  indirectOffset, vtxBase, info_out->in[idx].patch);
1973      }
1974      break;
1975   }
1976   case nir_intrinsic_load_per_vertex_output: {
1977      const DataType dType = getDType(insn);
1978      LValues &newDefs = convert(&insn->dest);
1979      Value *indirectVertex;
1980      Value *indirectOffset;
1981      uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1982      uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1983      Value *vtxBase = NULL;
1984
1985      if (indirectVertex)
1986         vtxBase = indirectVertex;
1987      else
1988         vtxBase = loadImm(NULL, baseVertex);
1989
1990      vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
1991
1992      for (uint8_t i = 0u; i < dest_components; ++i) {
1993         uint32_t address = getSlotAddress(insn, idx, i);
1994         loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
1995                  indirectOffset, vtxBase, info_out->in[idx].patch);
1996      }
1997      break;
1998   }
1999   case nir_intrinsic_emit_vertex: {
2000      if (info_out->io.genUserClip > 0)
2001         handleUserClipPlanes();
2002      uint32_t idx = nir_intrinsic_stream_id(insn);
2003      mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2004      break;
2005   }
2006   case nir_intrinsic_end_primitive: {
2007      uint32_t idx = nir_intrinsic_stream_id(insn);
2008      if (idx)
2009         break;
2010      mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2011      break;
2012   }
2013   case nir_intrinsic_load_ubo: {
2014      const DataType dType = getDType(insn);
2015      LValues &newDefs = convert(&insn->dest);
2016      Value *indirectIndex;
2017      Value *indirectOffset;
2018      uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2019      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2020      if (indirectOffset)
2021         indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2022
2023      for (uint8_t i = 0u; i < dest_components; ++i) {
2024         loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2025                  indirectOffset, indirectIndex);
2026      }
2027      break;
2028   }
2029   case nir_intrinsic_get_ssbo_size: {
2030      LValues &newDefs = convert(&insn->dest);
2031      const DataType dType = getDType(insn);
2032      Value *indirectBuffer;
2033      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2034
2035      Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2036      mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2037      break;
2038   }
2039   case nir_intrinsic_store_ssbo: {
2040      DataType sType = getSType(insn->src[0], false, false);
2041      Value *indirectBuffer;
2042      Value *indirectOffset;
2043      uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2044      uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2045
2046      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2047         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2048            continue;
2049         Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2050                                offset + i * typeSizeof(sType));
2051         mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2052            ->setIndirect(0, 1, indirectBuffer);
2053      }
2054      info_out->io.globalAccess |= 0x2;
2055      break;
2056   }
2057   case nir_intrinsic_load_ssbo: {
2058      const DataType dType = getDType(insn);
2059      LValues &newDefs = convert(&insn->dest);
2060      Value *indirectBuffer;
2061      Value *indirectOffset;
2062      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2063      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2064
2065      for (uint8_t i = 0u; i < dest_components; ++i)
2066         loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2067                  indirectOffset, indirectBuffer);
2068
2069      info_out->io.globalAccess |= 0x1;
2070      break;
2071   }
2072   case nir_intrinsic_shared_atomic_add:
2073   case nir_intrinsic_shared_atomic_fadd:
2074   case nir_intrinsic_shared_atomic_and:
2075   case nir_intrinsic_shared_atomic_comp_swap:
2076   case nir_intrinsic_shared_atomic_exchange:
2077   case nir_intrinsic_shared_atomic_or:
2078   case nir_intrinsic_shared_atomic_imax:
2079   case nir_intrinsic_shared_atomic_imin:
2080   case nir_intrinsic_shared_atomic_umax:
2081   case nir_intrinsic_shared_atomic_umin:
2082   case nir_intrinsic_shared_atomic_xor: {
2083      const DataType dType = getDType(insn);
2084      LValues &newDefs = convert(&insn->dest);
2085      Value *indirectOffset;
2086      uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2087      Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2088      Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2089      if (op == nir_intrinsic_shared_atomic_comp_swap)
2090         atom->setSrc(2, getSrc(&insn->src[2], 0));
2091      atom->setIndirect(0, 0, indirectOffset);
2092      atom->subOp = getSubOp(op);
2093      break;
2094   }
2095   case nir_intrinsic_ssbo_atomic_add:
2096   case nir_intrinsic_ssbo_atomic_fadd:
2097   case nir_intrinsic_ssbo_atomic_and:
2098   case nir_intrinsic_ssbo_atomic_comp_swap:
2099   case nir_intrinsic_ssbo_atomic_exchange:
2100   case nir_intrinsic_ssbo_atomic_or:
2101   case nir_intrinsic_ssbo_atomic_imax:
2102   case nir_intrinsic_ssbo_atomic_imin:
2103   case nir_intrinsic_ssbo_atomic_umax:
2104   case nir_intrinsic_ssbo_atomic_umin:
2105   case nir_intrinsic_ssbo_atomic_xor: {
2106      const DataType dType = getDType(insn);
2107      LValues &newDefs = convert(&insn->dest);
2108      Value *indirectBuffer;
2109      Value *indirectOffset;
2110      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2111      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2112
2113      Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2114      Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2115                                getSrc(&insn->src[2], 0));
2116      if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2117         atom->setSrc(2, getSrc(&insn->src[3], 0));
2118      atom->setIndirect(0, 0, indirectOffset);
2119      atom->setIndirect(0, 1, indirectBuffer);
2120      atom->subOp = getSubOp(op);
2121
2122      info_out->io.globalAccess |= 0x2;
2123      break;
2124   }
2125   case nir_intrinsic_global_atomic_add:
2126   case nir_intrinsic_global_atomic_fadd:
2127   case nir_intrinsic_global_atomic_and:
2128   case nir_intrinsic_global_atomic_comp_swap:
2129   case nir_intrinsic_global_atomic_exchange:
2130   case nir_intrinsic_global_atomic_or:
2131   case nir_intrinsic_global_atomic_imax:
2132   case nir_intrinsic_global_atomic_imin:
2133   case nir_intrinsic_global_atomic_umax:
2134   case nir_intrinsic_global_atomic_umin:
2135   case nir_intrinsic_global_atomic_xor: {
2136      const DataType dType = getDType(insn);
2137      LValues &newDefs = convert(&insn->dest);
2138      Value *address;
2139      uint32_t offset = getIndirect(&insn->src[0], 0, address);
2140
2141      Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2142      Instruction *atom =
2143         mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2144      if (op == nir_intrinsic_global_atomic_comp_swap)
2145         atom->setSrc(2, getSrc(&insn->src[2], 0));
2146      atom->setIndirect(0, 0, address);
2147      atom->subOp = getSubOp(op);
2148
2149      info_out->io.globalAccess |= 0x2;
2150      break;
2151   }
2152   case nir_intrinsic_bindless_image_atomic_add:
2153   case nir_intrinsic_bindless_image_atomic_fadd:
2154   case nir_intrinsic_bindless_image_atomic_and:
2155   case nir_intrinsic_bindless_image_atomic_comp_swap:
2156   case nir_intrinsic_bindless_image_atomic_exchange:
2157   case nir_intrinsic_bindless_image_atomic_imax:
2158   case nir_intrinsic_bindless_image_atomic_umax:
2159   case nir_intrinsic_bindless_image_atomic_imin:
2160   case nir_intrinsic_bindless_image_atomic_umin:
2161   case nir_intrinsic_bindless_image_atomic_or:
2162   case nir_intrinsic_bindless_image_atomic_xor:
2163   case nir_intrinsic_bindless_image_atomic_inc_wrap:
2164   case nir_intrinsic_bindless_image_atomic_dec_wrap:
2165   case nir_intrinsic_bindless_image_load:
2166   case nir_intrinsic_bindless_image_samples:
2167   case nir_intrinsic_bindless_image_size:
2168   case nir_intrinsic_bindless_image_store:
2169   case nir_intrinsic_image_atomic_add:
2170   case nir_intrinsic_image_atomic_fadd:
2171   case nir_intrinsic_image_atomic_and:
2172   case nir_intrinsic_image_atomic_comp_swap:
2173   case nir_intrinsic_image_atomic_exchange:
2174   case nir_intrinsic_image_atomic_imax:
2175   case nir_intrinsic_image_atomic_umax:
2176   case nir_intrinsic_image_atomic_imin:
2177   case nir_intrinsic_image_atomic_umin:
2178   case nir_intrinsic_image_atomic_or:
2179   case nir_intrinsic_image_atomic_xor:
2180   case nir_intrinsic_image_atomic_inc_wrap:
2181   case nir_intrinsic_image_atomic_dec_wrap:
2182   case nir_intrinsic_image_load:
2183   case nir_intrinsic_image_samples:
2184   case nir_intrinsic_image_size:
2185   case nir_intrinsic_image_store: {
2186      std::vector<Value*> srcs, defs;
2187      Value *indirect;
2188      DataType ty;
2189
2190      uint32_t mask = 0;
2191      TexInstruction::Target target =
2192         convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2193      unsigned int argCount = getNIRArgCount(target);
2194      uint16_t location = 0;
2195
2196      if (opInfo.has_dest) {
2197         LValues &newDefs = convert(&insn->dest);
2198         for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2199            defs.push_back(newDefs[i]);
2200            mask |= 1 << i;
2201         }
2202      }
2203
2204      int lod_src = -1;
2205      bool bindless = false;
2206      switch (op) {
2207      case nir_intrinsic_bindless_image_atomic_add:
2208      case nir_intrinsic_bindless_image_atomic_fadd:
2209      case nir_intrinsic_bindless_image_atomic_and:
2210      case nir_intrinsic_bindless_image_atomic_comp_swap:
2211      case nir_intrinsic_bindless_image_atomic_exchange:
2212      case nir_intrinsic_bindless_image_atomic_imax:
2213      case nir_intrinsic_bindless_image_atomic_umax:
2214      case nir_intrinsic_bindless_image_atomic_imin:
2215      case nir_intrinsic_bindless_image_atomic_umin:
2216      case nir_intrinsic_bindless_image_atomic_or:
2217      case nir_intrinsic_bindless_image_atomic_xor:
2218      case nir_intrinsic_bindless_image_atomic_inc_wrap:
2219      case nir_intrinsic_bindless_image_atomic_dec_wrap:
2220         ty = getDType(insn);
2221         bindless = true;
2222         info_out->io.globalAccess |= 0x2;
2223         mask = 0x1;
2224         break;
2225      case nir_intrinsic_image_atomic_add:
2226      case nir_intrinsic_image_atomic_fadd:
2227      case nir_intrinsic_image_atomic_and:
2228      case nir_intrinsic_image_atomic_comp_swap:
2229      case nir_intrinsic_image_atomic_exchange:
2230      case nir_intrinsic_image_atomic_imax:
2231      case nir_intrinsic_image_atomic_umax:
2232      case nir_intrinsic_image_atomic_imin:
2233      case nir_intrinsic_image_atomic_umin:
2234      case nir_intrinsic_image_atomic_or:
2235      case nir_intrinsic_image_atomic_xor:
2236      case nir_intrinsic_image_atomic_inc_wrap:
2237      case nir_intrinsic_image_atomic_dec_wrap:
2238         ty = getDType(insn);
2239         bindless = false;
2240         info_out->io.globalAccess |= 0x2;
2241         mask = 0x1;
2242         break;
2243      case nir_intrinsic_bindless_image_load:
2244      case nir_intrinsic_image_load:
2245         ty = TYPE_U32;
2246         bindless = op == nir_intrinsic_bindless_image_load;
2247         info_out->io.globalAccess |= 0x1;
2248         lod_src = 4;
2249         break;
2250      case nir_intrinsic_bindless_image_store:
2251      case nir_intrinsic_image_store:
2252         ty = TYPE_U32;
2253         bindless = op == nir_intrinsic_bindless_image_store;
2254         info_out->io.globalAccess |= 0x2;
2255         lod_src = 5;
2256         mask = 0xf;
2257         break;
2258      case nir_intrinsic_bindless_image_samples:
2259         mask = 0x8;
2260         FALLTHROUGH;
2261      case nir_intrinsic_image_samples:
2262         ty = TYPE_U32;
2263         bindless = op == nir_intrinsic_bindless_image_samples;
2264         mask = 0x8;
2265         break;
2266      case nir_intrinsic_bindless_image_size:
2267      case nir_intrinsic_image_size:
2268         assert(nir_src_as_uint(insn->src[1]) == 0);
2269         ty = TYPE_U32;
2270         bindless = op == nir_intrinsic_bindless_image_size;
2271         break;
2272      default:
2273         unreachable("unhandled image opcode");
2274         break;
2275      }
2276
2277      if (bindless)
2278         indirect = getSrc(&insn->src[0], 0);
2279      else
2280         location = getIndirect(&insn->src[0], 0, indirect);
2281
2282      /* Pre-GF100, SSBOs and images are in the same HW file, managed by
2283       * prop.cp.gmem.  images are located after SSBOs.
2284       */
2285      if (info->target < NVISA_GF100_CHIPSET)
2286         location += nir->info.num_ssbos;
2287
2288      // coords
2289      if (opInfo.num_srcs >= 2)
2290         for (unsigned int i = 0u; i < argCount; ++i)
2291            srcs.push_back(getSrc(&insn->src[1], i));
2292
2293      // the sampler is just another src added after coords
2294      if (opInfo.num_srcs >= 3 && target.isMS())
2295         srcs.push_back(getSrc(&insn->src[2], 0));
2296
2297      if (opInfo.num_srcs >= 4 && lod_src != 4) {
2298         unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2299         for (uint8_t i = 0u; i < components; ++i)
2300            srcs.push_back(getSrc(&insn->src[3], i));
2301      }
2302
2303      if (opInfo.num_srcs >= 5 && lod_src != 5)
2304         // 1 for aotmic swap
2305         for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2306            srcs.push_back(getSrc(&insn->src[4], i));
2307
2308      TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2309      texi->tex.bindless = bindless;
2310      texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2311      texi->tex.mask = mask;
2312      texi->cache = convert(nir_intrinsic_access(insn));
2313      texi->setType(ty);
2314      texi->subOp = getSubOp(op);
2315
2316      if (indirect)
2317         texi->setIndirectR(indirect);
2318
2319      break;
2320   }
2321   case nir_intrinsic_store_scratch:
2322   case nir_intrinsic_store_shared: {
2323      DataType sType = getSType(insn->src[0], false, false);
2324      Value *indirectOffset;
2325      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2326      if (indirectOffset)
2327         indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2328
2329      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2330         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2331            continue;
2332         Symbol *sym = mkSymbol(getFile(op), 0, sType, offset + i * typeSizeof(sType));
2333         mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2334      }
2335      break;
2336   }
2337   case nir_intrinsic_load_kernel_input:
2338   case nir_intrinsic_load_scratch:
2339   case nir_intrinsic_load_shared: {
2340      const DataType dType = getDType(insn);
2341      LValues &newDefs = convert(&insn->dest);
2342      Value *indirectOffset;
2343      uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2344      if (indirectOffset)
2345         indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset);
2346
2347      for (uint8_t i = 0u; i < dest_components; ++i)
2348         loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset);
2349
2350      break;
2351   }
2352   case nir_intrinsic_control_barrier: {
2353      // TODO: add flag to shader_info
2354      info_out->numBarriers = 1;
2355      Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2356      bar->fixed = 1;
2357      bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2358      break;
2359   }
2360   case nir_intrinsic_group_memory_barrier:
2361   case nir_intrinsic_memory_barrier:
2362   case nir_intrinsic_memory_barrier_buffer:
2363   case nir_intrinsic_memory_barrier_image:
2364   case nir_intrinsic_memory_barrier_shared: {
2365      Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2366      bar->fixed = 1;
2367      bar->subOp = getSubOp(op);
2368      break;
2369   }
2370   case nir_intrinsic_memory_barrier_tcs_patch:
2371      break;
2372   case nir_intrinsic_shader_clock: {
2373      const DataType dType = getDType(insn);
2374      LValues &newDefs = convert(&insn->dest);
2375
2376      loadImm(newDefs[0], 0u);
2377      mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2378      break;
2379   }
2380   case nir_intrinsic_load_global:
2381   case nir_intrinsic_load_global_constant: {
2382      const DataType dType = getDType(insn);
2383      LValues &newDefs = convert(&insn->dest);
2384      Value *indirectOffset;
2385      uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2386
2387      for (auto i = 0u; i < dest_components; ++i)
2388         loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2389
2390      info_out->io.globalAccess |= 0x1;
2391      break;
2392   }
2393   case nir_intrinsic_store_global: {
2394      DataType sType = getSType(insn->src[0], false, false);
2395
2396      for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2397         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2398            continue;
2399         if (typeSizeof(sType) == 8) {
2400            Value *split[2];
2401            mkSplit(split, 4, getSrc(&insn->src[0], i));
2402
2403            Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2404            mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2405
2406            sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2407            mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2408         } else {
2409            Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2410            mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2411         }
2412      }
2413
2414      info_out->io.globalAccess |= 0x2;
2415      break;
2416   }
2417   default:
2418      ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2419      return false;
2420   }
2421
2422   return true;
2423}
2424
2425bool
2426Converter::visit(nir_jump_instr *insn)
2427{
2428   switch (insn->type) {
2429   case nir_jump_return:
2430      // TODO: this only works in the main function
2431      mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2432      bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2433      break;
2434   case nir_jump_break:
2435   case nir_jump_continue: {
2436      bool isBreak = insn->type == nir_jump_break;
2437      nir_block *block = insn->instr.block;
2438      BasicBlock *target = convert(block->successors[0]);
2439      mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2440      bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2441      break;
2442   }
2443   default:
2444      ERROR("unknown nir_jump_type %u\n", insn->type);
2445      return false;
2446   }
2447
2448   return true;
2449}
2450
2451Value*
2452Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2453{
2454   Value *val;
2455
2456   if (immInsertPos)
2457      setPosition(immInsertPos, true);
2458   else
2459      setPosition(bb, false);
2460
2461   switch (insn->def.bit_size) {
2462   case 64:
2463      val = loadImm(getSSA(8), insn->value[idx].u64);
2464      break;
2465   case 32:
2466      val = loadImm(getSSA(4), insn->value[idx].u32);
2467      break;
2468   case 16:
2469      val = loadImm(getSSA(2), insn->value[idx].u16);
2470      break;
2471   case 8:
2472      val = loadImm(getSSA(1), insn->value[idx].u8);
2473      break;
2474   default:
2475      unreachable("unhandled bit size!\n");
2476   }
2477   setPosition(bb, true);
2478   return val;
2479}
2480
2481bool
2482Converter::visit(nir_load_const_instr *insn)
2483{
2484   assert(insn->def.bit_size <= 64);
2485   immediates[insn->def.index] = insn;
2486   return true;
2487}
2488
2489#define DEFAULT_CHECKS \
2490      if (insn->dest.dest.ssa.num_components > 1) { \
2491         ERROR("nir_alu_instr only supported with 1 component!\n"); \
2492         return false; \
2493      } \
2494      if (insn->dest.write_mask != 1) { \
2495         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2496         return false; \
2497      }
2498bool
2499Converter::visit(nir_alu_instr *insn)
2500{
2501   const nir_op op = insn->op;
2502   const nir_op_info &info = nir_op_infos[op];
2503   DataType dType = getDType(insn);
2504   const std::vector<DataType> sTypes = getSTypes(insn);
2505
2506   Instruction *oldPos = this->bb->getExit();
2507
2508   switch (op) {
2509   case nir_op_fabs:
2510   case nir_op_iabs:
2511   case nir_op_fadd:
2512   case nir_op_iadd:
2513   case nir_op_iand:
2514   case nir_op_fceil:
2515   case nir_op_fcos:
2516   case nir_op_fddx:
2517   case nir_op_fddx_coarse:
2518   case nir_op_fddx_fine:
2519   case nir_op_fddy:
2520   case nir_op_fddy_coarse:
2521   case nir_op_fddy_fine:
2522   case nir_op_fdiv:
2523   case nir_op_idiv:
2524   case nir_op_udiv:
2525   case nir_op_fexp2:
2526   case nir_op_ffloor:
2527   case nir_op_ffma:
2528   case nir_op_flog2:
2529   case nir_op_fmax:
2530   case nir_op_imax:
2531   case nir_op_umax:
2532   case nir_op_fmin:
2533   case nir_op_imin:
2534   case nir_op_umin:
2535   case nir_op_fmod:
2536   case nir_op_imod:
2537   case nir_op_umod:
2538   case nir_op_fmul:
2539   case nir_op_imul:
2540   case nir_op_imul_high:
2541   case nir_op_umul_high:
2542   case nir_op_fneg:
2543   case nir_op_ineg:
2544   case nir_op_inot:
2545   case nir_op_ior:
2546   case nir_op_pack_64_2x32_split:
2547   case nir_op_fpow:
2548   case nir_op_frcp:
2549   case nir_op_frem:
2550   case nir_op_irem:
2551   case nir_op_frsq:
2552   case nir_op_fsat:
2553   case nir_op_ishr:
2554   case nir_op_ushr:
2555   case nir_op_fsin:
2556   case nir_op_fsqrt:
2557   case nir_op_ftrunc:
2558   case nir_op_ishl:
2559   case nir_op_ixor: {
2560      DEFAULT_CHECKS;
2561      LValues &newDefs = convert(&insn->dest);
2562      operation preOp = preOperationNeeded(op);
2563      if (preOp != OP_NOP) {
2564         assert(info.num_inputs < 2);
2565         Value *tmp = getSSA(typeSizeof(dType));
2566         Instruction *i0 = mkOp(preOp, dType, tmp);
2567         Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2568         if (info.num_inputs) {
2569            i0->setSrc(0, getSrc(&insn->src[0]));
2570            i1->setSrc(0, tmp);
2571         }
2572         i1->subOp = getSubOp(op);
2573      } else {
2574         Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2575         for (unsigned s = 0u; s < info.num_inputs; ++s) {
2576            i->setSrc(s, getSrc(&insn->src[s]));
2577
2578            if (this->info->io.mul_zero_wins) {
2579               switch (op) {
2580               case nir_op_fmul:
2581               case nir_op_ffma:
2582                  i->dnz = true;
2583                  break;
2584               default:
2585                  break;
2586               }
2587            }
2588         }
2589         i->subOp = getSubOp(op);
2590      }
2591      break;
2592   }
2593   case nir_op_ifind_msb:
2594   case nir_op_ufind_msb: {
2595      DEFAULT_CHECKS;
2596      LValues &newDefs = convert(&insn->dest);
2597      dType = sTypes[0];
2598      mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2599      break;
2600   }
2601   case nir_op_fround_even: {
2602      DEFAULT_CHECKS;
2603      LValues &newDefs = convert(&insn->dest);
2604      mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2605      break;
2606   }
2607   // convert instructions
2608   case nir_op_f2f32:
2609   case nir_op_f2i32:
2610   case nir_op_f2u32:
2611   case nir_op_i2f32:
2612   case nir_op_i2i32:
2613   case nir_op_u2f32:
2614   case nir_op_u2u32:
2615   case nir_op_f2f64:
2616   case nir_op_f2i64:
2617   case nir_op_f2u64:
2618   case nir_op_i2f64:
2619   case nir_op_i2i64:
2620   case nir_op_u2f64:
2621   case nir_op_u2u64: {
2622      DEFAULT_CHECKS;
2623      LValues &newDefs = convert(&insn->dest);
2624      Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2625      if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2626         i->rnd = ROUND_Z;
2627      i->sType = sTypes[0];
2628      break;
2629   }
2630   // compare instructions
2631   case nir_op_feq32:
2632   case nir_op_ieq32:
2633   case nir_op_fge32:
2634   case nir_op_ige32:
2635   case nir_op_uge32:
2636   case nir_op_flt32:
2637   case nir_op_ilt32:
2638   case nir_op_ult32:
2639   case nir_op_fneu32:
2640   case nir_op_ine32: {
2641      DEFAULT_CHECKS;
2642      LValues &newDefs = convert(&insn->dest);
2643      Instruction *i = mkCmp(getOperation(op),
2644                             getCondCode(op),
2645                             dType,
2646                             newDefs[0],
2647                             dType,
2648                             getSrc(&insn->src[0]),
2649                             getSrc(&insn->src[1]));
2650      if (info.num_inputs == 3)
2651         i->setSrc(2, getSrc(&insn->src[2]));
2652      i->sType = sTypes[0];
2653      break;
2654   }
2655   case nir_op_mov:
2656   case nir_op_vec2:
2657   case nir_op_vec3:
2658   case nir_op_vec4:
2659   case nir_op_vec8:
2660   case nir_op_vec16: {
2661      LValues &newDefs = convert(&insn->dest);
2662      for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2663         mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2664      }
2665      break;
2666   }
2667   // (un)pack
2668   case nir_op_pack_64_2x32: {
2669      LValues &newDefs = convert(&insn->dest);
2670      Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2671      merge->setSrc(0, getSrc(&insn->src[0], 0));
2672      merge->setSrc(1, getSrc(&insn->src[0], 1));
2673      break;
2674   }
2675   case nir_op_pack_half_2x16_split: {
2676      LValues &newDefs = convert(&insn->dest);
2677      Value *tmpH = getSSA();
2678      Value *tmpL = getSSA();
2679
2680      mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2681      mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2682      mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2683      break;
2684   }
2685   case nir_op_unpack_half_2x16_split_x:
2686   case nir_op_unpack_half_2x16_split_y: {
2687      LValues &newDefs = convert(&insn->dest);
2688      Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2689      if (op == nir_op_unpack_half_2x16_split_y)
2690         cvt->subOp = 1;
2691      break;
2692   }
2693   case nir_op_unpack_64_2x32: {
2694      LValues &newDefs = convert(&insn->dest);
2695      mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2696      break;
2697   }
2698   case nir_op_unpack_64_2x32_split_x: {
2699      LValues &newDefs = convert(&insn->dest);
2700      mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2701      break;
2702   }
2703   case nir_op_unpack_64_2x32_split_y: {
2704      LValues &newDefs = convert(&insn->dest);
2705      mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2706      break;
2707   }
2708   // special instructions
2709   case nir_op_fsign:
2710   case nir_op_isign: {
2711      DEFAULT_CHECKS;
2712      DataType iType;
2713      if (::isFloatType(dType))
2714         iType = TYPE_F32;
2715      else
2716         iType = TYPE_S32;
2717
2718      LValues &newDefs = convert(&insn->dest);
2719      LValue *val0 = getScratch();
2720      LValue *val1 = getScratch();
2721      mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2722      mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2723
2724      if (dType == TYPE_F64) {
2725         mkOp2(OP_SUB, iType, val0, val0, val1);
2726         mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2727      } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2728         mkOp2(OP_SUB, iType, val0, val1, val0);
2729         mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2730         mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2731      } else if (::isFloatType(dType))
2732         mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2733      else
2734         mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2735      break;
2736   }
2737   case nir_op_fcsel:
2738   case nir_op_b32csel: {
2739      DEFAULT_CHECKS;
2740      LValues &newDefs = convert(&insn->dest);
2741      mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2742      break;
2743   }
2744   case nir_op_ibitfield_extract:
2745   case nir_op_ubitfield_extract: {
2746      DEFAULT_CHECKS;
2747      Value *tmp = getSSA();
2748      LValues &newDefs = convert(&insn->dest);
2749      mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2750      mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2751      break;
2752   }
2753   case nir_op_bfm: {
2754      DEFAULT_CHECKS;
2755      LValues &newDefs = convert(&insn->dest);
2756      mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
2757      break;
2758   }
2759   case nir_op_bitfield_insert: {
2760      DEFAULT_CHECKS;
2761      LValues &newDefs = convert(&insn->dest);
2762      LValue *temp = getSSA();
2763      mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2764      mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2765      break;
2766   }
2767   case nir_op_bit_count: {
2768      DEFAULT_CHECKS;
2769      LValues &newDefs = convert(&insn->dest);
2770      mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2771      break;
2772   }
2773   case nir_op_bitfield_reverse: {
2774      DEFAULT_CHECKS;
2775      LValues &newDefs = convert(&insn->dest);
2776      mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
2777      break;
2778   }
2779   case nir_op_find_lsb: {
2780      DEFAULT_CHECKS;
2781      LValues &newDefs = convert(&insn->dest);
2782      Value *tmp = getSSA();
2783      mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
2784      mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2785      break;
2786   }
2787   case nir_op_extract_u8: {
2788      DEFAULT_CHECKS;
2789      LValues &newDefs = convert(&insn->dest);
2790      Value *prmt = getSSA();
2791      mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
2792      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2793      break;
2794   }
2795   case nir_op_extract_i8: {
2796      DEFAULT_CHECKS;
2797      LValues &newDefs = convert(&insn->dest);
2798      Value *prmt = getSSA();
2799      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
2800      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2801      break;
2802   }
2803   case nir_op_extract_u16: {
2804      DEFAULT_CHECKS;
2805      LValues &newDefs = convert(&insn->dest);
2806      Value *prmt = getSSA();
2807      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
2808      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2809      break;
2810   }
2811   case nir_op_extract_i16: {
2812      DEFAULT_CHECKS;
2813      LValues &newDefs = convert(&insn->dest);
2814      Value *prmt = getSSA();
2815      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
2816      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2817      break;
2818   }
2819   case nir_op_urol: {
2820      DEFAULT_CHECKS;
2821      LValues &newDefs = convert(&insn->dest);
2822      mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2823            getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2824         ->subOp = NV50_IR_SUBOP_SHF_L |
2825                   NV50_IR_SUBOP_SHF_W |
2826                   NV50_IR_SUBOP_SHF_HI;
2827      break;
2828   }
2829   case nir_op_uror: {
2830      DEFAULT_CHECKS;
2831      LValues &newDefs = convert(&insn->dest);
2832      mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2833            getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2834         ->subOp = NV50_IR_SUBOP_SHF_R |
2835                   NV50_IR_SUBOP_SHF_W |
2836                   NV50_IR_SUBOP_SHF_LO;
2837      break;
2838   }
2839   // boolean conversions
2840   case nir_op_b2f32: {
2841      DEFAULT_CHECKS;
2842      LValues &newDefs = convert(&insn->dest);
2843      mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2844      break;
2845   }
2846   case nir_op_b2f64: {
2847      DEFAULT_CHECKS;
2848      LValues &newDefs = convert(&insn->dest);
2849      Value *tmp = getSSA(4);
2850      mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2851      mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2852      break;
2853   }
2854   case nir_op_f2b32:
2855   case nir_op_i2b32: {
2856      DEFAULT_CHECKS;
2857      LValues &newDefs = convert(&insn->dest);
2858      Value *src1;
2859      if (typeSizeof(sTypes[0]) == 8) {
2860         src1 = loadImm(getSSA(8), 0.0);
2861      } else {
2862         src1 = zero;
2863      }
2864      CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2865      mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2866      break;
2867   }
2868   case nir_op_b2i32: {
2869      DEFAULT_CHECKS;
2870      LValues &newDefs = convert(&insn->dest);
2871      mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2872      break;
2873   }
2874   case nir_op_b2i64: {
2875      DEFAULT_CHECKS;
2876      LValues &newDefs = convert(&insn->dest);
2877      LValue *def = getScratch();
2878      mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2879      mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2880      break;
2881   }
2882   default:
2883      ERROR("unknown nir_op %s\n", info.name);
2884      assert(false);
2885      return false;
2886   }
2887
2888   if (!oldPos) {
2889      oldPos = this->bb->getEntry();
2890      oldPos->precise = insn->exact;
2891   }
2892
2893   if (unlikely(!oldPos))
2894      return true;
2895
2896   while (oldPos->next) {
2897      oldPos = oldPos->next;
2898      oldPos->precise = insn->exact;
2899   }
2900   oldPos->saturate = insn->dest.saturate;
2901
2902   return true;
2903}
2904#undef DEFAULT_CHECKS
2905
2906bool
2907Converter::visit(nir_ssa_undef_instr *insn)
2908{
2909   LValues &newDefs = convert(&insn->def);
2910   for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2911      mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2912   }
2913   return true;
2914}
2915
2916#define CASE_SAMPLER(ty) \
2917   case GLSL_SAMPLER_DIM_ ## ty : \
2918      if (isArray && !isShadow) \
2919         return TEX_TARGET_ ## ty ## _ARRAY; \
2920      else if (!isArray && isShadow) \
2921         return TEX_TARGET_## ty ## _SHADOW; \
2922      else if (isArray && isShadow) \
2923         return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2924      else \
2925         return TEX_TARGET_ ## ty
2926
2927TexTarget
2928Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2929{
2930   switch (dim) {
2931   CASE_SAMPLER(1D);
2932   CASE_SAMPLER(2D);
2933   CASE_SAMPLER(CUBE);
2934   case GLSL_SAMPLER_DIM_3D:
2935      return TEX_TARGET_3D;
2936   case GLSL_SAMPLER_DIM_MS:
2937      if (isArray)
2938         return TEX_TARGET_2D_MS_ARRAY;
2939      return TEX_TARGET_2D_MS;
2940   case GLSL_SAMPLER_DIM_RECT:
2941      if (isShadow)
2942         return TEX_TARGET_RECT_SHADOW;
2943      return TEX_TARGET_RECT;
2944   case GLSL_SAMPLER_DIM_BUF:
2945      return TEX_TARGET_BUFFER;
2946   case GLSL_SAMPLER_DIM_EXTERNAL:
2947      return TEX_TARGET_2D;
2948   default:
2949      ERROR("unknown glsl_sampler_dim %u\n", dim);
2950      assert(false);
2951      return TEX_TARGET_COUNT;
2952   }
2953}
2954#undef CASE_SAMPLER
2955
2956unsigned int
2957Converter::getNIRArgCount(TexInstruction::Target& target)
2958{
2959   unsigned int result = target.getArgCount();
2960   if (target.isCube() && target.isArray())
2961      result--;
2962   if (target.isMS())
2963      result--;
2964   return result;
2965}
2966
2967CacheMode
2968Converter::convert(enum gl_access_qualifier access)
2969{
2970   if (access & ACCESS_VOLATILE)
2971      return CACHE_CV;
2972   if (access & ACCESS_COHERENT)
2973      return CACHE_CG;
2974   return CACHE_CA;
2975}
2976
2977bool
2978Converter::visit(nir_tex_instr *insn)
2979{
2980   switch (insn->op) {
2981   case nir_texop_lod:
2982   case nir_texop_query_levels:
2983   case nir_texop_tex:
2984   case nir_texop_texture_samples:
2985   case nir_texop_tg4:
2986   case nir_texop_txb:
2987   case nir_texop_txd:
2988   case nir_texop_txf:
2989   case nir_texop_txf_ms:
2990   case nir_texop_txl:
2991   case nir_texop_txs: {
2992      LValues &newDefs = convert(&insn->dest);
2993      std::vector<Value*> srcs;
2994      std::vector<Value*> defs;
2995      std::vector<nir_src*> offsets;
2996      uint8_t mask = 0;
2997      bool lz = false;
2998      TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2999      operation op = getOperation(insn->op);
3000
3001      int r, s;
3002      int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3003      int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3004      int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3005      int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3006      int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3007      int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3008      int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3009      int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3010      int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3011      int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3012      int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3013      int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3014
3015      bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3016      assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3017
3018      srcs.resize(insn->coord_components);
3019      for (uint8_t i = 0u; i < insn->coord_components; ++i)
3020         srcs[i] = getSrc(&insn->src[coordsIdx].src, i);
3021
3022      // sometimes we get less args than target.getArgCount, but codegen expects the latter
3023      if (insn->coord_components) {
3024         uint32_t argCount = target.getArgCount();
3025
3026         if (target.isMS())
3027            argCount -= 1;
3028
3029         for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3030            srcs.push_back(getSSA());
3031      }
3032
3033      if (insn->op == nir_texop_texture_samples)
3034         srcs.push_back(zero);
3035      else if (!insn->num_srcs)
3036         srcs.push_back(loadImm(NULL, 0));
3037      if (biasIdx != -1)
3038         srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3039      if (lodIdx != -1)
3040         srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3041      else if (op == OP_TXF)
3042         lz = true;
3043      if (msIdx != -1)
3044         srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3045      if (offsetIdx != -1)
3046         offsets.push_back(&insn->src[offsetIdx].src);
3047      if (compIdx != -1)
3048         srcs.push_back(getSrc(&insn->src[compIdx].src, 0));
3049      if (texOffIdx != -1) {
3050         srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3051         texOffIdx = srcs.size() - 1;
3052      }
3053      if (sampOffIdx != -1) {
3054         srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3055         sampOffIdx = srcs.size() - 1;
3056      }
3057      if (bindless) {
3058         // currently we use the lower bits
3059         Value *split[2];
3060         Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3061
3062         mkSplit(split, 4, handle);
3063
3064         srcs.push_back(split[0]);
3065         texOffIdx = srcs.size() - 1;
3066      }
3067
3068      r = bindless ? 0xff : insn->texture_index;
3069      s = bindless ? 0x1f : insn->sampler_index;
3070      if (op == OP_TXF || op == OP_TXQ)
3071         s = 0;
3072
3073      defs.resize(newDefs.size());
3074      for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3075         defs[d] = newDefs[d];
3076         mask |= 1 << d;
3077      }
3078      if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3079         lz = true;
3080
3081      TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3082      texi->tex.levelZero = lz;
3083      texi->tex.mask = mask;
3084      texi->tex.bindless = bindless;
3085
3086      if (texOffIdx != -1)
3087         texi->tex.rIndirectSrc = texOffIdx;
3088      if (sampOffIdx != -1)
3089         texi->tex.sIndirectSrc = sampOffIdx;
3090
3091      switch (insn->op) {
3092      case nir_texop_tg4:
3093         if (!target.isShadow())
3094            texi->tex.gatherComp = insn->component;
3095         break;
3096      case nir_texop_txs:
3097         texi->tex.query = TXQ_DIMS;
3098         break;
3099      case nir_texop_texture_samples:
3100         texi->tex.mask = 0x4;
3101         texi->tex.query = TXQ_TYPE;
3102         break;
3103      case nir_texop_query_levels:
3104         texi->tex.mask = 0x8;
3105         texi->tex.query = TXQ_DIMS;
3106         break;
3107      default:
3108         break;
3109      }
3110
3111      texi->tex.useOffsets = offsets.size();
3112      if (texi->tex.useOffsets) {
3113         for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3114            for (uint32_t c = 0u; c < 3; ++c) {
3115               uint8_t s2 = std::min(c, target.getDim() - 1);
3116               texi->offset[s][c].set(getSrc(offsets[s], s2));
3117               texi->offset[s][c].setInsn(texi);
3118            }
3119         }
3120      }
3121
3122      if (op == OP_TXG && offsetIdx == -1) {
3123         if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3124            texi->tex.useOffsets = 4;
3125            setPosition(texi, false);
3126            for (uint8_t i = 0; i < 4; ++i) {
3127               for (uint8_t j = 0; j < 2; ++j) {
3128                  texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3129                  texi->offset[i][j].setInsn(texi);
3130               }
3131            }
3132            setPosition(texi, true);
3133         }
3134      }
3135
3136      if (ddxIdx != -1 && ddyIdx != -1) {
3137         for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3138            texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3139            texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3140         }
3141      }
3142
3143      break;
3144   }
3145   default:
3146      ERROR("unknown nir_texop %u\n", insn->op);
3147      return false;
3148   }
3149   return true;
3150}
3151
3152/* nouveau's RA doesn't track the liveness of exported registers in the fragment
3153 * shader, so we need all the store_outputs to appear at the end of the shader
3154 * with no other instructions that might generate a temp value in between them.
3155 */
3156static void
3157nv_nir_move_stores_to_end(nir_shader *s)
3158{
3159   nir_function_impl *impl = nir_shader_get_entrypoint(s);
3160   nir_block *block = nir_impl_last_block(impl);
3161   nir_instr *first_store = NULL;
3162
3163   nir_foreach_instr_safe(instr, block) {
3164      if (instr == first_store)
3165         break;
3166      if (instr->type != nir_instr_type_intrinsic)
3167         continue;
3168      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3169      if (intrin->intrinsic == nir_intrinsic_store_output) {
3170         nir_instr_remove(instr);
3171         nir_instr_insert(nir_after_block(block), instr);
3172
3173         if (!first_store)
3174            first_store = instr;
3175      }
3176   }
3177   nir_metadata_preserve(impl,
3178                         nir_metadata_block_index |
3179                         nir_metadata_dominance);
3180}
3181
3182bool
3183Converter::run()
3184{
3185   bool progress;
3186
3187   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3188      nir_print_shader(nir, stderr);
3189
3190   struct nir_lower_subgroups_options subgroup_options = {};
3191   subgroup_options.subgroup_size = 32;
3192   subgroup_options.ballot_bit_size = 32;
3193   subgroup_options.ballot_components = 1;
3194   subgroup_options.lower_elect = true;
3195
3196   /* prepare for IO lowering */
3197   NIR_PASS_V(nir, nir_opt_deref);
3198   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3199   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3200
3201   /* codegen assumes vec4 alignment for memory */
3202   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, function_temp_type_info);
3203   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset);
3204   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3205
3206   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
3207              type_size, (nir_lower_io_options)0);
3208
3209   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3210
3211   struct nir_lower_tex_options tex_options = {};
3212   tex_options.lower_txp = ~0;
3213
3214   NIR_PASS_V(nir, nir_lower_tex, &tex_options);
3215
3216   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3217   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3218   NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
3219
3220   /*TODO: improve this lowering/optimisation loop so that we can use
3221    *      nir_opt_idiv_const effectively before this.
3222    */
3223   nir_lower_idiv_options idiv_options = {
3224      .imprecise_32bit_lowering = false,
3225      .allow_fp16 = true,
3226   };
3227   NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);
3228
3229   do {
3230      progress = false;
3231      NIR_PASS(progress, nir, nir_copy_prop);
3232      NIR_PASS(progress, nir, nir_opt_remove_phis);
3233      NIR_PASS(progress, nir, nir_opt_trivial_continues);
3234      NIR_PASS(progress, nir, nir_opt_cse);
3235      NIR_PASS(progress, nir, nir_opt_algebraic);
3236      NIR_PASS(progress, nir, nir_opt_constant_folding);
3237      NIR_PASS(progress, nir, nir_copy_prop);
3238      NIR_PASS(progress, nir, nir_opt_dce);
3239      NIR_PASS(progress, nir, nir_opt_dead_cf);
3240      NIR_PASS(progress, nir, nir_lower_64bit_phis);
3241   } while (progress);
3242
3243   nir_move_options move_options =
3244      (nir_move_options)(nir_move_const_undef |
3245                         nir_move_load_ubo |
3246                         nir_move_load_uniform |
3247                         nir_move_load_input);
3248   NIR_PASS_V(nir, nir_opt_sink, move_options);
3249   NIR_PASS_V(nir, nir_opt_move, move_options);
3250
3251   if (nir->info.stage == MESA_SHADER_FRAGMENT)
3252      NIR_PASS_V(nir, nv_nir_move_stores_to_end);
3253
3254   NIR_PASS_V(nir, nir_lower_bool_to_int32);
3255   NIR_PASS_V(nir, nir_convert_from_ssa, true);
3256
3257   // Garbage collect dead instructions
3258   nir_sweep(nir);
3259
3260   if (!parseNIR()) {
3261      ERROR("Couldn't prase NIR!\n");
3262      return false;
3263   }
3264
3265   if (!assignSlots()) {
3266      ERROR("Couldn't assign slots!\n");
3267      return false;
3268   }
3269
3270   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3271      nir_print_shader(nir, stderr);
3272
3273   nir_foreach_function(function, nir) {
3274      if (!visit(function))
3275         return false;
3276   }
3277
3278   return true;
3279}
3280
3281} // unnamed namespace
3282
3283namespace nv50_ir {
3284
3285bool
3286Program::makeFromNIR(struct nv50_ir_prog_info *info,
3287                     struct nv50_ir_prog_info_out *info_out)
3288{
3289   nir_shader *nir = (nir_shader*)info->bin.source;
3290   Converter converter(this, nir, info, info_out);
3291   bool result = converter.run();
3292   if (!result)
3293      return result;
3294   LoweringHelper lowering;
3295   lowering.run(this);
3296   tlsSize = info_out->bin.tlsSpace;
3297   return result;
3298}
3299
3300} // namespace nv50_ir
3301
3302static nir_shader_compiler_options
3303nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
3304{
3305   nir_shader_compiler_options op = {};
3306   op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
3307   op.lower_ffma16 = false;
3308   op.lower_ffma32 = false;
3309   op.lower_ffma64 = false;
3310   op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
3311   op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
3312   op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
3313   op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
3314   op.lower_flrp32 = true;
3315   op.lower_flrp64 = true;
3316   op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it
3317   op.lower_fsat = false;
3318   op.lower_fsqrt = false; // TODO: only before gm200
3319   op.lower_sincos = false;
3320   op.lower_fmod = true;
3321   op.lower_bitfield_extract = false;
3322   op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3323   op.lower_bitfield_insert = false;
3324   op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
3325   op.lower_bitfield_insert_to_bitfield_select = false;
3326   op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
3327   op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
3328   op.lower_ifind_msb = (chipset < NVISA_GF100_CHIPSET);
3329   op.lower_find_lsb = (chipset < NVISA_GF100_CHIPSET);
3330   op.lower_uadd_carry = true; // TODO
3331   op.lower_usub_borrow = true; // TODO
3332   op.lower_mul_high = false;
3333   op.lower_fneg = false;
3334   op.lower_ineg = false;
3335   op.lower_scmp = true; // TODO: not implemented yet
3336   op.lower_vector_cmp = false;
3337   op.lower_bitops = false;
3338   op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
3339   op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
3340   op.lower_fdph = false;
3341   op.lower_fdot = false;
3342   op.fdot_replicates = false; // TODO
3343   op.lower_ffloor = false; // TODO
3344   op.lower_ffract = true;
3345   op.lower_fceil = false; // TODO
3346   op.lower_ftrunc = false;
3347   op.lower_ldexp = true;
3348   op.lower_pack_half_2x16 = true;
3349   op.lower_pack_unorm_2x16 = true;
3350   op.lower_pack_snorm_2x16 = true;
3351   op.lower_pack_unorm_4x8 = true;
3352   op.lower_pack_snorm_4x8 = true;
3353   op.lower_unpack_half_2x16 = true;
3354   op.lower_unpack_unorm_2x16 = true;
3355   op.lower_unpack_snorm_2x16 = true;
3356   op.lower_unpack_unorm_4x8 = true;
3357   op.lower_unpack_snorm_4x8 = true;
3358   op.lower_pack_split = false;
3359   op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
3360   op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
3361   op.lower_insert_byte = true;
3362   op.lower_insert_word = true;
3363   op.lower_all_io_to_temps = false;
3364   op.lower_all_io_to_elements = false;
3365   op.vertex_id_zero_based = false;
3366   op.lower_base_vertex = false;
3367   op.lower_helper_invocation = false;
3368   op.optimize_sample_mask_in = false;
3369   op.lower_cs_local_index_to_id = true;
3370   op.lower_cs_local_id_to_index = false;
3371   op.lower_device_index_to_zero = false; // TODO
3372   op.lower_wpos_pntc = false; // TODO
3373   op.lower_hadd = true; // TODO
3374   op.lower_uadd_sat = true; // TODO
3375   op.lower_usub_sat = true; // TODO
3376   op.lower_iadd_sat = true; // TODO
3377   op.vectorize_io = false;
3378   op.lower_to_scalar = false;
3379   op.unify_interfaces = false;
3380   op.use_interpolated_input_intrinsics = true;
3381   op.lower_mul_2x32_64 = true; // TODO
3382   op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
3383   op.has_imul24 = false;
3384   op.intel_vec4 = false;
3385   op.force_indirect_unrolling = (nir_variable_mode) (
3386      ((shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_out : 0) |
3387      /* HW doesn't support indirect addressing of fragment program inputs
3388       * on Volta.  The binary driver generates a function to handle every
3389       * possible indirection, and indirectly calls the function to handle
3390       * this instead.
3391       */
3392      ((chipset >= NVISA_GV100_CHIPSET && shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_in : 0)
3393   );
3394   op.force_indirect_unrolling_sampler = (chipset < NVISA_GF100_CHIPSET),
3395   op.max_unroll_iterations = 32;
3396   op.lower_int64_options = (nir_lower_int64_options) (
3397      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
3398      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
3399      nir_lower_divmod64 |
3400      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
3401      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
3402      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
3403      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
3404      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
3405      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
3406      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
3407      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
3408      nir_lower_imul_2x32_64 |
3409      ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
3410      nir_lower_ufind_msb64
3411   );
3412   op.lower_doubles_options = (nir_lower_doubles_options) (
3413      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
3414      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
3415      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
3416      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
3417      nir_lower_dmod |
3418      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
3419      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
3420   );
3421   return op;
3422}
3423
3424static const nir_shader_compiler_options g80_nir_shader_compiler_options =
3425nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_TYPES);
3426static const nir_shader_compiler_options g80_fs_nir_shader_compiler_options =
3427nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_FRAGMENT);
3428static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
3429nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_TYPES);
3430static const nir_shader_compiler_options gf100_fs_nir_shader_compiler_options =
3431nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_FRAGMENT);
3432static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
3433nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_TYPES);
3434static const nir_shader_compiler_options gm107_fs_nir_shader_compiler_options =
3435nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_FRAGMENT);
3436static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
3437nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_TYPES);
3438static const nir_shader_compiler_options gv100_fs_nir_shader_compiler_options =
3439nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_FRAGMENT);
3440
3441const nir_shader_compiler_options *
3442nv50_ir_nir_shader_compiler_options(int chipset,  uint8_t shader_type)
3443{
3444   if (chipset >= NVISA_GV100_CHIPSET) {
3445      if (shader_type == PIPE_SHADER_FRAGMENT)
3446         return &gv100_fs_nir_shader_compiler_options;
3447      else
3448         return &gv100_nir_shader_compiler_options;
3449   }
3450
3451   if (chipset >= NVISA_GM107_CHIPSET) {
3452      if (shader_type == PIPE_SHADER_FRAGMENT)
3453         return &gm107_fs_nir_shader_compiler_options;
3454      else
3455         return &gm107_nir_shader_compiler_options;
3456   }
3457
3458   if (chipset >= NVISA_GF100_CHIPSET) {
3459      if (shader_type == PIPE_SHADER_FRAGMENT)
3460         return &gf100_fs_nir_shader_compiler_options;
3461      else
3462         return &gf100_nir_shader_compiler_options;
3463   }
3464
3465   if (shader_type == PIPE_SHADER_FRAGMENT)
3466      return &g80_fs_nir_shader_compiler_options;
3467   else
3468      return &g80_nir_shader_compiler_options;
3469}
3470