1/**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26#include "lp_bld_nir.h"
27#include "lp_bld_arit.h"
28#include "lp_bld_bitarit.h"
29#include "lp_bld_const.h"
30#include "lp_bld_conv.h"
31#include "lp_bld_gather.h"
32#include "lp_bld_logic.h"
33#include "lp_bld_quad.h"
34#include "lp_bld_flow.h"
35#include "lp_bld_intr.h"
36#include "lp_bld_struct.h"
37#include "lp_bld_debug.h"
38#include "lp_bld_printf.h"
39#include "nir_deref.h"
40#include "nir_search_helpers.h"
41
42
43// Doing AOS (and linear) codegen?
44static bool
45is_aos(const struct lp_build_nir_context *bld_base)
46{
47   // AOS is used for vectors of uint8[16]
48   return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
49}
50
51
52static void
53visit_cf_list(struct lp_build_nir_context *bld_base,
54              struct exec_list *list);
55
56
/*
 * Bitcast a vector value to the lp_build vector type matching the given
 * NIR ALU type and bit size.
 *
 * Types with no matching build context (the default case) are returned
 * unchanged; an unsupported bit size asserts and the function falls
 * through to return NULL.
 */
static LLVMValueRef
cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
          nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
      case 1:  /* 1-bit booleans are handled as 32-bit uint lanes here */
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
   default:
      return val;
   }
   /* Only reached via the assert()ing default branches above. */
   return NULL;
}
114
115
116static unsigned
117glsl_sampler_to_pipe(int sampler_dim, bool is_array)
118{
119   unsigned pipe_target = PIPE_BUFFER;
120   switch (sampler_dim) {
121   case GLSL_SAMPLER_DIM_1D:
122      pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
123      break;
124   case GLSL_SAMPLER_DIM_2D:
125      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
126      break;
127   case GLSL_SAMPLER_DIM_SUBPASS:
128   case GLSL_SAMPLER_DIM_SUBPASS_MS:
129      pipe_target = PIPE_TEXTURE_2D_ARRAY;
130      break;
131   case GLSL_SAMPLER_DIM_3D:
132      pipe_target = PIPE_TEXTURE_3D;
133      break;
134   case GLSL_SAMPLER_DIM_MS:
135      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
136      break;
137   case GLSL_SAMPLER_DIM_CUBE:
138      pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
139      break;
140   case GLSL_SAMPLER_DIM_RECT:
141      pipe_target = PIPE_TEXTURE_RECT;
142      break;
143   case GLSL_SAMPLER_DIM_BUF:
144      pipe_target = PIPE_BUFFER;
145      break;
146   default:
147      break;
148   }
149   return pipe_target;
150}
151
152
153static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
154{
155   return bld_base->ssa_defs[ssa->index];
156}
157
158
159static LLVMValueRef
160get_src(struct lp_build_nir_context *bld_base, nir_src src);
161
162
163static LLVMValueRef
164get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
165{
166   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
167   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
168   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
169   LLVMValueRef indir_src = NULL;
170   if (src.indirect)
171      indir_src = get_src(bld_base, *src.indirect);
172   return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
173}
174
175
176static LLVMValueRef
177get_src(struct lp_build_nir_context *bld_base, nir_src src)
178{
179   if (src.is_ssa)
180      return get_ssa_src(bld_base, src.ssa);
181   else
182      return get_reg_src(bld_base, src.reg);
183}
184
185
186static void
187assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
188{
189   bld_base->ssa_defs[idx] = ptr;
190}
191
192
193static void
194assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
195                LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
196{
197   if ((ssa->num_components == 1 || is_aos(bld_base))) {
198      assign_ssa(bld_base, ssa->index, vals[0]);
199   } else {
200      assign_ssa(bld_base, ssa->index,
201             lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
202                                              vals, ssa->num_components));
203   }
204}
205
206
207static void
208assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
209           unsigned write_mask,
210           LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
211{
212   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
213   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
214   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
215   LLVMValueRef indir_src = NULL;
216   if (reg->indirect)
217      indir_src = get_src(bld_base, *reg->indirect);
218   bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf, indir_src, reg_storage, vals);
219}
220
221
222static void
223assign_dest(struct lp_build_nir_context *bld_base,
224            const nir_dest *dest,
225            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
226{
227   if (dest->is_ssa)
228      assign_ssa_dest(bld_base, &dest->ssa, vals);
229   else
230      assign_reg(bld_base, &dest->reg, 0, vals);
231}
232
233
234static void
235assign_alu_dest(struct lp_build_nir_context *bld_base,
236                const nir_alu_dest *dest,
237                LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
238{
239   if (dest->dest.is_ssa)
240      assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
241   else
242      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
243}
244
245
246static LLVMValueRef
247int_to_bool32(struct lp_build_nir_context *bld_base,
248              uint32_t src_bit_size,
249              bool is_unsigned,
250              LLVMValueRef val)
251{
252   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
253   struct lp_build_context *int_bld =
254      get_int_bld(bld_base, is_unsigned, src_bit_size);
255   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm,
256                                          int_bld->type, PIPE_FUNC_NOTEQUAL,
257                                          val, int_bld->zero);
258   if (src_bit_size == 16)
259      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
260   else if (src_bit_size == 64)
261      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
262   return result;
263}
264
265
266static LLVMValueRef
267flt_to_bool32(struct lp_build_nir_context *bld_base,
268              uint32_t src_bit_size,
269              LLVMValueRef val)
270{
271   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
272   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
273   LLVMValueRef result =
274      lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
275   if (src_bit_size == 64)
276      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
277   if (src_bit_size == 16)
278      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
279   return result;
280}
281
282
283static LLVMValueRef
284fcmp32(struct lp_build_nir_context *bld_base,
285       enum pipe_compare_func compare,
286       uint32_t src_bit_size,
287       LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
288{
289   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
290   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
291   LLVMValueRef result;
292
293   if (compare != PIPE_FUNC_NOTEQUAL)
294      result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
295   else
296      result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
297   if (src_bit_size == 64)
298      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
299   else if (src_bit_size == 16)
300      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
301   return result;
302}
303
304
305static LLVMValueRef
306icmp32(struct lp_build_nir_context *bld_base,
307       enum pipe_compare_func compare,
308       bool is_unsigned,
309       uint32_t src_bit_size,
310       LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
311{
312   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
313   struct lp_build_context *i_bld =
314      get_int_bld(bld_base, is_unsigned, src_bit_size);
315   LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
316   if (src_bit_size < 32)
317      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
318   else if (src_bit_size == 64)
319      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
320   return result;
321}
322
323
324/**
325 * Get a source register value for an ALU instruction.
 * This is where swizzles are handled.  There should be no negation
327 * or absolute value modifiers.
328 * num_components indicates the number of components needed in the
329 * returned array or vector.
330 */
static LLVMValueRef
get_alu_src(struct lp_build_nir_context *bld_base,
            nir_alu_src src,
            unsigned num_components)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);

   /* In AOS mode values are not split per channel, so return as-is. */
   if (is_aos(bld_base))
      return value;

   /* Determine whether any channel is selected out of its natural order. */
   unsigned src_components = nir_src_num_components(src.src);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         /* Scalar result: extract the single swizzled channel. */
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* Splat a scalar across all requested channels (max 16). */
         LLVMValueRef values[] = {value, value, value, value,
                                  value, value, value, value,
                                  value, value, value, value,
                                  value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         /* General case: rebuild the aggregate channel-by-channel in
          * swizzle order. */
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   /* Negate/abs modifiers must have been lowered before reaching here. */
   assert(!src.negate);
   assert(!src.abs);
   return value;
}
374
375
/*
 * Convert a NIR boolean (a 32-bit 0 / ~0 mask) to a float 0.0 / 1.0 of
 * the requested bit size.  The conversion ANDs the mask with the bit
 * pattern of 1.0f — yielding exactly 0.0f or 1.0f — and then widens or
 * narrows the 32-bit float to the target width.
 */
static LLVMValueRef
emit_b2f(struct lp_build_nir_context *bld_base,
         LLVMValueRef src0,
         unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result =
      LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                   LLVMBuildBitCast(builder,
                                    lp_build_const_vec(bld_base->base.gallivm,
                                                       bld_base->base.type,
                                                       1.0),
                                    bld_base->int_bld.vec_type, ""),
                   "");
   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 16:
      result = LLVMBuildFPTrunc(builder, result,
                                bld_base->half_bld.vec_type, "");
      break;
   case 32:
      /* Already the right width. */
      break;
   case 64:
      result = LLVMBuildFPExt(builder, result,
                              bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}
407
408
409static LLVMValueRef
410emit_b2i(struct lp_build_nir_context *bld_base,
411         LLVMValueRef src0,
412         unsigned bitsize)
413{
414   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
415   LLVMValueRef result = LLVMBuildAnd(builder,
416                          cast_type(bld_base, src0, nir_type_int, 32),
417                          lp_build_const_int_vec(bld_base->base.gallivm,
418                                                 bld_base->base.type, 1), "");
419   switch (bitsize) {
420   case 8:
421      return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
422   case 16:
423      return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
424   case 32:
425      return result;
426   case 64:
427      return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
428   default:
429      unreachable("unsupported bit size.");
430   }
431}
432
433
434static LLVMValueRef
435emit_b32csel(struct lp_build_nir_context *bld_base,
436             unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
437             LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
438{
439   LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
440   LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
441   struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
442   return lp_build_select(bld, v, src[1], src[2]);
443}
444
445
/*
 * Extract the low (hi == false) or high (hi == true) 32-bit halves from a
 * vector of 64-bit values, returning a base.type.length-wide 32-bit
 * vector.  Which element of each pair is "low" depends on endianness.
 */
static LLVMValueRef
split_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef src,
            bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   /* Build two index sets: even elements (shuffles) and odd elements
    * (shuffles2), swapped on big-endian targets. */
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   /* Reinterpret the 64-bit vector as twice as many 32-bit lanes, then
    * gather every other lane. */
   src = LLVMBuildBitCast(gallivm->builder, src,
           LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
473
474
/*
 * Interleave two 32-bit vectors (low and high halves) into one vector
 * that can be reinterpreted as base.type.length 64-bit values — the
 * inverse of split_64bit().  Endianness decides which input supplies the
 * first element of each pair.
 */
static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   /* Pair element i/2 of input with element i/2 of input2 (indices past
    * base.type.length refer into the second shuffle operand). */
   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
498
499
/*
 * Extract the low (hi == false) or high (hi == true) 16-bit halves from a
 * vector of 32-bit values, returning a base.type.length-wide 16-bit
 * vector.  Which element of each pair is "low" depends on endianness.
 */
static LLVMValueRef
split_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef src,
            bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   /* Even indices select one half of each pair, odd indices the other;
    * swapped on big-endian targets. */
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   /* Reinterpret the 32-bit vector as twice as many 16-bit lanes. */
   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
526
527
/*
 * Interleave two 16-bit vectors into one — the inverse of split_16bit().
 *
 * NOTE(review): len uses int16_bld.type.length, the loop bound uses
 * int_bld.type.length and the shuffle offsets use base.type.length.
 * These appear to all be the same vector length; confirm if those build
 * contexts can ever have differing lengths.
 */
static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   /* Pair element i/2 of input with element i/2 of input2, with the order
    * within each pair depending on endianness. */
   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
551
552
/*
 * For signed division, replace the divisor with 1 in any lane where the
 * numerator is the minimum representable value AND the divisor is -1.
 * That combination overflows and can raise SIGFPE on x86, so it must be
 * neutralized before the divide is emitted.
 */
static LLVMValueRef
get_signed_divisor(struct gallivm_state *gallivm,
                   struct lp_build_context *int_bld,
                   struct lp_build_context *mask_bld,
                   int src_bit_size,
                   LLVMValueRef src, LLVMValueRef divisor)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* However for signed divides SIGFPE can occur if the numerator is INT_MIN
      and divisor is -1. */
   /* set mask if numerator == INT_MIN */
   long long min_val;
   switch (src_bit_size) {
   case 8:
      min_val = INT8_MIN;
      break;
   case 16:
      min_val = INT16_MIN;
      break;
   default:  /* unknown sizes deliberately fall through to the 32-bit case */
   case 32:
      min_val = INT_MIN;
      break;
   case 64:
      min_val = INT64_MIN;
      break;
   }
   LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
                                         lp_build_const_int_vec(gallivm, int_bld->type, min_val));
   /* set another mask if divisor is - 1 */
   LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
                                         lp_build_const_int_vec(gallivm, int_bld->type, -1));
   div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");

   /* Where both conditions hold, divide by 1 instead. */
   divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
   return divisor;
}
590
591
/*
 * Integer division with defined results for division by zero (and, for
 * signed divides, INT_MIN / -1) so the generated code cannot fault.
 * Lanes that divide by zero produce all-ones for unsigned (matching d3d10
 * udiv) and zero for signed.
 */
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);

   /* avoid divide by 0. Converted divisor from 0 to -1 */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   /* OR-ing the all-ones "== 0" mask into the divisor turns 0 into -1. */
   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
   if (!is_unsigned) {
      /* Also neutralize the INT_MIN / -1 overflow case. */
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      /* Signed: force div-by-zero lanes to 0. */
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
       * may as well do same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}
621
622
623static LLVMValueRef
624do_int_mod(struct lp_build_nir_context *bld_base,
625           bool is_unsigned, unsigned src_bit_size,
626           LLVMValueRef src, LLVMValueRef src2)
627{
628   struct gallivm_state *gallivm = bld_base->base.gallivm;
629   LLVMBuilderRef builder = gallivm->builder;
630   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
631   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
632   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
633                                        mask_bld->zero);
634   LLVMValueRef divisor = LLVMBuildOr(builder,
635                                      div_mask,
636                                      src2, "");
637   if (!is_unsigned) {
638      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
639                                   src_bit_size, src, divisor);
640   }
641   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
642   return LLVMBuildOr(builder, div_mask, result, "");
643}
644
645
/*
 * Quantize a 32-bit float vector to half precision and back.  Results
 * whose magnitude is below the smallest normal fp16 value (2^-14, whose
 * float bit pattern is 0x38800000) are flushed to +0.0; exact zeros keep
 * the round-tripped value.
 */
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   /* Round-trip through fp16 to apply the precision loss. */
   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   /* cond: |result| < 2^-14 (would be an fp16 denormal). */
   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   /* cond2: result != 0, so exact zeros are not flushed. */
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}
666
667
668static LLVMValueRef
669do_alu_action(struct lp_build_nir_context *bld_base,
670              const nir_alu_instr *instr,
671              unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
672              LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
673{
674   struct gallivm_state *gallivm = bld_base->base.gallivm;
675   LLVMBuilderRef builder = gallivm->builder;
676   LLVMValueRef result;
677
678   switch (instr->op) {
679   case nir_op_b2f16:
680      result = emit_b2f(bld_base, src[0], 16);
681      break;
682   case nir_op_b2f32:
683      result = emit_b2f(bld_base, src[0], 32);
684      break;
685   case nir_op_b2f64:
686      result = emit_b2f(bld_base, src[0], 64);
687      break;
688   case nir_op_b2i8:
689      result = emit_b2i(bld_base, src[0], 8);
690      break;
691   case nir_op_b2i16:
692      result = emit_b2i(bld_base, src[0], 16);
693      break;
694   case nir_op_b2i32:
695      result = emit_b2i(bld_base, src[0], 32);
696      break;
697   case nir_op_b2i64:
698      result = emit_b2i(bld_base, src[0], 64);
699      break;
700   case nir_op_b32csel:
701      result = emit_b32csel(bld_base, src_bit_size, src);
702      break;
703   case nir_op_bit_count:
704      result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
705      if (src_bit_size[0] < 32)
706         result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
707      else if (src_bit_size[0] > 32)
708         result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
709      break;
710   case nir_op_bitfield_select:
711      result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
712      break;
713   case nir_op_bitfield_reverse:
714      result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
715      break;
716   case nir_op_f2b32:
717      result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
718      break;
719   case nir_op_f2f16:
720      if (src_bit_size[0] == 64)
721         src[0] = LLVMBuildFPTrunc(builder, src[0],
722                                   bld_base->base.vec_type, "");
723      result = LLVMBuildFPTrunc(builder, src[0],
724                                bld_base->half_bld.vec_type, "");
725      break;
726   case nir_op_f2f32:
727      if (src_bit_size[0] < 32)
728         result = LLVMBuildFPExt(builder, src[0],
729                                 bld_base->base.vec_type, "");
730      else
731         result = LLVMBuildFPTrunc(builder, src[0],
732                                   bld_base->base.vec_type, "");
733      break;
734   case nir_op_f2f64:
735      result = LLVMBuildFPExt(builder, src[0],
736                              bld_base->dbl_bld.vec_type, "");
737      break;
738   case nir_op_f2i8:
739      result = LLVMBuildFPToSI(builder,
740                               src[0],
741                               bld_base->uint8_bld.vec_type, "");
742      break;
743   case nir_op_f2i16:
744      result = LLVMBuildFPToSI(builder,
745                               src[0],
746                               bld_base->uint16_bld.vec_type, "");
747      break;
748   case nir_op_f2i32:
749      result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
750      break;
751   case nir_op_f2u8:
752      result = LLVMBuildFPToUI(builder,
753                               src[0],
754                               bld_base->uint8_bld.vec_type, "");
755      break;
756   case nir_op_f2u16:
757      result = LLVMBuildFPToUI(builder,
758                               src[0],
759                               bld_base->uint16_bld.vec_type, "");
760      break;
761   case nir_op_f2u32:
762      result = LLVMBuildFPToUI(builder,
763                               src[0],
764                               bld_base->base.int_vec_type, "");
765      break;
766   case nir_op_f2i64:
767      result = LLVMBuildFPToSI(builder,
768                               src[0],
769                               bld_base->int64_bld.vec_type, "");
770      break;
771   case nir_op_f2u64:
772      result = LLVMBuildFPToUI(builder,
773                               src[0],
774                               bld_base->uint64_bld.vec_type, "");
775      break;
776   case nir_op_fabs:
777      result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
778      break;
779   case nir_op_fadd:
780      result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
781                            src[0], src[1]);
782      break;
783   case nir_op_fceil:
784      result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
785      break;
786   case nir_op_fcos:
787      result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
788      break;
789   case nir_op_fddx:
790   case nir_op_fddx_coarse:
791   case nir_op_fddx_fine:
792      result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
793      break;
794   case nir_op_fddy:
795   case nir_op_fddy_coarse:
796   case nir_op_fddy_fine:
797      result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
798      break;
799   case nir_op_fdiv:
800      result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
801                            src[0], src[1]);
802      break;
803   case nir_op_feq32:
804      result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
805      break;
806   case nir_op_fexp2:
807      result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
808      break;
809   case nir_op_ffloor:
810      result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
811      break;
812   case nir_op_ffma:
813      result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
814      break;
815   case nir_op_ffract: {
816      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
817      LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
818      result = lp_build_sub(flt_bld, src[0], tmp);
819      break;
820   }
821   case nir_op_fge:
822   case nir_op_fge32:
823      result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
824      break;
825   case nir_op_find_lsb: {
826      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
827      result = lp_build_cttz(int_bld, src[0]);
828      if (src_bit_size[0] < 32)
829         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
830      else if (src_bit_size[0] > 32)
831         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
832      break;
833   }
834   case nir_op_fisfinite32:
835      unreachable("Should have been lowered in nir_opt_algebraic_late.");
836   case nir_op_flog2:
837      result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
838      break;
839   case nir_op_flt:
840   case nir_op_flt32:
841      result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
842      break;
843   case nir_op_fmax:
844   case nir_op_fmin: {
845      enum gallivm_nan_behavior minmax_nan;
846      int first = 0;
847
848      /* If one of the sources is known to be a number (i.e., not NaN), then
849       * better code can be generated by passing that information along.
850       */
851      if (is_a_number(bld_base->range_ht, instr, 1,
852                      0 /* unused num_components */,
853                      NULL /* unused swizzle */)) {
854         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
855      } else if (is_a_number(bld_base->range_ht, instr, 0,
856                             0 /* unused num_components */,
857                             NULL /* unused swizzle */)) {
858         first = 1;
859         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
860      } else {
861         minmax_nan = GALLIVM_NAN_RETURN_OTHER;
862      }
863
864      if (instr->op == nir_op_fmin) {
865         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
866                                   src[first], src[1 - first], minmax_nan);
867      } else {
868         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
869                                   src[first], src[1 - first], minmax_nan);
870      }
871      break;
872   }
873   case nir_op_fmod: {
874      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
875      result = lp_build_div(flt_bld, src[0], src[1]);
876      result = lp_build_floor(flt_bld, result);
877      result = lp_build_mul(flt_bld, src[1], result);
878      result = lp_build_sub(flt_bld, src[0], result);
879      break;
880   }
881   case nir_op_fmul:
882      result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
883                            src[0], src[1]);
884      break;
885   case nir_op_fneu32:
886      result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
887      break;
888   case nir_op_fneg:
889      result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
890      break;
891   case nir_op_fpow:
892      result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
893      break;
894   case nir_op_fquantize2f16:
895      result = do_quantize_to_f16(bld_base, src[0]);
896      break;
897   case nir_op_frcp:
898      result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
899      break;
900   case nir_op_fround_even:
901      if (src_bit_size[0] == 16) {
902         struct lp_build_context *bld = get_flt_bld(bld_base, 16);
903         char intrinsic[64];
904         lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
905         result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
906      } else {
907         result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
908      }
909      break;
910   case nir_op_frsq:
911      result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
912      break;
913   case nir_op_fsat:
914      result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
915      break;
916   case nir_op_fsign:
917      result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
918      break;
919   case nir_op_fsin:
920      result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
921      break;
922   case nir_op_fsqrt:
923      result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
924      break;
925   case nir_op_ftrunc:
926      result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
927      break;
928   case nir_op_i2b32:
929      result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
930      break;
931   case nir_op_i2f16:
932      result = LLVMBuildSIToFP(builder, src[0],
933                               bld_base->half_bld.vec_type, "");
934      break;
935   case nir_op_i2f32:
936      result = lp_build_int_to_float(&bld_base->base, src[0]);
937      break;
938   case nir_op_i2f64:
939      result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
940      break;
941   case nir_op_i2i8:
942      result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
943      break;
944   case nir_op_i2i16:
945      if (src_bit_size[0] < 16)
946         result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
947      else
948         result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
949      break;
950   case nir_op_i2i32:
951      if (src_bit_size[0] < 32)
952         result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
953      else
954         result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
955      break;
956   case nir_op_i2i64:
957      result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
958      break;
959   case nir_op_iabs:
960      result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
961      break;
962   case nir_op_iadd:
963      result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
964                            src[0], src[1]);
965      break;
966   case nir_op_iand:
967      result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
968                            src[0], src[1]);
969      break;
970   case nir_op_idiv:
971      result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
972      break;
973   case nir_op_ieq32:
974      result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
975      break;
976   case nir_op_ige32:
977      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
978      break;
979   case nir_op_ilt32:
980      result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
981      break;
982   case nir_op_imax:
983      result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
984      break;
985   case nir_op_imin:
986      result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
987      break;
988   case nir_op_imul:
989   case nir_op_imul24:
990      result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
991                            src[0], src[1]);
992      break;
993   case nir_op_imul_high: {
994      LLVMValueRef hi_bits;
995      lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
996      result = hi_bits;
997      break;
998   }
999   case nir_op_ine32:
1000      result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
1001      break;
1002   case nir_op_ineg:
1003      result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
1004      break;
1005   case nir_op_inot:
1006      result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
1007      break;
1008   case nir_op_ior:
1009      result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
1010                           src[0], src[1]);
1011      break;
1012   case nir_op_imod:
1013   case nir_op_irem:
1014      result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
1015      break;
1016   case nir_op_ishl: {
1017      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1018      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
1019      if (src_bit_size[0] == 64)
1020         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1021      if (src_bit_size[0] < 32)
1022         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1023      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1024      result = lp_build_shl(int_bld, src[0], src[1]);
1025      break;
1026   }
1027   case nir_op_ishr: {
1028      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1029      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
1030      if (src_bit_size[0] == 64)
1031         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1032      if (src_bit_size[0] < 32)
1033         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1034      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1035      result = lp_build_shr(int_bld, src[0], src[1]);
1036      break;
1037   }
1038   case nir_op_isign:
1039      result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
1040      break;
1041   case nir_op_isub:
1042      result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
1043                            src[0], src[1]);
1044      break;
1045   case nir_op_ixor:
1046      result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
1047                            src[0], src[1]);
1048      break;
1049   case nir_op_mov:
1050      result = src[0];
1051      break;
1052   case nir_op_unpack_64_2x32_split_x:
1053      result = split_64bit(bld_base, src[0], false);
1054      break;
1055   case nir_op_unpack_64_2x32_split_y:
1056      result = split_64bit(bld_base, src[0], true);
1057      break;
1058
1059   case nir_op_pack_32_2x16_split: {
1060      LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
1061      result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
1062      break;
1063   }
1064   case nir_op_unpack_32_2x16_split_x:
1065      result = split_16bit(bld_base, src[0], false);
1066      break;
1067   case nir_op_unpack_32_2x16_split_y:
1068      result = split_16bit(bld_base, src[0], true);
1069      break;
1070   case nir_op_pack_64_2x32_split: {
1071      LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
1072      result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
1073      break;
1074   }
1075   case nir_op_pack_32_4x8_split: {
1076      LLVMValueRef tmp1 = merge_16bit(bld_base, src[0], src[1]);
1077      LLVMValueRef tmp2 = merge_16bit(bld_base, src[2], src[3]);
1078      tmp1 = LLVMBuildBitCast(builder, tmp1, bld_base->uint16_bld.vec_type, "");
1079      tmp2 = LLVMBuildBitCast(builder, tmp2, bld_base->uint16_bld.vec_type, "");
1080      LLVMValueRef tmp = merge_16bit(bld_base, tmp1, tmp2);
1081      result = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.vec_type, "");
1082      break;
1083   }
1084   case nir_op_u2f16:
1085      result = LLVMBuildUIToFP(builder, src[0],
1086                               bld_base->half_bld.vec_type, "");
1087      break;
1088   case nir_op_u2f32:
1089      result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
1090      break;
1091   case nir_op_u2f64:
1092      result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
1093      break;
1094   case nir_op_u2u8:
1095      result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
1096      break;
1097   case nir_op_u2u16:
1098      if (src_bit_size[0] < 16)
1099         result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
1100      else
1101         result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
1102      break;
1103   case nir_op_u2u32:
1104      if (src_bit_size[0] < 32)
1105         result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
1106      else
1107         result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
1108      break;
1109   case nir_op_u2u64:
1110      result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
1111      break;
1112   case nir_op_udiv:
1113      result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
1114      break;
1115   case nir_op_ufind_msb: {
1116      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1117      result = lp_build_ctlz(uint_bld, src[0]);
1118      result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
1119      if (src_bit_size[0] < 32)
1120         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
1121      else
1122         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
1123      break;
1124   }
1125   case nir_op_uge32:
1126      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
1127      break;
1128   case nir_op_ult32:
1129      result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
1130      break;
1131   case nir_op_umax:
1132      result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1133      break;
1134   case nir_op_umin:
1135      result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1136      break;
1137   case nir_op_umod:
1138      result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
1139      break;
1140   case nir_op_umul_high: {
1141      LLVMValueRef hi_bits;
1142      lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
1143      result = hi_bits;
1144      break;
1145   }
1146   case nir_op_ushr: {
1147      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1148      if (src_bit_size[0] == 64)
1149         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1150      if (src_bit_size[0] < 32)
1151         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1152      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1153      result = lp_build_shr(uint_bld, src[0], src[1]);
1154      break;
1155   }
1156   case nir_op_bcsel: {
1157      LLVMTypeRef src1_type = LLVMTypeOf(src[1]);
1158      LLVMTypeRef src2_type = LLVMTypeOf(src[2]);
1159
1160      if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
1161          LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
1162         src[2] = LLVMBuildIntToPtr(builder, src[2], src1_type, "");
1163      } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
1164                 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
1165         src[1] = LLVMBuildIntToPtr(builder, src[1], src2_type, "");
1166      }
1167
1168      for (int i = 1; i <= 2; i++) {
1169         LLVMTypeRef type = LLVMTypeOf(src[i]);
1170         if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
1171            break;
1172         src[i] = LLVMBuildBitCast(builder, src[i], get_int_bld(bld_base, true, src_bit_size[i])->vec_type, "");
1173      }
1174      return LLVMBuildSelect(builder, src[0], src[1], src[2], "");
1175   }
1176   default:
1177      assert(0);
1178      break;
1179   }
1180   return result;
1181}
1182
1183
static void
visit_alu(struct lp_build_nir_context *bld_base,
          const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   const unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;

   /* Figure out how many components each source operand carries.  Most ops
    * match the destination, but a few have fixed-size inputs.
    */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      /* vecN builds the destination from N scalar sources. */
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      /* Horizontal sums take their input size from the op info table. */
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }

   /* Fetch all source operands and record their bit sizes. */
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   /* AOS special case: a mov to a NIR register writes each enabled
    * channel of the register directly and produces no SSA result.
    */
   if (instr->op == nir_op_mov &&
       is_aos(bld_base) &&
       !instr->dest.dest.is_ssa) {
      for (unsigned i = 0; i < 4; i++) {
         if (instr->dest.write_mask & (1 << i)) {
            assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
         }
      }
      return;
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 ||
       instr->op == nir_op_vec3 ||
       instr->op == nir_op_vec2 ||
       instr->op == nir_op_vec8 ||
       instr->op == nir_op_vec16) {
      /* vecN: each scalar source becomes one destination component,
       * converted to the op's declared input type.
       */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i],
                               nir_op_infos[instr->op].input_types[i],
                               src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 ||
              instr->op == nir_op_fsum3 ||
              instr->op == nir_op_fsum2) {
      /* Horizontal add: accumulate all channels of src[0] into result[0]. */
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                          src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan,
                               nir_op_infos[instr->op].input_types[0],
                               src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan
            : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                           result[0], temp_chan);
      }
   } else if (is_aos(bld_base)) {
      /* AOS path operates on the packed vector as a whole. */
      if (instr->op == nir_op_fmul) {
         /* Constant operands need conversion to the AOS representation
          * before the multiply.
          */
         if (LLVMIsConstant(src[0])) {
            src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
         }
         if (LLVMIsConstant(src[1])) {
            src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
         }
      }
      result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
   } else {
      /* Loop for R,G,B,A channels */
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         /* Loop over instruction operands */
         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            /* Multi-component sources are LLVM aggregates; pull out
             * channel c.  Single-component sources are used as-is.
             */
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                     src[i], c, "");
            } else {
               src_chan[i] = src[i];
            }
            src_chan[i] = cast_type(bld_base, src_chan[i],
                                    nir_op_infos[instr->op].input_types[i],
                                    src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c],
                               nir_op_infos[instr->op].output_type,
                               nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
}
1297
1298
1299static void
1300visit_load_const(struct lp_build_nir_context *bld_base,
1301                 const nir_load_const_instr *instr)
1302{
1303   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1304   bld_base->load_const(bld_base, instr, result);
1305   assign_ssa_dest(bld_base, &instr->def, result);
1306}
1307
1308
1309static void
1310get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
1311                 bool vs_in, unsigned *vertex_index_out,
1312                 LLVMValueRef *vertex_index_ref,
1313                 unsigned *const_out, LLVMValueRef *indir_out)
1314{
1315   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1316   nir_variable *var = nir_deref_instr_get_variable(instr);
1317   nir_deref_path path;
1318   unsigned idx_lvl = 1;
1319
1320   nir_deref_path_init(&path, instr, NULL);
1321
1322   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
1323      if (vertex_index_ref) {
1324         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
1325         if (vertex_index_out)
1326            *vertex_index_out = 0;
1327      } else {
1328         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
1329      }
1330      ++idx_lvl;
1331   }
1332
1333   uint32_t const_offset = 0;
1334   LLVMValueRef offset = NULL;
1335
1336   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
1337      assert(instr->deref_type == nir_deref_type_array);
1338      const_offset = nir_src_as_uint(instr->arr.index);
1339      goto out;
1340   }
1341
1342   for (; path.path[idx_lvl]; ++idx_lvl) {
1343      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
1344      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
1345         unsigned index = path.path[idx_lvl]->strct.index;
1346
1347         for (unsigned i = 0; i < index; i++) {
1348            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
1349            const_offset += glsl_count_attribute_slots(ft, vs_in);
1350         }
1351      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
1352         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
1353         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
1354           const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
1355         } else {
1356           LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
1357           idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
1358           LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
1359                                               idx_src);
1360           if (offset)
1361             offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
1362           else
1363             offset = array_off;
1364         }
1365      } else
1366         unreachable("Uhandled deref type in get_deref_instr_offset");
1367   }
1368
1369out:
1370   nir_deref_path_finish(&path);
1371
1372   if (const_offset && offset)
1373      offset = LLVMBuildAdd(builder, offset,
1374                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
1375                            "");
1376   *const_out = const_offset;
1377   *indir_out = offset;
1378}
1379
1380
1381static void
1382visit_load_input(struct lp_build_nir_context *bld_base,
1383                 nir_intrinsic_instr *instr,
1384                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1385{
1386   nir_variable var = {0};
1387   var.data.location = nir_intrinsic_io_semantics(instr).location;
1388   var.data.driver_location = nir_intrinsic_base(instr);
1389   var.data.location_frac = nir_intrinsic_component(instr);
1390
1391   unsigned nc = nir_dest_num_components(instr->dest);
1392   unsigned bit_size = nir_dest_bit_size(instr->dest);
1393
1394   nir_src offset = *nir_get_io_offset_src(instr);
1395   bool indirect = !nir_src_is_const(offset);
1396   if (!indirect)
1397      assert(nir_src_as_uint(offset) == 0);
1398   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1399
1400   bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1401}
1402
1403
1404static void
1405visit_store_output(struct lp_build_nir_context *bld_base,
1406                   nir_intrinsic_instr *instr)
1407{
1408   nir_variable var = {0};
1409   var.data.location = nir_intrinsic_io_semantics(instr).location;
1410   var.data.driver_location = nir_intrinsic_base(instr);
1411   var.data.location_frac = nir_intrinsic_component(instr);
1412
1413   unsigned mask = nir_intrinsic_write_mask(instr);
1414
1415   unsigned bit_size = nir_src_bit_size(instr->src[0]);
1416   LLVMValueRef src = get_src(bld_base, instr->src[0]);
1417
1418   nir_src offset = *nir_get_io_offset_src(instr);
1419   bool indirect = !nir_src_is_const(offset);
1420   if (!indirect)
1421      assert(nir_src_as_uint(offset) == 0);
1422   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1423
1424   if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1425      src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1426                                  src, 0, "");
1427   }
1428
1429   bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1430                       bit_size, &var, mask, NULL, 0, indir_index, src);
1431}
1432
1433
1434static void
1435visit_load_var(struct lp_build_nir_context *bld_base,
1436               nir_intrinsic_instr *instr,
1437               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1438{
1439   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1440   nir_variable *var = nir_deref_instr_get_variable(deref);
1441   assert(util_bitcount(deref->modes) == 1);
1442   nir_variable_mode mode = deref->modes;
1443   unsigned const_index;
1444   LLVMValueRef indir_index;
1445   LLVMValueRef indir_vertex_index = NULL;
1446   unsigned vertex_index = 0;
1447   unsigned nc = nir_dest_num_components(instr->dest);
1448   unsigned bit_size = nir_dest_bit_size(instr->dest);
1449   if (var) {
1450      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
1451         var->data.mode == nir_var_shader_in;
1452      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
1453         var->data.mode == nir_var_shader_in;
1454      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1455         var->data.mode == nir_var_shader_in;
1456      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1457         var->data.mode == nir_var_shader_out && !var->data.patch;
1458      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
1459         var->data.mode == nir_var_shader_in && !var->data.patch;
1460
1461      mode = var->data.mode;
1462
1463      get_deref_offset(bld_base, deref, vs_in,
1464                   gs_in ? &vertex_index : NULL,
1465                   (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
1466                   &const_index, &indir_index);
1467   }
1468   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index,
1469                      indir_vertex_index, const_index, indir_index, result);
1470}
1471
1472
1473static void
1474visit_store_var(struct lp_build_nir_context *bld_base,
1475                nir_intrinsic_instr *instr)
1476{
1477   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1478   nir_variable *var = nir_deref_instr_get_variable(deref);
1479   assert(util_bitcount(deref->modes) == 1);
1480   nir_variable_mode mode = deref->modes;
1481   int writemask = instr->const_index[0];
1482   unsigned bit_size = nir_src_bit_size(instr->src[1]);
1483   LLVMValueRef src = get_src(bld_base, instr->src[1]);
1484   unsigned const_index = 0;
1485   LLVMValueRef indir_index, indir_vertex_index = NULL;
1486   if (var) {
1487      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1488         var->data.mode == nir_var_shader_out && !var->data.patch;
1489      get_deref_offset(bld_base, deref, false, NULL,
1490                       tcs_out ? &indir_vertex_index : NULL,
1491                       &const_index, &indir_index);
1492   }
1493   bld_base->store_var(bld_base, mode, instr->num_components, bit_size,
1494                       var, writemask, indir_vertex_index, const_index,
1495                       indir_index, src);
1496}
1497
1498
1499static void
1500visit_load_ubo(struct lp_build_nir_context *bld_base,
1501               nir_intrinsic_instr *instr,
1502               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1503{
1504   struct gallivm_state *gallivm = bld_base->base.gallivm;
1505   LLVMBuilderRef builder = gallivm->builder;
1506   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
1507   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1508
1509   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1510   idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
1511   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
1512                      nir_dest_bit_size(instr->dest),
1513                      offset_is_uniform, idx, offset, result);
1514}
1515
1516
1517static void
1518visit_load_push_constant(struct lp_build_nir_context *bld_base,
1519                         nir_intrinsic_instr *instr,
1520                         LLVMValueRef result[4])
1521{
1522   struct gallivm_state *gallivm = bld_base->base.gallivm;
1523   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1524   LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
1525   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1526
1527   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
1528                      nir_dest_bit_size(instr->dest),
1529                      offset_is_uniform, idx, offset, result);
1530}
1531
1532
1533static void
1534visit_load_ssbo(struct lp_build_nir_context *bld_base,
1535                nir_intrinsic_instr *instr,
1536                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1537{
1538   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
1539   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1540   bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[0]) && nir_src_is_always_uniform(instr->src[1]);
1541   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1542                      index_and_offset_are_uniform, idx, offset, result);
1543}
1544
1545
1546static void
1547visit_store_ssbo(struct lp_build_nir_context *bld_base,
1548                 nir_intrinsic_instr *instr)
1549{
1550   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1551   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
1552   LLVMValueRef offset = get_src(bld_base, instr->src[2]);
1553   bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[1]) && nir_src_is_always_uniform(instr->src[2]);
1554   int writemask = instr->const_index[0];
1555   int nc = nir_src_num_components(instr->src[0]);
1556   int bitsize = nir_src_bit_size(instr->src[0]);
1557   bld_base->store_mem(bld_base, writemask, nc, bitsize, index_and_offset_are_uniform, idx, offset, val);
1558}
1559
1560
1561static void
1562visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1563                    nir_intrinsic_instr *instr,
1564                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1565{
1566   LLVMValueRef idx = cast_type(bld_base,
1567                                get_src(bld_base, instr->src[0]),
1568                                nir_type_uint, 32);
1569   result[0] = bld_base->get_ssbo_size(bld_base, idx);
1570}
1571
1572
1573static void
1574visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
1575                  nir_intrinsic_instr *instr,
1576                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1577{
1578   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]),
1579                                nir_type_uint, 32);
1580   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1581   LLVMValueRef val = get_src(bld_base, instr->src[2]);
1582   LLVMValueRef val2 = NULL;
1583   int bitsize = nir_src_bit_size(instr->src[2]);
1584   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
1585      val2 = get_src(bld_base, instr->src[3]);
1586
1587   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx,
1588                        offset, val, val2, &result[0]);
1589}
1590
1591
/* Lower nir_intrinsic_image_deref_load: resolve the image variable from
 * the deref in src[0], unpack coordinates from src[1] (and the sample
 * index from src[2] for MS images), then dispatch to the backend
 * image_op hook which writes the loaded texels into 'result'. */
static void
visit_load_image(struct lp_build_nir_context *bld_base,
                 nir_intrinsic_instr *instr,
                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   /* Strip array dims to get the underlying image/sampler type. */
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* const_index: constant array offset; indir_index: non-NULL when the
    * image array is indexed dynamically. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays keep the layer in component 1; move it to the Z slot
    * where the image code expects it. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   params.coords = coords;
   params.outdata = result;
   params.img_op = LP_IMG_LOAD;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ||
       glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS) {
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]),
                                  nir_type_uint, 32);
   }
   /* With a dynamic index the constant part is folded into
    * image_index_offset instead of image_index. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;
   bld_base->image_op(bld_base, &params);
}
1630
1631
1632static void
1633visit_store_image(struct lp_build_nir_context *bld_base,
1634                  nir_intrinsic_instr *instr)
1635{
1636   struct gallivm_state *gallivm = bld_base->base.gallivm;
1637   LLVMBuilderRef builder = gallivm->builder;
1638   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1639   nir_variable *var = nir_deref_instr_get_variable(deref);
1640   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1641   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
1642   LLVMValueRef coords[5];
1643   struct lp_img_params params;
1644   const struct glsl_type *type = glsl_without_array(var->type);
1645   unsigned const_index;
1646   LLVMValueRef indir_index;
1647   get_deref_offset(bld_base, deref, false, NULL, NULL,
1648                    &const_index, &indir_index);
1649
1650   memset(&params, 0, sizeof(params));
1651   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
1652   for (unsigned i = 0; i < 4; i++)
1653      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1654   if (params.target == PIPE_TEXTURE_1D_ARRAY)
1655      coords[2] = coords[1];
1656   params.coords = coords;
1657
1658   for (unsigned i = 0; i < 4; i++) {
1659      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
1660      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
1661   }
1662   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
1663      params.ms_index = get_src(bld_base, instr->src[2]);
1664   params.img_op = LP_IMG_STORE;
1665   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1666   params.image_index_offset = indir_index;
1667
1668   if (params.target == PIPE_TEXTURE_1D_ARRAY)
1669      coords[2] = coords[1];
1670   bld_base->image_op(bld_base, &params);
1671}
1672
1673
/* Lower the nir_intrinsic_image_deref_atomic_* family: map the NIR
 * intrinsic to an LLVM atomic RMW opcode, gather coordinates/operands,
 * and dispatch to the backend image_op hook, which writes the previous
 * value into 'result'. */
static void
visit_atomic_image(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr,
                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   /* Translate the NIR atomic intrinsic to the LLVM RMW binop. */
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      /* comp_swap has no RMW binop; it is handled via img_op below. */
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++) {
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   }
   /* 1D arrays keep the layer in component 1; move it to the Z slot
    * where the image code expects it. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY) {
      coords[2] = coords[1];
   }

   params.coords = coords;

   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
      params.ms_index = get_src(bld_base, instr->src[2]);
   }
   /* comp_swap carries the comparison value in src[4]. */
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else {
      params.indata[0] = in_val;
   }

   params.outdata = result;
   params.img_op =
      (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
      ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   /* With a dynamic index the constant part is folded into
    * image_index_offset instead of image_index. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1758
1759
1760static void
1761visit_image_size(struct lp_build_nir_context *bld_base,
1762                 nir_intrinsic_instr *instr,
1763                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1764{
1765   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1766   nir_variable *var = nir_deref_instr_get_variable(deref);
1767   struct lp_sampler_size_query_params params = { 0 };
1768   unsigned const_index;
1769   LLVMValueRef indir_index;
1770   const struct glsl_type *type = glsl_without_array(var->type);
1771   get_deref_offset(bld_base, deref, false, NULL, NULL,
1772                    &const_index, &indir_index);
1773   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1774   params.texture_unit_offset = indir_index;
1775   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1776                                        glsl_sampler_type_is_array(type));
1777   params.sizes_out = result;
1778
1779   bld_base->image_size(bld_base, &params);
1780}
1781
1782
1783static void
1784visit_image_samples(struct lp_build_nir_context *bld_base,
1785                    nir_intrinsic_instr *instr,
1786                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1787{
1788   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1789   nir_variable *var = nir_deref_instr_get_variable(deref);
1790   struct lp_sampler_size_query_params params = { 0 };
1791   unsigned const_index;
1792   LLVMValueRef indir_index;
1793   const struct glsl_type *type = glsl_without_array(var->type);
1794   get_deref_offset(bld_base, deref, false, NULL, NULL,
1795                    &const_index, &indir_index);
1796
1797   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1798   params.texture_unit_offset = indir_index;
1799   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
1800                                        glsl_sampler_type_is_array(type));
1801   params.sizes_out = result;
1802   params.samples_only = true;
1803
1804   bld_base->image_size(bld_base, &params);
1805}
1806
1807static void
1808visit_shared_load(struct lp_build_nir_context *bld_base,
1809                  nir_intrinsic_instr *instr,
1810                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1811{
1812   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1813   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1814   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1815                      offset_is_uniform, NULL, offset, result);
1816}
1817
1818
1819static void
1820visit_shared_store(struct lp_build_nir_context *bld_base,
1821                   nir_intrinsic_instr *instr)
1822{
1823   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1824   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1825   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1826   int writemask = instr->const_index[1];
1827   int nc = nir_src_num_components(instr->src[0]);
1828   int bitsize = nir_src_bit_size(instr->src[0]);
1829   bld_base->store_mem(bld_base, writemask, nc, bitsize, offset_is_uniform, NULL, offset, val);
1830}
1831
1832
1833static void
1834visit_shared_atomic(struct lp_build_nir_context *bld_base,
1835                    nir_intrinsic_instr *instr,
1836                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1837{
1838   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1839   LLVMValueRef val = get_src(bld_base, instr->src[1]);
1840   LLVMValueRef val2 = NULL;
1841   int bitsize = nir_src_bit_size(instr->src[1]);
1842   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
1843      val2 = get_src(bld_base, instr->src[2]);
1844
1845   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL, offset, val, val2, &result[0]);
1846}
1847
1848
/* Emit an execution barrier by delegating to the backend hook. */
static void
visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}
1854
1855
1856static void
1857visit_discard(struct lp_build_nir_context *bld_base,
1858              nir_intrinsic_instr *instr)
1859{
1860   LLVMValueRef cond = NULL;
1861   if (instr->intrinsic == nir_intrinsic_discard_if) {
1862      cond = get_src(bld_base, instr->src[0]);
1863      cond = cast_type(bld_base, cond, nir_type_int, 32);
1864   }
1865   bld_base->discard(bld_base, cond);
1866}
1867
1868
1869static void
1870visit_load_kernel_input(struct lp_build_nir_context *bld_base,
1871                        nir_intrinsic_instr *instr,
1872                        LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1873{
1874   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1875
1876   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1877   bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest),
1878                             nir_dest_bit_size(instr->dest),
1879                             nir_src_bit_size(instr->src[0]),
1880                             offset_is_uniform, offset, result);
1881}
1882
1883
1884static void
1885visit_load_global(struct lp_build_nir_context *bld_base,
1886                  nir_intrinsic_instr *instr,
1887                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1888{
1889   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1890   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1891   bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1892                         nir_src_bit_size(instr->src[0]),
1893                         offset_is_uniform, addr, result);
1894}
1895
1896
1897static void
1898visit_store_global(struct lp_build_nir_context *bld_base,
1899                   nir_intrinsic_instr *instr)
1900{
1901   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1902   int nc = nir_src_num_components(instr->src[0]);
1903   int bitsize = nir_src_bit_size(instr->src[0]);
1904   LLVMValueRef addr = get_src(bld_base, instr->src[1]);
1905   int addr_bitsize = nir_src_bit_size(instr->src[1]);
1906   int writemask = instr->const_index[0];
1907   bld_base->store_global(bld_base, writemask, nc, bitsize,
1908                          addr_bitsize, addr, val);
1909}
1910
1911
1912static void
1913visit_global_atomic(struct lp_build_nir_context *bld_base,
1914                    nir_intrinsic_instr *instr,
1915                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1916{
1917   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1918   LLVMValueRef val = get_src(bld_base, instr->src[1]);
1919   LLVMValueRef val2 = NULL;
1920   int addr_bitsize = nir_src_bit_size(instr->src[0]);
1921   int val_bitsize = nir_src_bit_size(instr->src[1]);
1922   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
1923      val2 = get_src(bld_base, instr->src[2]);
1924
1925   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
1926                           val_bitsize, addr, val, val2, &result[0]);
1927}
1928
#if LLVM_VERSION_MAJOR >= 10
static void visit_shuffle(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr,
                          LLVMValueRef dst[4])
{
   /* Subgroup shuffle: src[0] is the data (cast to signed int of its
    * bit size), src[1] is the source-lane index (cast to uint). */
   LLVMValueRef data = cast_type(bld_base, get_src(bld_base, instr->src[0]),
                                 nir_type_int,
                                 nir_src_bit_size(instr->src[0]));
   LLVMValueRef lane = cast_type(bld_base, get_src(bld_base, instr->src[1]),
                                 nir_type_uint,
                                 nir_src_bit_size(instr->src[1]));

   bld_base->shuffle(bld_base, data, lane, instr, dst);
}
#endif
1942
1943
1944static void
1945visit_interp(struct lp_build_nir_context *bld_base,
1946             nir_intrinsic_instr *instr,
1947             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1948{
1949   struct gallivm_state *gallivm = bld_base->base.gallivm;
1950   LLVMBuilderRef builder = gallivm->builder;
1951   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1952   unsigned num_components = nir_dest_num_components(instr->dest);
1953   nir_variable *var = nir_deref_instr_get_variable(deref);
1954   unsigned const_index;
1955   LLVMValueRef indir_index;
1956   LLVMValueRef offsets[2] = { NULL, NULL };
1957   get_deref_offset(bld_base, deref, false, NULL, NULL,
1958                    &const_index, &indir_index);
1959   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
1960   bool sample = false;
1961   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
1962      for (unsigned i = 0; i < 2; i++) {
1963         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
1964         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
1965      }
1966   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
1967      offsets[0] = get_src(bld_base, instr->src[1]);
1968      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
1969      sample = true;
1970   }
1971   bld_base->interp_at(bld_base, num_components, var, centroid, sample,
1972                       const_index, indir_index, offsets, result);
1973}
1974
1975
1976static void
1977visit_load_scratch(struct lp_build_nir_context *bld_base,
1978                   nir_intrinsic_instr *instr,
1979                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1980{
1981   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1982
1983   bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
1984                          nir_dest_bit_size(instr->dest), offset, result);
1985}
1986
1987
1988static void
1989visit_store_scratch(struct lp_build_nir_context *bld_base,
1990                    nir_intrinsic_instr *instr)
1991{
1992   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1993   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1994   int writemask = instr->const_index[2];
1995   int nc = nir_src_num_components(instr->src[0]);
1996   int bitsize = nir_src_bit_size(instr->src[0]);
1997   bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
1998}
1999
2000
/* Central intrinsic dispatcher: translate one NIR intrinsic into LLVM IR
 * via the visit_* helpers / backend hooks, then write any produced value
 * to the instruction's dest. Unhandled intrinsics abort in debug builds. */
static void
visit_intrinsic(struct lp_build_nir_context *bld_base,
                nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   /* System values all go through a single backend hook. */
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_scoped_barrier:
      visit_barrier(bld_base);
      break;
   /* Memory barriers are no-ops for this single-threaded-per-lane
    * execution model. */
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
     break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_vote_feq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_elect:
      bld_base->elect(bld_base, result);
      break;
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_ballot:
      bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
#if LLVM_VERSION_MAJOR >= 10
   case nir_intrinsic_shuffle:
      visit_shuffle(bld_base, instr, result);
      break;
#endif
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation: {
      LLVMValueRef src1 = NULL;
      LLVMValueRef src0 = get_src(bld_base, instr->src[0]);
      /* read_invocation additionally takes the lane index in src[1]. */
      if (instr->intrinsic == nir_intrinsic_read_invocation) {
         src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
         src0 = cast_type(bld_base, src0, nir_type_int, nir_src_bit_size(instr->src[0]));
      }
      bld_base->read_invocation(bld_base, src0, nir_src_bit_size(instr->src[0]), src1, result);
      break;
   }
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   /* Intrinsics with no return leave result[0] NULL and skip the dest
    * assignment. */
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}
2217
2218
2219static void
2220visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
2221{
2222   struct lp_sampler_size_query_params params = { 0 };
2223   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
2224   LLVMValueRef explicit_lod = NULL;
2225   LLVMValueRef texture_unit_offset = NULL;
2226   for (unsigned i = 0; i < instr->num_srcs; i++) {
2227      switch (instr->src[i].src_type) {
2228      case nir_tex_src_lod:
2229         explicit_lod = cast_type(bld_base,
2230                                  get_src(bld_base, instr->src[i].src),
2231                                  nir_type_int, 32);
2232         break;
2233      case nir_tex_src_texture_offset:
2234         texture_unit_offset = get_src(bld_base, instr->src[i].src);
2235         break;
2236      default:
2237         break;
2238      }
2239   }
2240
2241   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
2242   params.texture_unit = instr->texture_index;
2243   params.explicit_lod = explicit_lod;
2244   params.is_sviewinfo = TRUE;
2245   params.sizes_out = sizes_out;
2246   params.samples_only = (instr->op == nir_texop_texture_samples);
2247   params.texture_unit_offset = texture_unit_offset;
2248
2249   if (instr->op == nir_texop_query_levels)
2250      params.explicit_lod = bld_base->uint_bld.zero;
2251   bld_base->tex_size(bld_base, &params);
2252   assign_dest(bld_base, &instr->dest,
2253               &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
2254}
2255
2256
2257static enum lp_sampler_lod_property
2258lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
2259                          nir_src lod_src)
2260{
2261   enum lp_sampler_lod_property lod_property;
2262
2263   if (nir_src_is_always_uniform(lod_src)) {
2264      lod_property = LP_SAMPLER_LOD_SCALAR;
2265   } else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
2266      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
2267         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2268      else
2269         lod_property = LP_SAMPLER_LOD_PER_QUAD;
2270   } else {
2271      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2272   }
2273   return lod_property;
2274}
2275
2276
2277static void
2278visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
2279{
2280   struct gallivm_state *gallivm = bld_base->base.gallivm;
2281   LLVMBuilderRef builder = gallivm->builder;
2282   LLVMValueRef coords[5];
2283   LLVMValueRef offsets[3] = { NULL };
2284   LLVMValueRef explicit_lod = NULL, ms_index = NULL;
2285   struct lp_sampler_params params;
2286   struct lp_derivatives derivs;
2287   unsigned sample_key = 0;
2288   nir_deref_instr *texture_deref_instr = NULL;
2289   nir_deref_instr *sampler_deref_instr = NULL;
2290   LLVMValueRef texture_unit_offset = NULL;
2291   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
2292   unsigned lod_src = 0;
2293   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
2294   unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
2295   memset(&params, 0, sizeof(params));
2296   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2297
2298   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
2299      visit_txs(bld_base, instr);
2300      return;
2301   }
2302   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
2303      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2304   else if (instr->op == nir_texop_tg4) {
2305      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
2306      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
2307   } else if (instr->op == nir_texop_lod)
2308      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
2309   for (unsigned i = 0; i < instr->num_srcs; i++) {
2310      switch (instr->src[i].src_type) {
2311      case nir_tex_src_coord: {
2312         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
2313         if (coord_vals == 1)
2314            coords[0] = coord;
2315         else {
2316            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
2317               coords[chan] = LLVMBuildExtractValue(builder, coord,
2318                                                    chan, "");
2319         }
2320         for (unsigned chan = coord_vals; chan < 5; chan++)
2321            coords[chan] = coord_undef;
2322
2323         break;
2324      }
2325      case nir_tex_src_texture_deref:
2326         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
2327         break;
2328      case nir_tex_src_sampler_deref:
2329         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
2330         break;
2331      case nir_tex_src_comparator:
2332         sample_key |= LP_SAMPLER_SHADOW;
2333         coords[4] = get_src(bld_base, instr->src[i].src);
2334         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
2335         break;
2336      case nir_tex_src_bias:
2337         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2338         lod_src = i;
2339         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
2340         break;
2341      case nir_tex_src_lod:
2342         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2343         lod_src = i;
2344         if (instr->op == nir_texop_txf)
2345            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
2346         else
2347            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
2348         break;
2349      case nir_tex_src_ddx: {
2350         int deriv_cnt = instr->coord_components;
2351         if (instr->is_array)
2352            deriv_cnt--;
2353         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
2354         if (deriv_cnt == 1)
2355            derivs.ddx[0] = deriv_val;
2356         else
2357            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2358               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
2359                                                        chan, "");
2360         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2361            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
2362         break;
2363      }
2364      case nir_tex_src_ddy: {
2365         int deriv_cnt = instr->coord_components;
2366         if (instr->is_array)
2367            deriv_cnt--;
2368         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
2369         if (deriv_cnt == 1)
2370            derivs.ddy[0] = deriv_val;
2371         else
2372            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2373               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
2374                                                        chan, "");
2375         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2376            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
2377         break;
2378      }
2379      case nir_tex_src_offset: {
2380         int offset_cnt = instr->coord_components;
2381         if (instr->is_array)
2382            offset_cnt--;
2383         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
2384         sample_key |= LP_SAMPLER_OFFSETS;
2385         if (offset_cnt == 1)
2386            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
2387         else {
2388            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
2389               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
2390                                                     chan, "");
2391               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
2392            }
2393         }
2394         break;
2395      }
2396      case nir_tex_src_ms_index:
2397         sample_key |= LP_SAMPLER_FETCH_MS;
2398         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
2399         break;
2400
2401      case nir_tex_src_texture_offset:
2402         texture_unit_offset = get_src(bld_base, instr->src[i].src);
2403         break;
2404      case nir_tex_src_sampler_offset:
2405         break;
2406      default:
2407         assert(0);
2408         break;
2409      }
2410   }
2411   if (!sampler_deref_instr)
2412      sampler_deref_instr = texture_deref_instr;
2413
2414   if (explicit_lod)
2415      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);
2416
2417   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
2418       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
2419      for (unsigned chan = 0; chan < coord_vals; ++chan)
2420         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
2421   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
2422      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
2423         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);
2424
2425   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
2426      /* move layer coord for 1d arrays. */
2427      coords[2] = coords[1];
2428      coords[1] = coord_undef;
2429   }
2430
2431   uint32_t samp_base_index = 0, tex_base_index = 0;
2432   if (!sampler_deref_instr) {
2433      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
2434      if (samp_src_index == -1) {
2435         samp_base_index = instr->sampler_index;
2436      }
2437   }
2438   if (!texture_deref_instr) {
2439      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
2440      if (tex_src_index == -1) {
2441         tex_base_index = instr->texture_index;
2442      }
2443   }
2444
2445   if (instr->op == nir_texop_txd) {
2446      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2447      params.derivs = &derivs;
2448      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
2449         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
2450            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2451         else
2452            lod_property = LP_SAMPLER_LOD_PER_QUAD;
2453      } else
2454         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2455   }
2456
2457   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2458   params.sample_key = sample_key;
2459   params.offsets = offsets;
2460   params.texture_index = tex_base_index;
2461   params.texture_index_offset = texture_unit_offset;
2462   params.sampler_index = samp_base_index;
2463   params.coords = coords;
2464   params.texel = texel;
2465   params.lod = explicit_lod;
2466   params.ms_index = ms_index;
2467   params.aniso_filter_table = bld_base->aniso_filter_table;
2468   bld_base->tex(bld_base, &params);
2469
2470   if (nir_dest_bit_size(instr->dest) != 32) {
2471      assert(nir_dest_bit_size(instr->dest) == 16);
2472      LLVMTypeRef vec_type = NULL;
2473      bool is_float = false;
2474      switch (nir_alu_type_get_base_type(instr->dest_type)) {
2475      case nir_type_float:
2476         is_float = true;
2477         break;
2478      case nir_type_int:
2479         vec_type = bld_base->int16_bld.vec_type;
2480         break;
2481      case nir_type_uint:
2482         vec_type = bld_base->uint16_bld.vec_type;
2483         break;
2484      default:
2485         unreachable("unexpected alu type");
2486      }
2487      for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
2488         if (is_float) {
2489            texel[i] = lp_build_float_to_half(gallivm, texel[i]);
2490         } else {
2491            texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
2492            texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
2493         }
2494      }
2495   }
2496
2497   assign_dest(bld_base, &instr->dest, texel);
2498}
2499
2500
2501static void
2502visit_ssa_undef(struct lp_build_nir_context *bld_base,
2503                const nir_ssa_undef_instr *instr)
2504{
2505   unsigned num_components = instr->def.num_components;
2506   LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2507   struct lp_build_context *undef_bld = get_int_bld(bld_base, true,
2508                                                    instr->def.bit_size);
2509   for (unsigned i = 0; i < num_components; i++)
2510      undef[i] = LLVMGetUndef(undef_bld->vec_type);
2511   memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
2512   assign_ssa_dest(bld_base, &instr->def, undef);
2513}
2514
2515
2516static void
2517visit_jump(struct lp_build_nir_context *bld_base,
2518           const nir_jump_instr *instr)
2519{
2520   switch (instr->type) {
2521   case nir_jump_break:
2522      bld_base->break_stmt(bld_base);
2523      break;
2524   case nir_jump_continue:
2525      bld_base->continue_stmt(bld_base);
2526      break;
2527   default:
2528      unreachable("Unknown jump instr\n");
2529   }
2530}
2531
2532
2533static void
2534visit_deref(struct lp_build_nir_context *bld_base,
2535            nir_deref_instr *instr)
2536{
2537   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
2538                                        nir_var_mem_global)) {
2539      return;
2540   }
2541
2542   LLVMValueRef result = NULL;
2543   switch(instr->deref_type) {
2544   case nir_deref_type_var: {
2545      struct hash_entry *entry =
2546         _mesa_hash_table_search(bld_base->vars, instr->var);
2547      result = entry->data;
2548      break;
2549   }
2550   default:
2551      unreachable("Unhandled deref_instr deref type");
2552   }
2553
2554   assign_ssa(bld_base, instr->dest.ssa.index, result);
2555}
2556
2557
2558static void
2559visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
2560{
2561   nir_foreach_instr(instr, block)
2562   {
2563      switch (instr->type) {
2564      case nir_instr_type_alu:
2565         visit_alu(bld_base, nir_instr_as_alu(instr));
2566         break;
2567      case nir_instr_type_load_const:
2568         visit_load_const(bld_base, nir_instr_as_load_const(instr));
2569         break;
2570      case nir_instr_type_intrinsic:
2571         visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
2572         break;
2573      case nir_instr_type_tex:
2574         visit_tex(bld_base, nir_instr_as_tex(instr));
2575         break;
2576      case nir_instr_type_phi:
2577         assert(0);
2578         break;
2579      case nir_instr_type_ssa_undef:
2580         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
2581         break;
2582      case nir_instr_type_jump:
2583         visit_jump(bld_base, nir_instr_as_jump(instr));
2584         break;
2585      case nir_instr_type_deref:
2586         visit_deref(bld_base, nir_instr_as_deref(instr));
2587         break;
2588      default:
2589         fprintf(stderr, "Unknown NIR instr type: ");
2590         nir_print_instr(instr, stderr);
2591         fprintf(stderr, "\n");
2592         abort();
2593      }
2594   }
2595}
2596
2597
2598static void
2599visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
2600{
2601   LLVMValueRef cond = get_src(bld_base, if_stmt->condition);
2602
2603   bld_base->if_cond(bld_base, cond);
2604   visit_cf_list(bld_base, &if_stmt->then_list);
2605
2606   if (!exec_list_is_empty(&if_stmt->else_list)) {
2607      bld_base->else_stmt(bld_base);
2608      visit_cf_list(bld_base, &if_stmt->else_list);
2609   }
2610   bld_base->endif_stmt(bld_base);
2611}
2612
2613
/* Emit a loop construct: open the loop, lower the body's control-flow
 * list, then close the loop via the backend callbacks.
 */
static void
visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
{
   bld_base->bgnloop(bld_base);
   visit_cf_list(bld_base, &loop->body);
   bld_base->endloop(bld_base);
}
2621
2622
2623static void
2624visit_cf_list(struct lp_build_nir_context *bld_base,
2625              struct exec_list *list)
2626{
2627   foreach_list_typed(nir_cf_node, node, node, list)
2628   {
2629      switch (node->type) {
2630      case nir_cf_node_block:
2631         visit_block(bld_base, nir_cf_node_as_block(node));
2632         break;
2633      case nir_cf_node_if:
2634         visit_if(bld_base, nir_cf_node_as_if(node));
2635         break;
2636      case nir_cf_node_loop:
2637         visit_loop(bld_base, nir_cf_node_as_loop(node));
2638         break;
2639      default:
2640         assert(0);
2641      }
2642   }
2643}
2644
2645
/* Forward a shader output variable declaration to the backend's
 * emit_var_decl callback.  The 'nir' parameter is unused by this
 * implementation.
 */
static void
handle_shader_output_decl(struct lp_build_nir_context *bld_base,
                          struct nir_shader *nir,
                          struct nir_variable *variable)
{
   bld_base->emit_var_decl(bld_base, variable);
}
2653
2654
/* Vector registers are stored as arrays on the LLVM side, so we can use
   GEP on them; to do exec-mask stores we need to operate on a single
   component at a time.
   Arrays are laid out as:
   0.x, 1.x, 2.x, 3.x
   0.y, 1.y, 2.y, 3.y
   ....
*/
2663static LLVMTypeRef
2664get_register_type(struct lp_build_nir_context *bld_base,
2665                  nir_register *reg)
2666{
2667   if (is_aos(bld_base))
2668      return bld_base->base.int_vec_type;
2669
2670   struct lp_build_context *int_bld =
2671      get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
2672
2673   LLVMTypeRef type = int_bld->vec_type;
2674   if (reg->num_array_elems)
2675      type = LLVMArrayType(type, reg->num_array_elems);
2676   if (reg->num_components > 1)
2677      type = LLVMArrayType(type, reg->num_components);
2678
2679   return type;
2680}
2681
2682
/* Top-level entry point: lower a NIR shader to LLVM IR through the
 * callbacks installed on bld_base.  Runs the final pre-codegen NIR passes,
 * declares outputs and registers, then walks the (single) function body.
 * Returns true; ownership of per-run tables allocated here is released
 * before returning.
 */
bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
                       struct nir_shader *nir)
{
   struct nir_function *func;

   /* Leave SSA form (phis become register moves) and lower locals so the
    * visitor only has to deal with registers; then drop dead derefs/vars. */
   nir_convert_from_ssa(nir, true);
   nir_lower_locals_to_regs(nir);
   nir_remove_dead_derefs(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   /* AOS codegen can't handle vec ops directly; turn them into movs. */
   if (is_aos(bld_base)) {
      nir_move_vec_src_uses_to_dest(nir);
      nir_lower_vec_to_movs(nir, NULL, NULL);
   }

   nir_foreach_shader_out_variable(variable, nir)
      handle_shader_output_decl(bld_base, nir, variable);

   /* With lowered I/O there are no output variables to iterate; synthesize
    * a vec4 declaration per written output location instead. */
   if (nir->info.io_lowered) {
      uint64_t outputs_written = nir->info.outputs_written;

      while (outputs_written) {
         unsigned location = u_bit_scan64(&outputs_written);
         nir_variable var = {0};

         var.type = glsl_vec4_type();
         var.data.mode = nir_var_shader_out;
         var.data.location = location;
         /* Driver location = number of written slots below this one. */
         var.data.driver_location = util_bitcount64(nir->info.outputs_written &
                                                    BITFIELD64_MASK(location));
         bld_base->emit_var_decl(bld_base, &var);
      }
   }

   /* Per-run lookup tables: NIR register/variable -> LLVM value. */
   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   /* Allocate stack storage for every NIR register up front. */
   nir_foreach_register(reg, &func->impl->registers) {
      LLVMTypeRef type = get_register_type(bld_base, reg);
      LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
                                               type, "reg");
      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
   }
   nir_index_ssa_defs(func->impl);
   /* NOTE(review): calloc result is not checked — assumes OOM aborts
    * elsewhere or is acceptable here; confirm project policy. */
   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
   visit_cf_list(bld_base, &func->impl->body);

   free(bld_base->ssa_defs);
   ralloc_free(bld_base->vars);
   ralloc_free(bld_base->regs);
   ralloc_free(bld_base->range_ht);
   return true;
}
2741
2742
2743/* do some basic opts to remove some things we don't want to see. */
/* do some basic opts to remove some things we don't want to see. */
void
lp_build_opt_nir(struct nir_shader *nir)
{
   bool progress;

   /* One-time lowerings: gather offsets, projective texturing, implicit
    * LOD outside fragment shaders, frexp, flrp, and fp16 casts. */
   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_tg4_offsets = true,
      .lower_txp = ~0u,
      .lower_invalid_implicit_lod = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, nir_lower_frexp);

   NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   /* Iterate constant folding / algebraic / pack lowering to a fixpoint;
    * subgroup lowering may expose new folding opportunities. */
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_lower_pack);

      /* Re-run tex lowering: algebraic rewrites may create new tex
       * instructions needing implicit-LOD lowering. */
      nir_lower_tex_options options = { .lower_invalid_implicit_lod = true, };
      NIR_PASS_V(nir, nir_lower_tex, &options);

      /* Subgroup size follows the native SIMD width in 32-bit lanes. */
      const nir_lower_subgroups_options subgroups_options = {
         .subgroup_size = lp_native_vector_width / 32,
         .ballot_bit_size = 32,
         .ballot_components = 1,
         .lower_to_scalar = true,
         .lower_subgroup_masks = true,
         .lower_relative_shuffle = true,
      };
      NIR_PASS(progress, nir, nir_lower_subgroups, &subgroups_options);
   } while (progress);

   /* Late algebraic opts, cleaning up after each productive pass. */
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(nir, nir_copy_prop);
         NIR_PASS_V(nir, nir_opt_dce);
         NIR_PASS_V(nir, nir_opt_cse);
      }
   } while (progress);

   /* Booleans become 32-bit ints for codegen; clean up if that changed
    * anything. */
   if (nir_lower_bool_to_int32(nir)) {
      NIR_PASS_V(nir, nir_copy_prop);
      NIR_PASS_V(nir, nir_opt_dce);
   }
}
2794