1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir_to_dxil.h"
25
26#include "dxil_container.h"
27#include "dxil_dump.h"
28#include "dxil_enums.h"
29#include "dxil_function.h"
30#include "dxil_module.h"
31#include "dxil_nir.h"
32#include "dxil_signature.h"
33
34#include "nir/nir_builder.h"
35#include "util/u_debug.h"
36#include "util/u_dynarray.h"
37#include "util/u_math.h"
38
39#include "git_sha1.h"
40
41#include "vulkan/vulkan_core.h"
42
43#include <stdint.h>
44
45int debug_dxil = 0;
46
47static const struct debug_named_value
48dxil_debug_options[] = {
49   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
50   { "dump_blob",  DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" },
51   { "trace",  DXIL_DEBUG_TRACE , "Trace instruction conversion" },
52   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"},
53   DEBUG_NAMED_VALUE_END
54};
55
56DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
57
58#define NIR_INSTR_UNSUPPORTED(instr) \
59   if (debug_dxil & DXIL_DEBUG_VERBOSE) \
60   do { \
61      fprintf(stderr, "Unsupported instruction:"); \
62      nir_print_instr(instr, stderr); \
63      fprintf(stderr, "\n"); \
64   } while (0)
65
66#define TRACE_CONVERSION(instr) \
67   if (debug_dxil & DXIL_DEBUG_TRACE) \
68      do { \
69         fprintf(stderr, "Convert '"); \
70         nir_print_instr(instr, stderr); \
71         fprintf(stderr, "'\n"); \
72      } while (0)
73
74static const nir_shader_compiler_options
75nir_options = {
76   .lower_ineg = true,
77   .lower_fneg = true,
78   .lower_ffma16 = true,
79   .lower_ffma32 = true,
80   .lower_isign = true,
81   .lower_fsign = true,
82   .lower_iabs = true,
83   .lower_fmod = true,
84   .lower_fpow = true,
85   .lower_scmp = true,
86   .lower_ldexp = true,
87   .lower_flrp16 = true,
88   .lower_flrp32 = true,
89   .lower_flrp64 = true,
90   .lower_bitfield_extract = true,
91   .lower_find_msb_to_reverse = true,
92   .lower_extract_word = true,
93   .lower_extract_byte = true,
94   .lower_insert_word = true,
95   .lower_insert_byte = true,
96   .lower_all_io_to_elements = true,
97   .lower_all_io_to_temps = true,
98   .lower_hadd = true,
99   .lower_uadd_sat = true,
100   .lower_usub_sat = true,
101   .lower_iadd_sat = true,
102   .lower_uadd_carry = true,
103   .lower_mul_high = true,
104   .lower_rotate = true,
105   .lower_pack_64_2x32_split = true,
106   .lower_pack_32_2x16_split = true,
107   .lower_unpack_64_2x32_split = true,
108   .lower_unpack_32_2x16_split = true,
109   .lower_unpack_half_2x16 = true,
110   .lower_unpack_snorm_2x16 = true,
111   .lower_unpack_snorm_4x8 = true,
112   .lower_unpack_unorm_2x16 = true,
113   .lower_unpack_unorm_4x8 = true,
114   .lower_interpolate_at = true,
115   .has_fsub = true,
116   .has_isub = true,
117   .use_scoped_barrier = true,
118   .vertex_id_zero_based = true,
119   .lower_base_vertex = true,
120   .lower_helper_invocation = true,
121   .has_cs_global_id = true,
122   .has_txs = true,
123   .lower_mul_2x32_64 = true,
124   .lower_doubles_options =
125      nir_lower_drcp |
126      nir_lower_dsqrt |
127      nir_lower_drsq |
128      nir_lower_dfract |
129      nir_lower_dtrunc |
130      nir_lower_dfloor |
131      nir_lower_dceil |
132      nir_lower_dround_even,
133   .max_unroll_iterations = 32, /* arbitrary */
134   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
135};
136
137const nir_shader_compiler_options*
138dxil_get_nir_compiler_options(void)
139{
140   return &nir_options;
141}
142
143static bool
144emit_llvm_ident(struct dxil_module *m)
145{
146   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
147   if (!compiler)
148      return false;
149
150   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
151   return llvm_ident &&
152          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
153}
154
155static bool
156emit_named_version(struct dxil_module *m, const char *name,
157                   int major, int minor)
158{
159   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
160   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
161   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
162   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
163                                                     ARRAY_SIZE(version_nodes));
164   return dxil_add_metadata_named_node(m, name, &version, 1);
165}
166
167static const char *
168get_shader_kind_str(enum dxil_shader_kind kind)
169{
170   switch (kind) {
171   case DXIL_PIXEL_SHADER:
172      return "ps";
173   case DXIL_VERTEX_SHADER:
174      return "vs";
175   case DXIL_GEOMETRY_SHADER:
176      return "gs";
177   case DXIL_HULL_SHADER:
178      return "hs";
179   case DXIL_DOMAIN_SHADER:
180      return "ds";
181   case DXIL_COMPUTE_SHADER:
182      return "cs";
183   default:
184      unreachable("invalid shader kind");
185   }
186}
187
188static bool
189emit_dx_shader_model(struct dxil_module *m)
190{
191   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
192   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
193   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
194   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
195                                                minor_node };
196   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));
197
198   return dxil_add_metadata_named_node(m, "dx.shaderModel",
199                                       &dx_shader_model, 1);
200}
201
202enum {
203   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
204   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
205};
206
207enum dxil_intr {
208   DXIL_INTR_LOAD_INPUT = 4,
209   DXIL_INTR_STORE_OUTPUT = 5,
210   DXIL_INTR_FABS = 6,
211   DXIL_INTR_SATURATE = 7,
212
213   DXIL_INTR_ISFINITE = 10,
214   DXIL_INTR_ISNORMAL = 11,
215
216   DXIL_INTR_FCOS = 12,
217   DXIL_INTR_FSIN = 13,
218
219   DXIL_INTR_FEXP2 = 21,
220   DXIL_INTR_FRC = 22,
221   DXIL_INTR_FLOG2 = 23,
222
223   DXIL_INTR_SQRT = 24,
224   DXIL_INTR_RSQRT = 25,
225   DXIL_INTR_ROUND_NE = 26,
226   DXIL_INTR_ROUND_NI = 27,
227   DXIL_INTR_ROUND_PI = 28,
228   DXIL_INTR_ROUND_Z = 29,
229
230   DXIL_INTR_BFREV = 30,
231   DXIL_INTR_COUNTBITS = 31,
232   DXIL_INTR_FIRSTBIT_LO = 32,
233   DXIL_INTR_FIRSTBIT_HI = 33,
234   DXIL_INTR_FIRSTBIT_SHI = 34,
235
236   DXIL_INTR_FMAX = 35,
237   DXIL_INTR_FMIN = 36,
238   DXIL_INTR_IMAX = 37,
239   DXIL_INTR_IMIN = 38,
240   DXIL_INTR_UMAX = 39,
241   DXIL_INTR_UMIN = 40,
242
243   DXIL_INTR_FMA = 47,
244
245   DXIL_INTR_IBFE = 51,
246   DXIL_INTR_UBFE = 52,
247   DXIL_INTR_BFI = 53,
248
249   DXIL_INTR_CREATE_HANDLE = 57,
250   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,
251
252   DXIL_INTR_SAMPLE = 60,
253   DXIL_INTR_SAMPLE_BIAS = 61,
254   DXIL_INTR_SAMPLE_LEVEL = 62,
255   DXIL_INTR_SAMPLE_GRAD = 63,
256   DXIL_INTR_SAMPLE_CMP = 64,
257   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,
258
259   DXIL_INTR_TEXTURE_LOAD = 66,
260   DXIL_INTR_TEXTURE_STORE = 67,
261
262   DXIL_INTR_BUFFER_LOAD = 68,
263   DXIL_INTR_BUFFER_STORE = 69,
264
265   DXIL_INTR_TEXTURE_SIZE = 72,
266   DXIL_INTR_TEXTURE_GATHER = 73,
267   DXIL_INTR_TEXTURE_GATHER_CMP = 74,
268
269   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
270   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
271   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,
272
273   DXIL_INTR_ATOMIC_BINOP = 78,
274   DXIL_INTR_ATOMIC_CMPXCHG = 79,
275   DXIL_INTR_BARRIER = 80,
276   DXIL_INTR_TEXTURE_LOD = 81,
277
278   DXIL_INTR_DISCARD = 82,
279   DXIL_INTR_DDX_COARSE = 83,
280   DXIL_INTR_DDY_COARSE = 84,
281   DXIL_INTR_DDX_FINE = 85,
282   DXIL_INTR_DDY_FINE = 86,
283
284   DXIL_INTR_EVAL_SNAPPED = 87,
285   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
286   DXIL_INTR_EVAL_CENTROID = 89,
287
288   DXIL_INTR_SAMPLE_INDEX = 90,
289   DXIL_INTR_COVERAGE = 91,
290
291   DXIL_INTR_THREAD_ID = 93,
292   DXIL_INTR_GROUP_ID = 94,
293   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
294   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,
295
296   DXIL_INTR_EMIT_STREAM = 97,
297   DXIL_INTR_CUT_STREAM = 98,
298
299   DXIL_INTR_GS_INSTANCE_ID = 100,
300
301   DXIL_INTR_MAKE_DOUBLE = 101,
302   DXIL_INTR_SPLIT_DOUBLE = 102,
303
304   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
305   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
306   DXIL_INTR_DOMAIN_LOCATION = 105,
307   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
308   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
309   DXIL_INTR_PRIMITIVE_ID = 108,
310
311   DXIL_INTR_LEGACY_F32TOF16 = 130,
312   DXIL_INTR_LEGACY_F16TOF32 = 131,
313
314   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
315};
316
317enum dxil_atomic_op {
318   DXIL_ATOMIC_ADD = 0,
319   DXIL_ATOMIC_AND = 1,
320   DXIL_ATOMIC_OR = 2,
321   DXIL_ATOMIC_XOR = 3,
322   DXIL_ATOMIC_IMIN = 4,
323   DXIL_ATOMIC_IMAX = 5,
324   DXIL_ATOMIC_UMIN = 6,
325   DXIL_ATOMIC_UMAX = 7,
326   DXIL_ATOMIC_EXCHANGE = 8,
327};
328
329typedef struct {
330   unsigned id;
331   unsigned binding;
332   unsigned size;
333   unsigned space;
334} resource_array_layout;
335
336static void
337fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
338                       const struct dxil_type *struct_type,
339                       const char *name, const resource_array_layout *layout)
340{
341   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
342   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);
343
344   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
345   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
346   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
347   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
348   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
349   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
350}
351
352static const struct dxil_mdnode *
353emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
354                  const char *name, const resource_array_layout *layout,
355                  enum dxil_component_type comp_type,
356                  enum dxil_resource_kind res_kind)
357{
358   const struct dxil_mdnode *fields[9];
359
360   const struct dxil_mdnode *metadata_tag_nodes[2];
361
362   fill_resource_metadata(m, fields, elem_type, name, layout);
363   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
364   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
365   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
366       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
367      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
368      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
369      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
370   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
371      fields[8] = NULL;
372   else
373      unreachable("Structured buffers not supported yet");
374
375   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
376}
377
378static const struct dxil_mdnode *
379emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
380                  const char *name, const resource_array_layout *layout,
381                  enum dxil_component_type comp_type,
382                  enum dxil_resource_kind res_kind)
383{
384   const struct dxil_mdnode *fields[11];
385
386   const struct dxil_mdnode *metadata_tag_nodes[2];
387
388   fill_resource_metadata(m, fields, struct_type, name, layout);
389   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
390   fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent
391   fields[8] = dxil_get_metadata_int1(m, false); // has counter
392   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
393   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
394       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
395      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
396      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
397      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
398   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
399      fields[10] = NULL;
400   else
401      unreachable("Structured buffers not supported yet");
402
403   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
404}
405
406static const struct dxil_mdnode *
407emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
408                  const char *name, const resource_array_layout *layout,
409                  unsigned size)
410{
411   const struct dxil_mdnode *fields[8];
412
413   fill_resource_metadata(m, fields, struct_type, name, layout);
414   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
415   fields[7] = NULL; // metadata
416
417   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
418}
419
420static const struct dxil_mdnode *
421emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
422                      nir_variable *var, const resource_array_layout *layout)
423{
424   const struct dxil_mdnode *fields[8];
425   const struct glsl_type *type = glsl_without_array(var->type);
426
427   fill_resource_metadata(m, fields, struct_type, var->name, layout);
428   fields[6] = dxil_get_metadata_int32(m, DXIL_SAMPLER_KIND_DEFAULT); // sampler kind
429   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
430          DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
431   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
432   fields[7] = NULL; // metadata
433
434   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
435}
436
437
438#define MAX_SRVS 128
439#define MAX_UAVS 64
440#define MAX_CBVS 64 // ??
441#define MAX_SAMPLERS 64 // ??
442
443struct dxil_def {
444   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
445};
446
447struct ntd_context {
448   void *ralloc_ctx;
449   const struct nir_to_dxil_options *opts;
450   struct nir_shader *shader;
451
452   struct dxil_module mod;
453
454   struct util_dynarray srv_metadata_nodes;
455   const struct dxil_value *srv_handles[MAX_SRVS];
456
457   struct util_dynarray uav_metadata_nodes;
458   const struct dxil_value *ssbo_handles[MAX_UAVS];
459   const struct dxil_value *image_handles[MAX_UAVS];
460   uint32_t num_uavs;
461
462   struct util_dynarray cbv_metadata_nodes;
463   const struct dxil_value *cbv_handles[MAX_CBVS];
464
465   struct util_dynarray sampler_metadata_nodes;
466   const struct dxil_value *sampler_handles[MAX_SAMPLERS];
467
468   struct util_dynarray resources;
469
470   const struct dxil_mdnode *shader_property_nodes[6];
471   size_t num_shader_property_nodes;
472
473   struct dxil_def *defs;
474   unsigned num_defs;
475   struct hash_table *phis;
476
477   const struct dxil_value *sharedvars;
478   const struct dxil_value *scratchvars;
479   struct hash_table *consts;
480
481   nir_variable *ps_front_face;
482   nir_variable *system_value[SYSTEM_VALUE_MAX];
483
484   nir_function *tess_ctrl_patch_constant_func;
485   unsigned tess_input_control_point_count;
486
487   struct dxil_func_def *main_func_def;
488   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
489   unsigned unnamed_ubo_count;
490};
491
492static const char*
493unary_func_name(enum dxil_intr intr)
494{
495   switch (intr) {
496   case DXIL_INTR_COUNTBITS:
497   case DXIL_INTR_FIRSTBIT_HI:
498   case DXIL_INTR_FIRSTBIT_SHI:
499   case DXIL_INTR_FIRSTBIT_LO:
500      return "dx.op.unaryBits";
501   case DXIL_INTR_ISFINITE:
502   case DXIL_INTR_ISNORMAL:
503      return "dx.op.isSpecialFloat";
504   default:
505      return "dx.op.unary";
506   }
507}
508
509static const struct dxil_value *
510emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
511                enum dxil_intr intr,
512                const struct dxil_value *op0)
513{
514   const struct dxil_func *func = dxil_get_function(&ctx->mod,
515                                                    unary_func_name(intr),
516                                                    overload);
517   if (!func)
518      return NULL;
519
520   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
521   if (!opcode)
522      return NULL;
523
524   const struct dxil_value *args[] = {
525     opcode,
526     op0
527   };
528
529   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
530}
531
532static const struct dxil_value *
533emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
534                 enum dxil_intr intr,
535                 const struct dxil_value *op0, const struct dxil_value *op1)
536{
537   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
538   if (!func)
539      return NULL;
540
541   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
542   if (!opcode)
543      return NULL;
544
545   const struct dxil_value *args[] = {
546     opcode,
547     op0,
548     op1
549   };
550
551   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
552}
553
554static const struct dxil_value *
555emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
556                   enum dxil_intr intr,
557                   const struct dxil_value *op0,
558                   const struct dxil_value *op1,
559                   const struct dxil_value *op2)
560{
561   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
562   if (!func)
563      return NULL;
564
565   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
566   if (!opcode)
567      return NULL;
568
569   const struct dxil_value *args[] = {
570     opcode,
571     op0,
572     op1,
573     op2
574   };
575
576   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
577}
578
579static const struct dxil_value *
580emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
581                     enum dxil_intr intr,
582                     const struct dxil_value *op0,
583                     const struct dxil_value *op1,
584                     const struct dxil_value *op2,
585                     const struct dxil_value *op3)
586{
587   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
588   if (!func)
589      return NULL;
590
591   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
592   if (!opcode)
593      return NULL;
594
595   const struct dxil_value *args[] = {
596     opcode,
597     op0,
598     op1,
599     op2,
600     op3
601   };
602
603   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
604}
605
606static const struct dxil_value *
607emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
608{
609   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
610   if (!func)
611      return NULL;
612
613   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
614       DXIL_INTR_THREAD_ID);
615   if (!opcode)
616      return NULL;
617
618   const struct dxil_value *args[] = {
619     opcode,
620     comp
621   };
622
623   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
624}
625
626static const struct dxil_value *
627emit_threadidingroup_call(struct ntd_context *ctx,
628                          const struct dxil_value *comp)
629{
630   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);
631
632   if (!func)
633      return NULL;
634
635   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
636       DXIL_INTR_THREAD_ID_IN_GROUP);
637   if (!opcode)
638      return NULL;
639
640   const struct dxil_value *args[] = {
641     opcode,
642     comp
643   };
644
645   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
646}
647
648static const struct dxil_value *
649emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
650{
651   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);
652
653   if (!func)
654      return NULL;
655
656   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
657      DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
658   if (!opcode)
659      return NULL;
660
661   const struct dxil_value *args[] = {
662     opcode
663   };
664
665   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
666}
667
668static const struct dxil_value *
669emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
670{
671   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);
672
673   if (!func)
674      return NULL;
675
676   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
677       DXIL_INTR_GROUP_ID);
678   if (!opcode)
679      return NULL;
680
681   const struct dxil_value *args[] = {
682     opcode,
683     comp
684   };
685
686   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
687}
688
689static const struct dxil_value *
690emit_bufferload_call(struct ntd_context *ctx,
691                     const struct dxil_value *handle,
692                     const struct dxil_value *coord[2],
693                     enum overload_type overload)
694{
695   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
696   if (!func)
697      return NULL;
698
699   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
700      DXIL_INTR_BUFFER_LOAD);
701   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };
702
703   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
704}
705
706static bool
707emit_bufferstore_call(struct ntd_context *ctx,
708                      const struct dxil_value *handle,
709                      const struct dxil_value *coord[2],
710                      const struct dxil_value *value[4],
711                      const struct dxil_value *write_mask,
712                      enum overload_type overload)
713{
714   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);
715
716   if (!func)
717      return false;
718
719   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
720      DXIL_INTR_BUFFER_STORE);
721   const struct dxil_value *args[] = {
722      opcode, handle, coord[0], coord[1],
723      value[0], value[1], value[2], value[3],
724      write_mask
725   };
726
727   return dxil_emit_call_void(&ctx->mod, func,
728                              args, ARRAY_SIZE(args));
729}
730
731static const struct dxil_value *
732emit_textureload_call(struct ntd_context *ctx,
733                      const struct dxil_value *handle,
734                      const struct dxil_value *coord[3],
735                      enum overload_type overload)
736{
737   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
738   if (!func)
739      return NULL;
740   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
741   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
742
743   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
744      DXIL_INTR_TEXTURE_LOAD);
745   const struct dxil_value *args[] = { opcode, handle,
746      /*lod_or_sample*/ int_undef,
747      coord[0], coord[1], coord[2],
748      /* offsets */ int_undef, int_undef, int_undef};
749
750   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
751}
752
753static bool
754emit_texturestore_call(struct ntd_context *ctx,
755                       const struct dxil_value *handle,
756                       const struct dxil_value *coord[3],
757                       const struct dxil_value *value[4],
758                       const struct dxil_value *write_mask,
759                       enum overload_type overload)
760{
761   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);
762
763   if (!func)
764      return false;
765
766   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
767      DXIL_INTR_TEXTURE_STORE);
768   const struct dxil_value *args[] = {
769      opcode, handle, coord[0], coord[1], coord[2],
770      value[0], value[1], value[2], value[3],
771      write_mask
772   };
773
774   return dxil_emit_call_void(&ctx->mod, func,
775                              args, ARRAY_SIZE(args));
776}
777
778static const struct dxil_value *
779emit_atomic_binop(struct ntd_context *ctx,
780                  const struct dxil_value *handle,
781                  enum dxil_atomic_op atomic_op,
782                  const struct dxil_value *coord[3],
783                  const struct dxil_value *value)
784{
785   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);
786
787   if (!func)
788      return false;
789
790   const struct dxil_value *opcode =
791      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
792   const struct dxil_value *atomic_op_value =
793      dxil_module_get_int32_const(&ctx->mod, atomic_op);
794   const struct dxil_value *args[] = {
795      opcode, handle, atomic_op_value,
796      coord[0], coord[1], coord[2], value
797   };
798
799   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
800}
801
802static const struct dxil_value *
803emit_atomic_cmpxchg(struct ntd_context *ctx,
804                    const struct dxil_value *handle,
805                    const struct dxil_value *coord[3],
806                    const struct dxil_value *cmpval,
807                    const struct dxil_value *newval)
808{
809   const struct dxil_func *func =
810      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);
811
812   if (!func)
813      return false;
814
815   const struct dxil_value *opcode =
816      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
817   const struct dxil_value *args[] = {
818      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
819   };
820
821   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
822}
823
824static const struct dxil_value *
825emit_createhandle_call(struct ntd_context *ctx,
826                       enum dxil_resource_class resource_class,
827                       unsigned resource_range_id,
828                       const struct dxil_value *resource_range_index,
829                       bool non_uniform_resource_index)
830{
831   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
832   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
833   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
834   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
835   if (!opcode || !resource_class_value || !resource_range_id_value ||
836       !non_uniform_resource_index_value)
837      return NULL;
838
839   const struct dxil_value *args[] = {
840      opcode,
841      resource_class_value,
842      resource_range_id_value,
843      resource_range_index,
844      non_uniform_resource_index_value
845   };
846
847   const struct dxil_func *func =
848         dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);
849
850   if (!func)
851         return NULL;
852
853   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
854}
855
856static const struct dxil_value *
857emit_createhandle_call_const_index(struct ntd_context *ctx,
858                                   enum dxil_resource_class resource_class,
859                                   unsigned resource_range_id,
860                                   unsigned resource_range_index,
861                                   bool non_uniform_resource_index)
862{
863
864   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
865   if (!resource_range_index_value)
866      return NULL;
867
868   return emit_createhandle_call(ctx, resource_class, resource_range_id,
869                                 resource_range_index_value,
870                                 non_uniform_resource_index);
871}
872
873static void
874add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
875             enum dxil_resource_kind kind,
876             const resource_array_layout *layout)
877{
878   struct dxil_resource_v0 *resource_v0 = NULL;
879   struct dxil_resource_v1 *resource_v1 = NULL;
880   if (ctx->mod.minor_validator >= 6) {
881      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
882      resource_v0 = &resource_v1->v0;
883   } else {
884      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
885   }
886   resource_v0->resource_type = type;
887   resource_v0->space = layout->space;
888   resource_v0->lower_bound = layout->binding;
889   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
890      resource_v0->upper_bound = UINT_MAX;
891   else
892      resource_v0->upper_bound = layout->binding + layout->size - 1;
893   if (type == DXIL_RES_UAV_TYPED ||
894       type == DXIL_RES_UAV_RAW ||
895       type == DXIL_RES_UAV_STRUCTURED) {
896      uint32_t new_uav_count = ctx->num_uavs + layout->size;
897      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
898         ctx->num_uavs = UINT_MAX;
899      else
900         ctx->num_uavs = new_uav_count;
901      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
902         ctx->mod.feats.use_64uavs = 1;
903   }
904
905   if (resource_v1) {
906      resource_v1->resource_kind = kind;
907      /* No flags supported yet */
908      resource_v1->resource_flags = 0;
909   }
910}
911
912static unsigned
913get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,
914                unsigned space, unsigned binding)
915{
916   unsigned offset = 0;
917   unsigned count = 0;
918
919   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
920   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
921   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
922   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
923
924   switch (class) {
925   case DXIL_RESOURCE_CLASS_UAV:
926      offset = num_srvs + num_samplers + num_cbvs;
927      count = num_uavs;
928      break;
929   case DXIL_RESOURCE_CLASS_SRV:
930      offset = num_samplers + num_cbvs;
931      count = num_srvs;
932      break;
933   case DXIL_RESOURCE_CLASS_SAMPLER:
934      offset = num_cbvs;
935      count = num_samplers;
936      break;
937   case DXIL_RESOURCE_CLASS_CBV:
938      offset = 0;
939      count = num_cbvs;
940      break;
941   }
942
943   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
944      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
945   assert(offset + count <= ctx->resources.size / resource_element_size);
946   for (unsigned i = offset; i < offset + count; ++i) {
947      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
948      if (resource->space == space &&
949          resource->lower_bound <= binding &&
950          resource->upper_bound >= binding) {
951         return i - offset;
952      }
953   }
954
955   unreachable("Resource access for undeclared range");
956   return 0;
957}
958
959static bool
960emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
961{
962   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
963   unsigned binding = var->data.binding;
964   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
965
966   enum dxil_component_type comp_type;
967   enum dxil_resource_kind res_kind;
968   enum dxil_resource_type res_type;
969   if (var->data.mode == nir_var_mem_ssbo) {
970      comp_type = DXIL_COMP_TYPE_INVALID;
971      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
972      res_type = DXIL_RES_SRV_RAW;
973   } else {
974      comp_type = dxil_get_comp_type(var->type);
975      res_kind = dxil_get_resource_kind(var->type);
976      res_type = DXIL_RES_SRV_TYPED;
977   }
978   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);
979
980   if (glsl_type_is_array(var->type))
981      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);
982
983   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
984                                                          &layout, comp_type, res_kind);
985
986   if (!srv_meta)
987      return false;
988
989   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
990   add_resource(ctx, res_type, res_kind, &layout);
991   if (res_type == DXIL_RES_SRV_RAW)
992      ctx->mod.raw_and_structured_buffers = true;
993
994   return true;
995}
996
997static bool
998emit_globals(struct ntd_context *ctx, unsigned size)
999{
1000   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
1001      size++;
1002
1003   if (!size)
1004      return true;
1005
1006   const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
1007      DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */);
1008   if (!struct_type)
1009      return false;
1010
1011   const struct dxil_type *array_type =
1012      dxil_module_get_array_type(&ctx->mod, struct_type, size);
1013   if (!array_type)
1014      return false;
1015
1016   resource_array_layout layout = {0, 0, size, 0};
1017   const struct dxil_mdnode *uav_meta =
1018      emit_uav_metadata(&ctx->mod, array_type,
1019                                   "globals", &layout,
1020                                   DXIL_COMP_TYPE_INVALID,
1021                                   DXIL_RESOURCE_KIND_RAW_BUFFER);
1022   if (!uav_meta)
1023      return false;
1024
1025   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
1026   if (ctx->mod.minor_validator < 6 &&
1027       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
1028      ctx->mod.feats.use_64uavs = 1;
1029   /* Handles to UAVs used for kernel globals are created on-demand */
1030   add_resource(ctx, DXIL_RES_UAV_RAW, DXIL_RESOURCE_KIND_RAW_BUFFER, &layout);
1031   ctx->mod.raw_and_structured_buffers = true;
1032   return true;
1033}
1034
1035static bool
1036emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
1037         enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name)
1038{
1039   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
1040   resource_array_layout layout = { id, binding, count, space };
1041
1042   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);
1043   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
1044   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
1045                                                          &layout, comp_type, res_kind);
1046
1047   if (!uav_meta)
1048      return false;
1049
1050   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
1051   if (ctx->mod.minor_validator < 6 &&
1052       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
1053      ctx->mod.feats.use_64uavs = 1;
1054
1055   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
1056   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
1057      ctx->mod.raw_and_structured_buffers = true;
1058   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
1059       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
1060      ctx->mod.feats.uavs_at_every_stage = true;
1061
1062   return true;
1063}
1064
1065static bool
1066emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
1067{
1068   unsigned binding, space;
1069   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
1070      /* For GL, the image intrinsics are already lowered, using driver_location
1071       * as the 0-based image index. Use space 1 so that we can keep using these
1072       * NIR constants without having to remap them, and so they don't overlap
1073       * SSBOs, which are also 0-based UAV bindings.
1074       */
1075      binding = var->data.driver_location;
1076      space = 1;
1077   } else {
1078      binding = var->data.binding;
1079      space = var->data.descriptor_set;
1080   }
1081   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
1082   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
1083   const char *name = var->name;
1084
1085   return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);
1086}
1087
1088static void
1089var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,
1090                                           const struct nir_constant *c,
1091                                           const struct glsl_type *type,
1092                                           void *const_vals,
1093                                           unsigned int offset)
1094{
1095   assert(glsl_type_is_vector_or_scalar(type));
1096   unsigned int components = glsl_get_vector_elements(type);
1097   unsigned bit_size = glsl_get_bit_size(type);
1098   unsigned int increment = bit_size / 8;
1099
1100   for (unsigned int comp = 0; comp < components; comp++) {
1101      uint8_t *dst = (uint8_t *)const_vals + offset;
1102
1103      switch (bit_size) {
1104      case 64:
1105         memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));
1106         break;
1107      case 32:
1108         memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));
1109         break;
1110      case 16:
1111         memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));
1112         break;
1113      case 8:
1114         assert(glsl_base_type_is_integer(glsl_get_base_type(type)));
1115         memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));
1116         break;
1117      default:
1118         unreachable("unexpeted bit-size");
1119      }
1120
1121      offset += increment;
1122   }
1123}
1124
1125static void
1126var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
1127                     const struct glsl_type *type, void *const_vals,
1128                     unsigned int offset)
1129{
1130   assert(!glsl_type_is_interface(type));
1131
1132   if (glsl_type_is_vector_or_scalar(type)) {
1133      var_fill_const_array_with_vector_or_scalar(ctx, c, type,
1134                                                 const_vals,
1135                                                 offset);
1136   } else if (glsl_type_is_array(type)) {
1137      assert(!glsl_type_is_unsized_array(type));
1138      const struct glsl_type *without = glsl_without_array(type);
1139      unsigned stride = glsl_get_explicit_stride(without);
1140
1141      for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {
1142         var_fill_const_array(ctx, c->elements[elt], without,
1143                              const_vals, offset + (elt * stride));
1144         offset += glsl_get_cl_size(without);
1145      }
1146   } else if (glsl_type_is_struct(type)) {
1147      for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {
1148         const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);
1149         unsigned field_offset = glsl_get_struct_field_offset(type, elt);
1150
1151         var_fill_const_array(ctx, c->elements[elt],
1152                              elt_type, const_vals,
1153                              offset + field_offset);
1154      }
1155   } else
1156      unreachable("unknown GLSL type in var_fill_const_array");
1157}
1158
1159static bool
1160emit_global_consts(struct ntd_context *ctx)
1161{
1162   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {
1163      assert(var->constant_initializer);
1164
1165      unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
1166      uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
1167      var_fill_const_array(ctx, var->constant_initializer, var->type,
1168                                 const_ints, 0);
1169      const struct dxil_value **const_vals =
1170         ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
1171      if (!const_vals)
1172         return false;
1173      for (int i = 0; i < num_members; i++)
1174         const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);
1175
1176      const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
1177      if (!elt_type)
1178         return false;
1179      const struct dxil_type *type =
1180         dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
1181      if (!type)
1182         return false;
1183      const struct dxil_value *agg_vals =
1184         dxil_module_get_array_const(&ctx->mod, type, const_vals);
1185      if (!agg_vals)
1186         return false;
1187
1188      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
1189                                                              DXIL_AS_DEFAULT, 4,
1190                                                              agg_vals);
1191      if (!gvar)
1192         return false;
1193
1194      if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
1195         return false;
1196   }
1197
1198   return true;
1199}
1200
1201static bool
1202emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
1203         unsigned size, unsigned count, char *name)
1204{
1205   assert(count != 0);
1206
1207   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1208
1209   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
1210   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
1211   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
1212                                                                     &array_type, 1);
1213   // All ubo[1]s should have been lowered to ubo with static indexing
1214   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
1215   resource_array_layout layout = {idx, binding, count, space};
1216   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
1217                                                          name, &layout, 4 * size);
1218
1219   if (!cbv_meta)
1220      return false;
1221
1222   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
1223   add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);
1224
1225   return true;
1226}
1227
1228static bool
1229emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
1230{
1231   unsigned count = 1;
1232   if (glsl_type_is_array(var->type))
1233      count = glsl_get_length(var->type);
1234
1235   char *name = var->name;
1236   char temp_name[30];
1237   if (name && strlen(name) == 0) {
1238      snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
1239               ctx->unnamed_ubo_count++);
1240      name = temp_name;
1241   }
1242
1243   const struct glsl_type *type = glsl_without_array(var->type);
1244   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
1245   unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;
1246
1247   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
1248                   dwords, count, name);
1249}
1250
1251static bool
1252emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
1253{
1254   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1255   unsigned binding = var->data.binding;
1256   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1257   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
1258   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
1259
1260   if (glsl_type_is_array(var->type))
1261      sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);
1262
1263   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
1264
1265   if (!sampler_meta)
1266      return false;
1267
1268   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
1269   add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);
1270
1271   return true;
1272}
1273
1274static bool
1275emit_static_indexing_handles(struct ntd_context *ctx)
1276{
1277   /* Vulkan always uses dynamic handles, from instructions in the NIR */
1278   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
1279      return true;
1280
1281   unsigned last_res_class = -1;
1282   unsigned id = 0;
1283
1284   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
1285      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
1286   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
1287        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
1288        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
1289      enum dxil_resource_class res_class;
1290      const struct dxil_value **handle_array;
1291      switch (res->resource_type) {
1292      case DXIL_RES_SRV_TYPED:
1293      case DXIL_RES_SRV_RAW:
1294      case DXIL_RES_SRV_STRUCTURED:
1295         res_class = DXIL_RESOURCE_CLASS_SRV;
1296         handle_array = ctx->srv_handles;
1297         break;
1298      case DXIL_RES_CBV:
1299         res_class = DXIL_RESOURCE_CLASS_CBV;
1300         handle_array = ctx->cbv_handles;
1301         break;
1302      case DXIL_RES_SAMPLER:
1303         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
1304         handle_array = ctx->sampler_handles;
1305         break;
1306      case DXIL_RES_UAV_RAW:
1307         res_class = DXIL_RESOURCE_CLASS_UAV;
1308         handle_array = ctx->ssbo_handles;
1309         break;
1310      case DXIL_RES_UAV_TYPED:
1311      case DXIL_RES_UAV_STRUCTURED:
1312      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
1313         res_class = DXIL_RESOURCE_CLASS_UAV;
1314         handle_array = ctx->image_handles;
1315         break;
1316      default:
1317         unreachable("Unexpected resource type");
1318      }
1319
1320      if (last_res_class != res_class)
1321         id = 0;
1322      else
1323         id++;
1324      last_res_class = res_class;
1325
1326      if (res->space > 1)
1327         continue;
1328      assert(res->space == 0 ||
1329         (res->space == 1 &&
1330            res->resource_type != DXIL_RES_UAV_RAW &&
1331            ctx->opts->environment == DXIL_ENVIRONMENT_GL));
1332
1333      /* CL uses dynamic handles for the "globals" UAV array, but uses static
1334       * handles for UBOs, textures, and samplers.
1335       */
1336      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
1337          res->resource_type == DXIL_RES_UAV_RAW)
1338         continue;
1339
1340      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
1341         handle_array[i] = emit_createhandle_call_const_index(ctx, res_class, id, i, false);
1342         if (!handle_array[i])
1343            return false;
1344      }
1345   }
1346   return true;
1347}
1348
1349static const struct dxil_mdnode *
1350emit_gs_state(struct ntd_context *ctx)
1351{
1352   const struct dxil_mdnode *gs_state_nodes[5];
1353   const nir_shader *s = ctx->shader;
1354
1355   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
1356   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
1357   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
1358   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
1359   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);
1360
1361   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
1362      if (!gs_state_nodes[i])
1363         return NULL;
1364   }
1365
1366   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
1367}
1368
1369static enum dxil_tessellator_domain
1370get_tessellator_domain(enum tess_primitive_mode primitive_mode)
1371{
1372   switch (primitive_mode) {
1373   case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
1374   case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
1375   case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
1376   default:
1377      unreachable("Invalid tessellator primitive mode");
1378   }
1379}
1380
1381static enum dxil_tessellator_partitioning
1382get_tessellator_partitioning(enum gl_tess_spacing spacing)
1383{
1384   switch (spacing) {
1385   default:
1386   case TESS_SPACING_EQUAL:
1387      return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
1388   case TESS_SPACING_FRACTIONAL_EVEN:
1389      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
1390   case TESS_SPACING_FRACTIONAL_ODD:
1391      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
1392   }
1393}
1394
1395static enum dxil_tessellator_output_primitive
1396get_tessellator_output_primitive(const struct shader_info *info)
1397{
1398   if (info->tess.point_mode)
1399      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
1400   if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
1401      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
1402   /* Note: GL tessellation domain is inverted from D3D, which means triangle
1403    * winding needs to be inverted.
1404    */
1405   if (info->tess.ccw)
1406      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
1407   return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
1408}
1409
1410static const struct dxil_mdnode *
1411emit_hs_state(struct ntd_context *ctx)
1412{
1413   const struct dxil_mdnode *hs_state_nodes[7];
1414
1415   hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
1416   hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
1417   hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1418   hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1419   hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
1420   hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
1421   hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);
1422
1423   return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
1424}
1425
1426static const struct dxil_mdnode *
1427emit_ds_state(struct ntd_context *ctx)
1428{
1429   const struct dxil_mdnode *ds_state_nodes[2];
1430
1431   ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1432   ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1433
1434   return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
1435}
1436
1437static const struct dxil_mdnode *
1438emit_threads(struct ntd_context *ctx)
1439{
1440   const nir_shader *s = ctx->shader;
1441   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1442   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1443   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1444   if (!threads_x || !threads_y || !threads_z)
1445      return false;
1446
1447   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1448   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1449}
1450
1451static int64_t
1452get_module_flags(struct ntd_context *ctx)
1453{
1454   /* See the DXIL documentation for the definition of these flags:
1455    *
1456    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1457    */
1458
1459   uint64_t flags = 0;
1460   if (ctx->mod.feats.doubles)
1461      flags |= (1 << 2);
1462   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
1463       ctx->shader->info.fs.early_fragment_tests)
1464      flags |= (1 << 3);
1465   if (ctx->mod.raw_and_structured_buffers)
1466      flags |= (1 << 4);
1467   if (ctx->mod.feats.min_precision)
1468      flags |= (1 << 5);
1469   if (ctx->mod.feats.dx11_1_double_extensions)
1470      flags |= (1 << 6);
1471   if (ctx->mod.feats.array_layer_from_vs_or_ds)
1472      flags |= (1 << 9);
1473   if (ctx->mod.feats.inner_coverage)
1474      flags |= (1 << 10);
1475   if (ctx->mod.feats.typed_uav_load_additional_formats)
1476      flags |= (1 << 13);
1477   if (ctx->mod.feats.use_64uavs)
1478      flags |= (1 << 15);
1479   if (ctx->mod.feats.uavs_at_every_stage)
1480      flags |= (1 << 16);
1481   if (ctx->mod.feats.cs_4x_raw_sb)
1482      flags |= (1 << 17);
1483   if (ctx->mod.feats.wave_ops)
1484      flags |= (1 << 19);
1485   if (ctx->mod.feats.int64_ops)
1486      flags |= (1 << 20);
1487   if (ctx->mod.feats.barycentrics)
1488      flags |= (1 << 22);
1489   if (ctx->mod.feats.stencil_ref)
1490      flags |= (1 << 11);
1491   if (ctx->mod.feats.native_low_precision)
1492      flags |= (1 << 23) | (1 << 5);
1493
1494   if (ctx->opts->disable_math_refactoring)
1495      flags |= (1 << 1);
1496
1497   return flags;
1498}
1499
1500static const struct dxil_mdnode *
1501emit_entrypoint(struct ntd_context *ctx,
1502                const struct dxil_func *func, const char *name,
1503                const struct dxil_mdnode *signatures,
1504                const struct dxil_mdnode *resources,
1505                const struct dxil_mdnode *shader_props)
1506{
1507   char truncated_name[254] = { 0 };
1508   strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);
1509
1510   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1511   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
1512   const struct dxil_mdnode *nodes[] = {
1513      func_md,
1514      name_md,
1515      signatures,
1516      resources,
1517      shader_props
1518   };
1519   return dxil_get_metadata_node(&ctx->mod, nodes,
1520                                 ARRAY_SIZE(nodes));
1521}
1522
1523static const struct dxil_mdnode *
1524emit_resources(struct ntd_context *ctx)
1525{
1526   bool emit_resources = false;
1527   const struct dxil_mdnode *resources_nodes[] = {
1528      NULL, NULL, NULL, NULL
1529   };
1530
1531#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1532
1533   if (ctx->srv_metadata_nodes.size) {
1534      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1535      emit_resources = true;
1536   }
1537
1538   if (ctx->uav_metadata_nodes.size) {
1539      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1540      emit_resources = true;
1541   }
1542
1543   if (ctx->cbv_metadata_nodes.size) {
1544      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1545      emit_resources = true;
1546   }
1547
1548   if (ctx->sampler_metadata_nodes.size) {
1549      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1550      emit_resources = true;
1551   }
1552
1553#undef ARRAY_AND_SIZE
1554
1555   return emit_resources ?
1556      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1557}
1558
1559static boolean
1560emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1561         const struct dxil_mdnode *value_node)
1562{
1563   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1564   if (!tag_node || !value_node)
1565      return false;
1566   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1567   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1568   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1569
1570   return true;
1571}
1572
1573static bool
1574emit_metadata(struct ntd_context *ctx)
1575{
1576   /* DXIL versions are 1.x for shader model 6.x */
1577   assert(ctx->mod.major_version == 6);
1578   unsigned dxilMajor = 1;
1579   unsigned dxilMinor = ctx->mod.minor_version;
1580   unsigned valMajor = ctx->mod.major_validator;
1581   unsigned valMinor = ctx->mod.minor_validator;
1582   if (!emit_llvm_ident(&ctx->mod) ||
1583       !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
1584       !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
1585       !emit_dx_shader_model(&ctx->mod))
1586      return false;
1587
1588   const struct dxil_func_def *main_func_def = ctx->main_func_def;
1589   if (!main_func_def)
1590      return false;
1591   const struct dxil_func *main_func = main_func_def->func;
1592
1593   const struct dxil_mdnode *resources_node = emit_resources(ctx);
1594
1595   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
1596   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
1597
1598   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
1599   const struct dxil_mdnode *nodes_4_27_27[] = {
1600      node4, node27, node27
1601   };
1602   const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
1603                                                      ARRAY_SIZE(nodes_4_27_27));
1604
1605   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
1606
1607   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
1608   const struct dxil_mdnode *main_type_annotation_nodes[] = {
1609      node3, main_entrypoint, node29
1610   };
1611   const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
1612                                                                           ARRAY_SIZE(main_type_annotation_nodes));
1613
1614   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
1615      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
1616         return false;
1617   } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
1618      ctx->tess_input_control_point_count = 32;
1619      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
1620         if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
1621            ctx->tess_input_control_point_count = glsl_array_size(var->type);
1622            break;
1623         }
1624      }
1625
1626      if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
1627         return false;
1628   } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
1629      if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
1630         return false;
1631   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
1632      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
1633         return false;
1634   }
1635
1636   uint64_t flags = get_module_flags(ctx);
1637   if (flags != 0) {
1638      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
1639         return false;
1640   }
1641   const struct dxil_mdnode *shader_properties = NULL;
1642   if (ctx->num_shader_property_nodes > 0) {
1643      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
1644                                                 ctx->num_shader_property_nodes);
1645      if (!shader_properties)
1646         return false;
1647   }
1648
1649   nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
1650   const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
1651       entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
1652   if (!dx_entry_point)
1653      return false;
1654
1655   if (resources_node) {
1656      const struct dxil_mdnode *dx_resources = resources_node;
1657      dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
1658                                       &dx_resources, 1);
1659   }
1660
1661   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
1662   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
1663                                       dx_type_annotations,
1664                                       ARRAY_SIZE(dx_type_annotations)) &&
1665          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
1666                                       &dx_entry_point, 1);
1667}
1668
1669static const struct dxil_value *
1670bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
1671               const struct dxil_value *value)
1672{
1673   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
1674   if (!type)
1675      return NULL;
1676
1677   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1678}
1679
1680static const struct dxil_value *
1681bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
1682                 const struct dxil_value *value)
1683{
1684   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
1685   if (!type)
1686      return NULL;
1687
1688   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1689}
1690
1691static void
1692store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,
1693              const struct dxil_value *value)
1694{
1695   assert(ssa->index < ctx->num_defs);
1696   assert(chan < ssa->num_components);
1697   /* We pre-defined the dest value because of a phi node, so bitcast while storing if the
1698    * base type differs */
1699   if (ctx->defs[ssa->index].chans[chan]) {
1700      const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);
1701      const struct dxil_type *value_type = dxil_value_get_type(value);
1702      if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))
1703         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);
1704   }
1705   ctx->defs[ssa->index].chans[chan] = value;
1706}
1707
1708static void
1709store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
1710                 const struct dxil_value *value)
1711{
1712   assert(dest->is_ssa);
1713   assert(value);
1714   store_ssa_def(ctx, &dest->ssa, chan, value);
1715}
1716
1717static void
1718store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
1719           const struct dxil_value *value, nir_alu_type type)
1720{
1721   switch (nir_alu_type_get_base_type(type)) {
1722   case nir_type_float:
1723      if (nir_dest_bit_size(*dest) == 64)
1724         ctx->mod.feats.doubles = true;
1725      store_dest_value(ctx, dest, chan, value);
1726      break;
1727   case nir_type_uint:
1728   case nir_type_int:
1729      if (nir_dest_bit_size(*dest) == 16)
1730         ctx->mod.feats.native_low_precision = true;
1731      if (nir_dest_bit_size(*dest) == 64)
1732         ctx->mod.feats.int64_ops = true;
1733      FALLTHROUGH;
1734   case nir_type_bool:
1735      store_dest_value(ctx, dest, chan, value);
1736      break;
1737   default:
1738      unreachable("unexpected nir_alu_type");
1739   }
1740}
1741
1742static void
1743store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
1744               const struct dxil_value *value)
1745{
1746   assert(!alu->dest.saturate);
1747   store_dest(ctx, &alu->dest.dest, chan, value,
1748              nir_op_infos[alu->op].output_type);
1749}
1750
1751static const struct dxil_value *
1752get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)
1753{
1754   assert(ssa->index < ctx->num_defs);
1755   assert(chan < ssa->num_components);
1756   assert(ctx->defs[ssa->index].chans[chan]);
1757   return ctx->defs[ssa->index].chans[chan];
1758}
1759
1760static const struct dxil_value *
1761get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
1762        nir_alu_type type)
1763{
1764   assert(src->is_ssa);
1765   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
1766
1767   const int bit_size = nir_src_bit_size(*src);
1768
1769   switch (nir_alu_type_get_base_type(type)) {
1770   case nir_type_int:
1771   case nir_type_uint: {
1772      assert(bit_size != 64 || ctx->mod.feats.int64_ops);
1773      const struct dxil_type *expect_type =  dxil_module_get_int_type(&ctx->mod, bit_size);
1774      /* nohing to do */
1775      if (dxil_value_type_equal_to(value, expect_type))
1776         return value;
1777      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1778      return bitcast_to_int(ctx,  bit_size, value);
1779      }
1780
1781   case nir_type_float:
1782      assert(nir_src_bit_size(*src) >= 16);
1783      assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
1784      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))
1785         return value;
1786      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1787      return bitcast_to_float(ctx, bit_size, value);
1788
1789   case nir_type_bool:
1790      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
1791         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
1792                               dxil_module_get_int_type(&ctx->mod, 1), value);
1793      }
1794      return value;
1795
1796   default:
1797      unreachable("unexpected nir_alu_type");
1798   }
1799}
1800
1801static const struct dxil_type *
1802get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1803{
1804   assert(!alu->src[src].abs);
1805   assert(!alu->src[src].negate);
1806   nir_ssa_def *ssa_src = alu->src[src].src.ssa;
1807   unsigned chan = alu->src[src].swizzle[0];
1808   const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);
1809   return dxil_value_get_type(value);
1810}
1811
1812static const struct dxil_value *
1813get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1814{
1815   assert(!alu->src[src].abs);
1816   assert(!alu->src[src].negate);
1817
1818   unsigned chan = alu->src[src].swizzle[0];
1819   return get_src(ctx, &alu->src[src].src, chan,
1820                  nir_op_infos[alu->op].input_types[src]);
1821}
1822
1823static bool
1824emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
1825           enum dxil_bin_opcode opcode,
1826           const struct dxil_value *op0, const struct dxil_value *op1)
1827{
1828   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
1829
1830   enum dxil_opt_flags flags = 0;
1831   if (is_float_op && !alu->exact)
1832      flags |= DXIL_UNSAFE_ALGEBRA;
1833
1834   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
1835   if (!v)
1836      return false;
1837   store_alu_dest(ctx, alu, 0, v);
1838   return true;
1839}
1840
1841static bool
1842emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
1843           enum dxil_bin_opcode opcode,
1844           const struct dxil_value *op0, const struct dxil_value *op1)
1845{
1846   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
1847   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
1848   if (op0_bit_size != op1_bit_size) {
1849      const struct dxil_type *type =
1850         dxil_module_get_int_type(&ctx->mod, op0_bit_size);
1851      enum dxil_cast_opcode cast_op =
1852         op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
1853      op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
1854   }
1855
1856   const struct dxil_value *v =
1857      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
1858   if (!v)
1859      return false;
1860   store_alu_dest(ctx, alu, 0, v);
1861   return true;
1862}
1863
1864static bool
1865emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
1866         enum dxil_cmp_pred pred,
1867         const struct dxil_value *op0, const struct dxil_value *op1)
1868{
1869   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
1870   if (!v)
1871      return false;
1872   store_alu_dest(ctx, alu, 0, v);
1873   return true;
1874}
1875
1876static enum dxil_cast_opcode
1877get_cast_op(nir_alu_instr *alu)
1878{
1879   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1880   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
1881
1882   switch (alu->op) {
1883   /* bool -> int */
1884   case nir_op_b2i16:
1885   case nir_op_b2i32:
1886   case nir_op_b2i64:
1887      return DXIL_CAST_ZEXT;
1888
1889   /* float -> float */
1890   case nir_op_f2f16_rtz:
1891   case nir_op_f2f32:
1892   case nir_op_f2f64:
1893      assert(dst_bits != src_bits);
1894      if (dst_bits < src_bits)
1895         return DXIL_CAST_FPTRUNC;
1896      else
1897         return DXIL_CAST_FPEXT;
1898
1899   /* int -> int */
1900   case nir_op_i2i16:
1901   case nir_op_i2i32:
1902   case nir_op_i2i64:
1903      assert(dst_bits != src_bits);
1904      if (dst_bits < src_bits)
1905         return DXIL_CAST_TRUNC;
1906      else
1907         return DXIL_CAST_SEXT;
1908
1909   /* uint -> uint */
1910   case nir_op_u2u16:
1911   case nir_op_u2u32:
1912   case nir_op_u2u64:
1913      assert(dst_bits != src_bits);
1914      if (dst_bits < src_bits)
1915         return DXIL_CAST_TRUNC;
1916      else
1917         return DXIL_CAST_ZEXT;
1918
1919   /* float -> int */
1920   case nir_op_f2i16:
1921   case nir_op_f2i32:
1922   case nir_op_f2i64:
1923      return DXIL_CAST_FPTOSI;
1924
1925   /* float -> uint */
1926   case nir_op_f2u16:
1927   case nir_op_f2u32:
1928   case nir_op_f2u64:
1929      return DXIL_CAST_FPTOUI;
1930
1931   /* int -> float */
1932   case nir_op_i2f16:
1933   case nir_op_i2f32:
1934   case nir_op_i2f64:
1935      return DXIL_CAST_SITOFP;
1936
1937   /* uint -> float */
1938   case nir_op_u2f16:
1939   case nir_op_u2f32:
1940   case nir_op_u2f64:
1941      return DXIL_CAST_UITOFP;
1942
1943   default:
1944      unreachable("unexpected cast op");
1945   }
1946}
1947
1948static const struct dxil_type *
1949get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
1950{
1951   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1952   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
1953   case nir_type_bool:
1954      assert(dst_bits == 1);
1955      FALLTHROUGH;
1956   case nir_type_int:
1957   case nir_type_uint:
1958      return dxil_module_get_int_type(&ctx->mod, dst_bits);
1959
1960   case nir_type_float:
1961      return dxil_module_get_float_type(&ctx->mod, dst_bits);
1962
1963   default:
1964      unreachable("unknown nir_alu_type");
1965   }
1966}
1967
1968static bool
1969is_double(nir_alu_type alu_type, unsigned bit_size)
1970{
1971   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
1972          bit_size == 64;
1973}
1974
1975static bool
1976emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
1977          const struct dxil_value *value)
1978{
1979   enum dxil_cast_opcode opcode = get_cast_op(alu);
1980   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
1981   if (!type)
1982      return false;
1983
1984   const nir_op_info *info = &nir_op_infos[alu->op];
1985   switch (opcode) {
1986   case DXIL_CAST_UITOFP:
1987   case DXIL_CAST_SITOFP:
1988      if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))
1989         ctx->mod.feats.dx11_1_double_extensions = true;
1990      break;
1991   case DXIL_CAST_FPTOUI:
1992   case DXIL_CAST_FPTOSI:
1993      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
1994         ctx->mod.feats.dx11_1_double_extensions = true;
1995      break;
1996   default:
1997      break;
1998   }
1999
2000   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
2001                                               value);
2002   if (!v)
2003      return false;
2004   store_alu_dest(ctx, alu, 0, v);
2005   return true;
2006}
2007
2008static enum overload_type
2009get_overload(nir_alu_type alu_type, unsigned bit_size)
2010{
2011   switch (nir_alu_type_get_base_type(alu_type)) {
2012   case nir_type_int:
2013   case nir_type_uint:
2014      switch (bit_size) {
2015      case 16: return DXIL_I16;
2016      case 32: return DXIL_I32;
2017      case 64: return DXIL_I64;
2018      default:
2019         unreachable("unexpected bit_size");
2020      }
2021   case nir_type_float:
2022      switch (bit_size) {
2023      case 16: return DXIL_F16;
2024      case 32: return DXIL_F32;
2025      case 64: return DXIL_F64;
2026      default:
2027         unreachable("unexpected bit_size");
2028      }
2029   default:
2030      unreachable("unexpected output type");
2031   }
2032}
2033
2034static bool
2035emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2036                 enum dxil_intr intr, const struct dxil_value *op)
2037{
2038   const nir_op_info *info = &nir_op_infos[alu->op];
2039   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2040   enum overload_type overload = get_overload(info->input_types[0], src_bits);
2041
2042   const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
2043   if (!v)
2044      return false;
2045   store_alu_dest(ctx, alu, 0, v);
2046   return true;
2047}
2048
2049static bool
2050emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2051                  enum dxil_intr intr,
2052                  const struct dxil_value *op0, const struct dxil_value *op1)
2053{
2054   const nir_op_info *info = &nir_op_infos[alu->op];
2055   assert(info->output_type == info->input_types[0]);
2056   assert(info->output_type == info->input_types[1]);
2057   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
2058   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2059   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2060   enum overload_type overload = get_overload(info->output_type, dst_bits);
2061
2062   const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
2063                                                 op0, op1);
2064   if (!v)
2065      return false;
2066   store_alu_dest(ctx, alu, 0, v);
2067   return true;
2068}
2069
2070static bool
2071emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2072                    enum dxil_intr intr,
2073                    const struct dxil_value *op0,
2074                    const struct dxil_value *op1,
2075                    const struct dxil_value *op2)
2076{
2077   const nir_op_info *info = &nir_op_infos[alu->op];
2078   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
2079   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2080   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2081   assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
2082
2083   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
2084   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
2085   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));
2086
2087   enum overload_type overload = get_overload(info->output_type, dst_bits);
2088
2089   const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
2090                                                   op0, op1, op2);
2091   if (!v)
2092      return false;
2093   store_alu_dest(ctx, alu, 0, v);
2094   return true;
2095}
2096
2097static bool
2098emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
2099                     const struct dxil_value *base,
2100                     const struct dxil_value *insert,
2101                     const struct dxil_value *offset,
2102                     const struct dxil_value *width)
2103{
2104   /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
2105   const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
2106                                                     width, offset, insert, base);
2107   if (!v)
2108      return false;
2109
2110   /* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. */
2111   const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
2112      width, dxil_module_get_int32_const(&ctx->mod, 32));
2113   v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
2114   store_alu_dest(ctx, alu, 0, v);
2115   return true;
2116}
2117
2118static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
2119                        const struct dxil_value *sel,
2120                        const struct dxil_value *val_true,
2121                        const struct dxil_value *val_false)
2122{
2123   assert(sel);
2124   assert(val_true);
2125   assert(val_false);
2126
2127   const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
2128   if (!v)
2129      return false;
2130
2131   store_alu_dest(ctx, alu, 0, v);
2132   return true;
2133}
2134
2135static bool
2136emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2137{
2138   assert(val);
2139
2140   struct dxil_module *m = &ctx->mod;
2141
2142   const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
2143   const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
2144
2145   if (!c0 || !c1)
2146      return false;
2147
2148   return emit_select(ctx, alu, val, c1, c0);
2149}
2150
2151static bool
2152emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2153{
2154   assert(val);
2155
2156   struct dxil_module *m = &ctx->mod;
2157
2158   const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
2159   const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
2160
2161   if (!c0 || !c1)
2162      return false;
2163
2164   return emit_select(ctx, alu, val, c1, c0);
2165}
2166
2167static bool
2168emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2169{
2170   assert(val);
2171
2172   struct dxil_module *m = &ctx->mod;
2173
2174   const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
2175   const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);
2176
2177   if (!c0 || !c1)
2178      return false;
2179
2180   ctx->mod.feats.doubles = 1;
2181   return emit_select(ctx, alu, val, c1, c0);
2182}
2183
2184static bool
2185emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2186{
2187   assert(val);
2188
2189   const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);
2190   return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);
2191}
2192
2193static bool
2194emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
2195{
2196   if (shift) {
2197      val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
2198         dxil_module_get_int32_const(&ctx->mod, 16), 0);
2199      if (!val)
2200         return false;
2201   }
2202
2203   const struct dxil_func *func = dxil_get_function(&ctx->mod,
2204                                                    "dx.op.legacyF16ToF32",
2205                                                    DXIL_NONE);
2206   if (!func)
2207      return false;
2208
2209   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
2210   if (!opcode)
2211      return false;
2212
2213   const struct dxil_value *args[] = {
2214     opcode,
2215     val
2216   };
2217
2218   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2219   if (!v)
2220      return false;
2221   store_alu_dest(ctx, alu, 0, v);
2222   return true;
2223}
2224
2225static bool
2226emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
2227{
2228   const struct dxil_func *func = dxil_get_function(&ctx->mod,
2229                                                    "dx.op.legacyF32ToF16",
2230                                                    DXIL_NONE);
2231   if (!func)
2232      return false;
2233
2234   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
2235   if (!opcode)
2236      return false;
2237
2238   const struct dxil_value *args[] = {
2239     opcode,
2240     val0
2241   };
2242
2243   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2244   if (!v)
2245      return false;
2246
2247   if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
2248      args[1] = val1;
2249      const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2250      if (!v_high)
2251         return false;
2252
2253      v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
2254         dxil_module_get_int32_const(&ctx->mod, 16), 0);
2255      if (!v_high)
2256         return false;
2257
2258      v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
2259      if (!v)
2260         return false;
2261   }
2262
2263   store_alu_dest(ctx, alu, 0, v);
2264   return true;
2265}
2266
2267static bool
2268emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
2269{
2270   const struct dxil_type *type = get_alu_src_type(ctx, alu, 0);
2271   nir_alu_type t = dxil_type_to_nir_type(type);
2272
2273   for (unsigned i = 0; i < num_inputs; i++) {
2274      const struct dxil_value *src =
2275         get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t);
2276      if (!src)
2277         return false;
2278
2279      store_alu_dest(ctx, alu, i, src);
2280   }
2281   return true;
2282}
2283
2284static bool
2285emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
2286{
2287   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2288   if (!func)
2289      return false;
2290
2291   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2292   if (!opcode)
2293      return false;
2294
2295   const struct dxil_value *args[3] = {
2296      opcode,
2297      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
2298      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
2299   };
2300   if (!args[1] || !args[2])
2301      return false;
2302
2303   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2304   if (!v)
2305      return false;
2306   store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64);
2307   return true;
2308}
2309
2310static bool
2311emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2312{
2313   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2314   if (!func)
2315      return false;
2316
2317   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2318   if (!opcode)
2319      return false;
2320
2321   const struct dxil_value *args[] = {
2322      opcode,
2323      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
2324   };
2325   if (!args[1])
2326      return false;
2327
2328   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2329   if (!v)
2330      return false;
2331
2332   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2333   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2334   if (!hi || !lo)
2335      return false;
2336
2337   store_dest_value(ctx, &alu->dest.dest, 0, hi);
2338   store_dest_value(ctx, &alu->dest.dest, 1, lo);
2339   return true;
2340}
2341
2342static bool
2343emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2344{
2345   /* handle vec-instructions first; they are the only ones that produce
2346    * vector results.
2347    */
2348   switch (alu->op) {
2349   case nir_op_vec2:
2350   case nir_op_vec3:
2351   case nir_op_vec4:
2352   case nir_op_vec8:
2353   case nir_op_vec16:
2354      return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2355   case nir_op_mov: {
2356         assert(nir_dest_num_components(alu->dest.dest) == 1);
2357         store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx,
2358                        alu->src->src.ssa, alu->src->swizzle[0]));
2359         return true;
2360      }
2361   case nir_op_pack_double_2x32_dxil:
2362      return emit_make_double(ctx, alu);
2363   case nir_op_unpack_double_2x32_dxil:
2364      return emit_split_double(ctx, alu);
2365   default:
2366      /* silence warnings */
2367      ;
2368   }
2369
2370   /* other ops should be scalar */
2371   assert(alu->dest.write_mask == 1);
2372   const struct dxil_value *src[4];
2373   assert(nir_op_infos[alu->op].num_inputs <= 4);
2374   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2375      src[i] = get_alu_src(ctx, alu, i);
2376      if (!src[i])
2377         return false;
2378   }
2379
2380   switch (alu->op) {
2381   case nir_op_iadd:
2382   case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2383
2384   case nir_op_isub:
2385   case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2386
2387   case nir_op_imul:
2388   case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2389
2390   case nir_op_fdiv:
2391      if (alu->dest.dest.ssa.bit_size == 64)
2392         ctx->mod.feats.dx11_1_double_extensions = 1;
2393      FALLTHROUGH;
2394   case nir_op_idiv:
2395      return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2396
2397   case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]);
2398   case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2399   case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2400   case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2401   case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2402   case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2403   case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2404   case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2405   case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2406   case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2407   case nir_op_inot: {
2408      unsigned bit_size = alu->dest.dest.ssa.bit_size;
2409      intmax_t val = bit_size == 1 ? 1 : -1;
2410      const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2411      return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2412   }
2413   case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2414   case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2415   case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2416   case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2417   case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2418   case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2419   case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2420   case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2421   case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2422   case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2423   case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]);
2424   case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2425   case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2426   case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2427   case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2428   case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2429   case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2430   case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2431   case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2432   case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2433   case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2434   case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2435
2436   case nir_op_fddx:
2437   case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
2438   case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
2439   case nir_op_fddy:
2440   case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
2441   case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);
2442
2443   case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2444   case nir_op_frcp: {
2445         const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2446         return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2447      }
2448   case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2449   case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2450   case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2451   case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2452   case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
2453   case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
2454   case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
2455   case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
2456   case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
2457   case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
2458   case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
2459   case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
2460   case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
2461   case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
2462   case nir_op_ffma:
2463      if (alu->dest.dest.ssa.bit_size == 64)
2464         ctx->mod.feats.dx11_1_double_extensions = 1;
2465      return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
2466
2467   case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
2468   case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
2469   case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
2470
2471   case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
2472   case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
2473   case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
2474
2475   case nir_op_b2i16:
2476   case nir_op_i2i16:
2477   case nir_op_f2i16:
2478   case nir_op_f2u16:
2479   case nir_op_u2u16:
2480   case nir_op_u2f16:
2481   case nir_op_i2f16:
2482   case nir_op_f2f16_rtz:
2483   case nir_op_b2i32:
2484   case nir_op_f2f32:
2485   case nir_op_f2i32:
2486   case nir_op_f2u32:
2487   case nir_op_i2f32:
2488   case nir_op_i2i32:
2489   case nir_op_u2f32:
2490   case nir_op_u2u32:
2491   case nir_op_b2i64:
2492   case nir_op_f2f64:
2493   case nir_op_f2i64:
2494   case nir_op_f2u64:
2495   case nir_op_i2f64:
2496   case nir_op_i2i64:
2497   case nir_op_u2f64:
2498   case nir_op_u2u64:
2499      return emit_cast(ctx, alu, src[0]);
2500
2501   case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);
2502   case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
2503   case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
2504   case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
2505   default:
2506      NIR_INSTR_UNSUPPORTED(&alu->instr);
2507      assert("Unimplemented ALU instruction");
2508      return false;
2509   }
2510}
2511
2512static const struct dxil_value *
2513load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
2514         const struct dxil_value *offset, enum overload_type overload)
2515{
2516   assert(handle && offset);
2517
2518   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
2519   if (!opcode)
2520      return NULL;
2521
2522   const struct dxil_value *args[] = {
2523      opcode, handle, offset
2524   };
2525
2526   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
2527   if (!func)
2528      return NULL;
2529   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2530}
2531
2532static bool
2533emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, nir_scope execution_scope, nir_scope mem_scope)
2534{
2535   const struct dxil_value *opcode, *mode;
2536   const struct dxil_func *func;
2537   uint32_t flags = 0;
2538
2539   if (execution_scope == NIR_SCOPE_WORKGROUP)
2540      flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
2541
2542   if (modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) {
2543      if (mem_scope > NIR_SCOPE_WORKGROUP)
2544         flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
2545      else
2546         flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
2547   }
2548
2549   if (modes & nir_var_mem_shared)
2550      flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
2551
2552   func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
2553   if (!func)
2554      return false;
2555
2556   opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
2557   if (!opcode)
2558      return false;
2559
2560   mode = dxil_module_get_int32_const(&ctx->mod, flags);
2561   if (!mode)
2562      return false;
2563
2564   const struct dxil_value *args[] = { opcode, mode };
2565
2566   return dxil_emit_call_void(&ctx->mod, func,
2567                              args, ARRAY_SIZE(args));
2568}
2569
2570static bool
2571emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2572{
2573   return emit_barrier_impl(ctx,
2574      nir_intrinsic_memory_modes(intr),
2575      nir_intrinsic_execution_scope(intr),
2576      nir_intrinsic_memory_scope(intr));
2577}
2578
2579/* Memory barrier for UAVs (buffers/images) at cross-workgroup scope */
2580static bool
2581emit_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2582{
2583   return emit_barrier_impl(ctx,
2584      nir_var_mem_global,
2585      NIR_SCOPE_NONE,
2586      NIR_SCOPE_DEVICE);
2587}
2588
2589/* Memory barrier for TGSM */
2590static bool
2591emit_memory_barrier_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2592{
2593   return emit_barrier_impl(ctx,
2594      nir_var_mem_shared,
2595      NIR_SCOPE_NONE,
2596      NIR_SCOPE_WORKGROUP);
2597}
2598
2599/* Memory barrier for all intra-workgroup memory accesses (UAVs and TGSM) */
2600static bool
2601emit_group_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2602{
2603   return emit_barrier_impl(ctx,
2604      nir_var_mem_shared | nir_var_mem_global,
2605      NIR_SCOPE_NONE,
2606      NIR_SCOPE_WORKGROUP);
2607}
2608
2609static bool
2610emit_control_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2611{
2612   return emit_barrier_impl(ctx,
2613      nir_var_mem_shared,
2614      NIR_SCOPE_WORKGROUP,
2615      NIR_SCOPE_NONE);
2616}
2617
2618static bool
2619emit_load_global_invocation_id(struct ntd_context *ctx,
2620                                    nir_intrinsic_instr *intr)
2621{
2622   assert(intr->dest.is_ssa);
2623   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2624
2625   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2626      if (comps & (1 << i)) {
2627         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2628         if (!idx)
2629            return false;
2630         const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
2631
2632         if (!globalid)
2633            return false;
2634
2635         store_dest_value(ctx, &intr->dest, i, globalid);
2636      }
2637   }
2638   return true;
2639}
2640
2641static bool
2642emit_load_local_invocation_id(struct ntd_context *ctx,
2643                              nir_intrinsic_instr *intr)
2644{
2645   assert(intr->dest.is_ssa);
2646   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2647
2648   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2649      if (comps & (1 << i)) {
2650         const struct dxil_value
2651            *idx = dxil_module_get_int32_const(&ctx->mod, i);
2652         if (!idx)
2653            return false;
2654         const struct dxil_value
2655            *threadidingroup = emit_threadidingroup_call(ctx, idx);
2656         if (!threadidingroup)
2657            return false;
2658         store_dest_value(ctx, &intr->dest, i, threadidingroup);
2659      }
2660   }
2661   return true;
2662}
2663
2664static bool
2665emit_load_local_invocation_index(struct ntd_context *ctx,
2666                                 nir_intrinsic_instr *intr)
2667{
2668   assert(intr->dest.is_ssa);
2669
2670   const struct dxil_value
2671      *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
2672   if (!flattenedthreadidingroup)
2673      return false;
2674   store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup);
2675
2676   return true;
2677}
2678
2679static bool
2680emit_load_local_workgroup_id(struct ntd_context *ctx,
2681                              nir_intrinsic_instr *intr)
2682{
2683   assert(intr->dest.is_ssa);
2684   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2685
2686   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2687      if (comps & (1 << i)) {
2688         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2689         if (!idx)
2690            return false;
2691         const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
2692         if (!groupid)
2693            return false;
2694         store_dest_value(ctx, &intr->dest, i, groupid);
2695      }
2696   }
2697   return true;
2698}
2699
2700static const struct dxil_value *
2701call_unary_external_function(struct ntd_context *ctx,
2702                             const char *name,
2703                             int32_t dxil_intr)
2704{
2705   const struct dxil_func *func =
2706      dxil_get_function(&ctx->mod, name, DXIL_I32);
2707   if (!func)
2708      return false;
2709
2710   const struct dxil_value *opcode =
2711      dxil_module_get_int32_const(&ctx->mod, dxil_intr);
2712   if (!opcode)
2713      return false;
2714
2715   const struct dxil_value *args[] = {opcode};
2716
2717   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2718}
2719
2720static bool
2721emit_load_unary_external_function(struct ntd_context *ctx,
2722                                  nir_intrinsic_instr *intr, const char *name,
2723                                  int32_t dxil_intr)
2724{
2725   const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr);
2726   store_dest_value(ctx, &intr->dest, 0, value);
2727
2728   return true;
2729}
2730
2731static bool
2732emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2733{
2734   const struct dxil_value *value = call_unary_external_function(ctx,
2735      "dx.op.coverage", DXIL_INTR_COVERAGE);
2736
2737   /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
2738   if (ctx->mod.info.has_per_sample_input) {
2739      value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
2740         dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
2741            dxil_module_get_int32_const(&ctx->mod, 1),
2742            call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX), 0), 0);
2743   }
2744
2745   store_dest_value(ctx, &intr->dest, 0, value);
2746   return true;
2747}
2748
2749static bool
2750emit_load_tess_coord(struct ntd_context *ctx,
2751                     nir_intrinsic_instr *intr)
2752{
2753   const struct dxil_func *func =
2754      dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
2755   if (!func)
2756      return false;
2757
2758   const struct dxil_value *opcode =
2759      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
2760   if (!opcode)
2761      return false;
2762
2763   unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
2764   for (unsigned i = 0; i < num_coords; ++i) {
2765      unsigned component_idx = i;
2766
2767      const struct dxil_value *component = dxil_module_get_int32_const(&ctx->mod, component_idx);
2768      if (!component)
2769         return false;
2770
2771      const struct dxil_value *args[] = { opcode, component };
2772
2773      const struct dxil_value *value =
2774         dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2775      store_dest_value(ctx, &intr->dest, i, value);
2776   }
2777
2778   for (unsigned i = num_coords; i < intr->dest.ssa.num_components; ++i) {
2779      const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
2780      store_dest_value(ctx, &intr->dest, i, value);
2781   }
2782
2783   return true;
2784}
2785
2786static const struct dxil_value *
2787get_int32_undef(struct dxil_module *m)
2788{
2789   const struct dxil_type *int32_type =
2790      dxil_module_get_int_type(m, 32);
2791   if (!int32_type)
2792      return NULL;
2793
2794   return dxil_module_get_undef(m, int32_type);
2795}
2796
2797static const struct dxil_value *
2798emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
2799                   const struct dxil_value *index)
2800{
2801   assert(var->data.mode == nir_var_shader_temp);
2802
2803   struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
2804   assert(he != NULL);
2805   const struct dxil_value *ptr = he->data;
2806
2807   const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
2808   if (!zero)
2809      return NULL;
2810
2811   const struct dxil_value *ops[] = { ptr, zero, index };
2812   return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
2813}
2814
2815static const struct dxil_value *
2816get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
2817                    enum dxil_resource_kind kind)
2818{
2819   /* This source might be one of:
2820    * 1. Constant resource index - just look it up in precomputed handle arrays
2821    *    If it's null in that array, create a handle, and store the result
2822    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
2823    * 3. Dynamic resource index - create a handle for it here
2824    */
2825   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
2826   nir_const_value *const_block_index = nir_src_as_const_value(*src);
2827   const struct dxil_value **handle_entry = NULL;
2828   if (const_block_index) {
2829      assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
2830      switch (kind) {
2831      case DXIL_RESOURCE_KIND_CBUFFER:
2832         handle_entry = &ctx->cbv_handles[const_block_index->u32];
2833         break;
2834      case DXIL_RESOURCE_KIND_RAW_BUFFER:
2835         if (class == DXIL_RESOURCE_CLASS_UAV)
2836            handle_entry = &ctx->ssbo_handles[const_block_index->u32];
2837         else
2838            handle_entry = &ctx->srv_handles[const_block_index->u32];
2839         break;
2840      case DXIL_RESOURCE_KIND_SAMPLER:
2841         handle_entry = &ctx->sampler_handles[const_block_index->u32];
2842         break;
2843      default:
2844         if (class == DXIL_RESOURCE_CLASS_UAV)
2845            handle_entry = &ctx->image_handles[const_block_index->u32];
2846         else
2847            handle_entry = &ctx->srv_handles[const_block_index->u32];
2848         break;
2849      }
2850   }
2851
2852   if (handle_entry && *handle_entry)
2853      return *handle_entry;
2854
2855   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0);
2856   if (nir_src_as_deref(*src) ||
2857       ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
2858      return value;
2859   }
2860
2861   unsigned space = 0;
2862   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
2863       class == DXIL_RESOURCE_CLASS_UAV) {
2864      if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
2865         space = 2;
2866      else
2867         space = 1;
2868   }
2869
2870   /* The base binding here will almost always be zero. The only cases where we end
2871    * up in this type of dynamic indexing are:
2872    * 1. GL UBOs
2873    * 2. GL SSBOs
2874    * 2. CL SSBOs
2875    * In all cases except GL UBOs, the resources are a single zero-based array.
2876    * In that case, the base is 1, because uniforms use 0 and cannot by dynamically
2877    * indexed. All other cases should either fall into static indexing (first early return),
2878    * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
2879    * load_vulkan_descriptor handle creation.
2880    */
2881   unsigned base_binding = 0;
2882   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
2883       class == DXIL_RESOURCE_CLASS_CBV)
2884      base_binding = 1;
2885
2886   const struct dxil_value *handle = emit_createhandle_call(ctx, class,
2887      get_resource_id(ctx, class, space, base_binding), value, !const_block_index);
2888   if (handle_entry)
2889      *handle_entry = handle;
2890
2891   return handle;
2892}
2893
2894static bool
2895emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2896{
2897   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2898
2899   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
2900   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
2901      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
2902      if (var && var->data.access & ACCESS_NON_WRITEABLE)
2903         class = DXIL_RESOURCE_CLASS_SRV;
2904   }
2905
2906   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
2907   const struct dxil_value *offset =
2908      get_src(ctx, &intr->src[1], 0, nir_type_uint);
2909   if (!int32_undef || !handle || !offset)
2910      return false;
2911
2912   assert(nir_src_bit_size(intr->src[0]) == 32);
2913   assert(nir_intrinsic_dest_components(intr) <= 4);
2914
2915   const struct dxil_value *coord[2] = {
2916      offset,
2917      int32_undef
2918   };
2919
2920   const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32);
2921   if (!load)
2922      return false;
2923
2924   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2925      const struct dxil_value *val =
2926         dxil_emit_extractval(&ctx->mod, load, i);
2927      if (!val)
2928         return false;
2929      store_dest_value(ctx, &intr->dest, i, val);
2930   }
2931   return true;
2932}
2933
2934static bool
2935emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2936{
2937   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
2938   const struct dxil_value *offset =
2939      get_src(ctx, &intr->src[2], 0, nir_type_uint);
2940   if (!handle || !offset)
2941      return false;
2942
2943   assert(nir_src_bit_size(intr->src[0]) == 32);
2944   unsigned num_components = nir_src_num_components(intr->src[0]);
2945   assert(num_components <= 4);
2946   const struct dxil_value *value[4];
2947   for (unsigned i = 0; i < num_components; ++i) {
2948      value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
2949      if (!value[i])
2950         return false;
2951   }
2952
2953   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2954   if (!int32_undef)
2955      return false;
2956
2957   const struct dxil_value *coord[2] = {
2958      offset,
2959      int32_undef
2960   };
2961
2962   for (int i = num_components; i < 4; ++i)
2963      value[i] = int32_undef;
2964
2965   const struct dxil_value *write_mask =
2966      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
2967   if (!write_mask)
2968      return false;
2969
2970   return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
2971}
2972
2973static bool
2974emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2975{
2976   const struct dxil_value *value =
2977      get_src(ctx, &intr->src[0], 0, nir_type_uint);
2978   const struct dxil_value *mask =
2979      get_src(ctx, &intr->src[1], 0, nir_type_uint);
2980   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
2981   const struct dxil_value *offset =
2982      get_src(ctx, &intr->src[3], 0, nir_type_uint);
2983   if (!value || !mask || !handle || !offset)
2984      return false;
2985
2986   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2987   if (!int32_undef)
2988      return false;
2989
2990   const struct dxil_value *coord[3] = {
2991      offset, int32_undef, int32_undef
2992   };
2993
2994   return
2995      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
2996      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
2997}
2998
2999static bool
3000emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3001{
3002   const struct dxil_value *zero, *index;
3003
3004   /* All shared mem accesses should have been lowered to scalar 32bit
3005    * accesses.
3006    */
3007   assert(nir_src_bit_size(intr->src[0]) == 32);
3008   assert(nir_src_num_components(intr->src[0]) == 1);
3009
3010   zero = dxil_module_get_int32_const(&ctx->mod, 0);
3011   if (!zero)
3012      return false;
3013
3014   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
3015      index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3016   else
3017      index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3018   if (!index)
3019      return false;
3020
3021   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3022   const struct dxil_value *ptr, *value;
3023
3024   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3025   if (!ptr)
3026      return false;
3027
3028   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3029   if (!value)
3030      return false;
3031
3032   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
3033      return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
3034
3035   const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3036   if (!mask)
3037      return false;
3038
3039   if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
3040                            DXIL_ATOMIC_ORDERING_ACQREL,
3041                            DXIL_SYNC_SCOPE_CROSSTHREAD))
3042      return false;
3043
3044   if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
3045                            DXIL_ATOMIC_ORDERING_ACQREL,
3046                            DXIL_SYNC_SCOPE_CROSSTHREAD))
3047      return false;
3048
3049   return true;
3050}
3051
3052static bool
3053emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3054{
3055   const struct dxil_value *zero, *index;
3056
3057   /* All scratch mem accesses should have been lowered to scalar 32bit
3058    * accesses.
3059    */
3060   assert(nir_src_bit_size(intr->src[0]) == 32);
3061   assert(nir_src_num_components(intr->src[0]) == 1);
3062
3063   zero = dxil_module_get_int32_const(&ctx->mod, 0);
3064   if (!zero)
3065      return false;
3066
3067   index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3068   if (!index)
3069      return false;
3070
3071   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
3072   const struct dxil_value *ptr, *value;
3073
3074   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3075   if (!ptr)
3076      return false;
3077
3078   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3079   if (!value)
3080      return false;
3081
3082   return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
3083}
3084
3085static bool
3086emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3087{
3088   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3089   if (!handle)
3090      return false;
3091
3092   const struct dxil_value *offset;
3093   nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
3094   if (const_offset) {
3095      offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
3096   } else {
3097      const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3098      const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
3099      if (!offset_src || !c4)
3100         return false;
3101
3102      offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
3103   }
3104
3105   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32);
3106
3107   if (!agg)
3108      return false;
3109
3110   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
3111      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
3112      store_dest(ctx, &intr->dest, i, retval,
3113                 nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool);
3114   }
3115   return true;
3116}
3117
3118static bool
3119emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3120{
3121   assert(nir_dest_num_components(intr->dest) <= 4);
3122   assert(nir_dest_bit_size(intr->dest) == 32);
3123
3124   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3125   const struct dxil_value *offset =
3126      get_src(ctx, &intr->src[1], 0, nir_type_uint);
3127
3128   if (!handle || !offset)
3129      return false;
3130
3131   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32);
3132   if (!agg)
3133      return false;
3134
3135   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
3136      store_dest_value(ctx, &intr->dest, i,
3137                       dxil_emit_extractval(&ctx->mod, agg, i));
3138
3139   return true;
3140}
3141
3142/* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
3143 * between control points and patch variables in HS/DS
3144 */
3145static nir_variable *
3146find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3147{
3148   nir_foreach_variable_with_modes(var, s, mode) {
3149      if (var->data.driver_location == driver_location &&
3150          var->data.patch == patch)
3151         return var;
3152   }
3153   return NULL;
3154}
3155
3156static bool
3157emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3158{
3159   assert(intr->intrinsic == nir_intrinsic_store_output ||
3160          ctx->mod.shader_kind == DXIL_HULL_SHADER);
3161   bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
3162      ctx->mod.shader_kind == DXIL_HULL_SHADER;
3163   nir_alu_type out_type = nir_intrinsic_src_type(intr);
3164   enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
3165   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
3166      "dx.op.storePatchConstant" : "dx.op.storeOutput",
3167      overload);
3168
3169   if (!func)
3170      return false;
3171
3172   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
3173      DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
3174   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3175   unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;
3176
3177   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3178    * generation, so muck with them here too.
3179    */
3180   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3181   bool is_tess_level = is_patch_constant &&
3182                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3183                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3184
3185   const struct dxil_value *row = NULL;
3186   const struct dxil_value *col = NULL;
3187   if (is_tess_level)
3188      col = dxil_module_get_int8_const(&ctx->mod, 0);
3189   else
3190      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3191
3192   bool success = true;
3193   uint32_t writemask = nir_intrinsic_write_mask(intr);
3194
3195   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
3196   unsigned var_base_component = var->data.location_frac;
3197   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3198
3199   if (ctx->mod.minor_validator >= 5) {
3200      struct dxil_signature_record *sig_rec = is_patch_constant ?
3201         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3202         &ctx->mod.outputs[nir_intrinsic_base(intr)];
3203      unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
3204      unsigned comp_mask = 0;
3205      if (is_tess_level)
3206         comp_mask = 1;
3207      else if (comp_size == 1)
3208         comp_mask = writemask << var_base_component;
3209      else {
3210         for (unsigned i = 0; i < intr->num_components; ++i)
3211            if ((writemask & (1 << i)))
3212               comp_mask |= 3 << ((i + var_base_component) * comp_size);
3213      }
3214      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3215         sig_rec->elements[r].never_writes_mask &= ~comp_mask;
3216
3217      if (!nir_src_is_const(intr->src[row_index])) {
3218         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3219            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3220            &ctx->mod.psv_outputs[nir_intrinsic_base(intr)];
3221         psv_rec->dynamic_mask_and_stream |= comp_mask;
3222      }
3223   }
3224
3225   for (unsigned i = 0; i < intr->num_components && success; ++i) {
3226      if (writemask & (1 << i)) {
3227         if (is_tess_level)
3228            row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3229         else
3230            col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3231         const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
3232         if (!col || !row || !value)
3233            return false;
3234
3235         const struct dxil_value *args[] = {
3236            opcode, output_id, row, col, value
3237         };
3238         success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3239      }
3240   }
3241
3242   return success;
3243}
3244
3245static bool
3246emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3247{
3248   bool attr_at_vertex = false;
3249   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
3250      ctx->opts->interpolate_at_vertex &&
3251      ctx->opts->provoking_vertex != 0 &&
3252      (nir_intrinsic_dest_type(intr) & nir_type_float)) {
3253      nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));
3254
3255      attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
3256   }
3257
3258   bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
3259                             intr->intrinsic == nir_intrinsic_load_input) ||
3260                            (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
3261                             intr->intrinsic == nir_intrinsic_load_output);
3262   bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3263
3264   unsigned opcode_val;
3265   const char *func_name;
3266   if (attr_at_vertex) {
3267      opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
3268      func_name = "dx.op.attributeAtVertex";
3269      if (ctx->mod.minor_validator >= 6)
3270         ctx->mod.feats.barycentrics = 1;
3271   } else if (is_patch_constant) {
3272      opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
3273      func_name = "dx.op.loadPatchConstant";
3274   } else if (is_output_control_point) {
3275      opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
3276      func_name = "dx.op.loadOutputControlPoint";
3277   } else {
3278      opcode_val = DXIL_INTR_LOAD_INPUT;
3279      func_name = "dx.op.loadInput";
3280   }
3281
3282   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3283   if (!opcode)
3284      return false;
3285
3286   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod,
3287      is_patch_constant || is_output_control_point ?
3288         nir_intrinsic_base(intr) :
3289         ctx->mod.input_mappings[nir_intrinsic_base(intr)]);
3290   if (!input_id)
3291      return false;
3292
3293   bool is_per_vertex =
3294      intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
3295      intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3296   int row_index = is_per_vertex ? 1 : 0;
3297   const struct dxil_value *vertex_id = NULL;
3298   if (!is_patch_constant) {
3299      if (is_per_vertex) {
3300         vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
3301      } else if (attr_at_vertex) {
3302         vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
3303      } else {
3304         const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
3305         if (!int32_type)
3306            return false;
3307
3308         vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
3309      }
3310      if (!vertex_id)
3311         return false;
3312   }
3313
3314   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3315    * generation, so muck with them here too.
3316    */
3317   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3318   bool is_tess_level = is_patch_constant &&
3319                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3320                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3321
3322   const struct dxil_value *row = NULL;
3323   const struct dxil_value *comp = NULL;
3324   if (is_tess_level)
3325      comp = dxil_module_get_int8_const(&ctx->mod, 0);
3326   else
3327      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3328
3329   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3330   enum overload_type overload = get_overload(out_type, intr->dest.ssa.bit_size);
3331
3332   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);
3333
3334   if (!func)
3335      return false;
3336
3337   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
3338   unsigned var_base_component = var ? var->data.location_frac : 0;
3339   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3340
3341   if (ctx->mod.minor_validator >= 5 &&
3342       !is_output_control_point &&
3343       intr->intrinsic != nir_intrinsic_load_output) {
3344      struct dxil_signature_record *sig_rec = is_patch_constant ?
3345         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3346         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3347      unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1;
3348      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3349      comp_mask <<= (var_base_component * comp_size);
3350      if (is_tess_level)
3351         comp_mask = 1;
3352      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3353         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3354
3355      if (!nir_src_is_const(intr->src[row_index])) {
3356         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3357            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3358            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3359         psv_rec->dynamic_mask_and_stream |= comp_mask;
3360      }
3361   }
3362
3363   for (unsigned i = 0; i < intr->num_components; ++i) {
3364      if (is_tess_level)
3365         row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3366      else
3367         comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3368
3369      if (!row || !comp)
3370         return false;
3371
3372      const struct dxil_value *args[] = {
3373         opcode, input_id, row, comp, vertex_id
3374      };
3375
3376      unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
3377      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3378      if (!retval)
3379         return false;
3380      store_dest(ctx, &intr->dest, i, retval, out_type);
3381   }
3382   return true;
3383}
3384
3385static bool
3386emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3387{
3388   nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);
3389
3390   const struct dxil_value *args[6] = { 0 };
3391
3392   unsigned opcode_val;
3393   const char *func_name;
3394   unsigned num_args;
3395   switch (barycentric->intrinsic) {
3396   case nir_intrinsic_load_barycentric_at_offset:
3397      opcode_val = DXIL_INTR_EVAL_SNAPPED;
3398      func_name = "dx.op.evalSnapped";
3399      num_args = 6;
3400      for (unsigned i = 0; i < 2; ++i) {
3401         const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
3402         /* GLSL uses [-0.5f, 0.5f), DXIL uses (-8, 7) */
3403         const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
3404            DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
3405         args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
3406            dxil_module_get_int_type(&ctx->mod, 32), offset_16);
3407      }
3408      break;
3409   case nir_intrinsic_load_barycentric_pixel:
3410      opcode_val = DXIL_INTR_EVAL_SNAPPED;
3411      func_name = "dx.op.evalSnapped";
3412      num_args = 6;
3413      args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
3414      break;
3415   case nir_intrinsic_load_barycentric_at_sample:
3416      opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
3417      func_name = "dx.op.evalSampleIndex";
3418      num_args = 5;
3419      args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
3420      break;
3421   case nir_intrinsic_load_barycentric_centroid:
3422      opcode_val = DXIL_INTR_EVAL_CENTROID;
3423      func_name = "dx.op.evalCentroid";
3424      num_args = 4;
3425      break;
3426   default:
3427      unreachable("Unsupported interpolation barycentric intrinsic");
3428   }
3429   args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3430   args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3431   args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);
3432
3433   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);
3434
3435   if (!func)
3436      return false;
3437
3438   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
3439   unsigned var_base_component = var ? var->data.location_frac : 0;
3440   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3441
3442   if (ctx->mod.minor_validator >= 5) {
3443      struct dxil_signature_record *sig_rec =
3444         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3445      unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1;
3446      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3447      comp_mask <<= (var_base_component * comp_size);
3448      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3449         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3450
3451      if (!nir_src_is_const(intr->src[1])) {
3452         struct dxil_psv_signature_element *psv_rec =
3453            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3454         psv_rec->dynamic_mask_and_stream |= comp_mask;
3455      }
3456   }
3457
3458   for (unsigned i = 0; i < intr->num_components; ++i) {
3459      args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3460
3461      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3462      if (!retval)
3463         return false;
3464      store_dest(ctx, &intr->dest, i, retval, nir_type_float);
3465   }
3466   return true;
3467}
3468
3469static bool
3470emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3471{
3472   struct nir_variable *var =
3473      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
3474
3475   const struct dxil_value *index =
3476      get_src(ctx, &intr->src[1], 0, nir_type_uint);
3477   if (!index)
3478      return false;
3479
3480   const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
3481   if (!ptr)
3482      return false;
3483
3484   const struct dxil_value *retval =
3485      dxil_emit_load(&ctx->mod, ptr, 4, false);
3486   if (!retval)
3487      return false;
3488
3489   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3490   return true;
3491}
3492
3493static bool
3494emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3495{
3496   const struct dxil_value *zero, *index;
3497   unsigned bit_size = nir_dest_bit_size(intr->dest);
3498   unsigned align = bit_size / 8;
3499
3500   /* All shared mem accesses should have been lowered to scalar 32bit
3501    * accesses.
3502    */
3503   assert(bit_size == 32);
3504   assert(nir_dest_num_components(intr->dest) == 1);
3505
3506   zero = dxil_module_get_int32_const(&ctx->mod, 0);
3507   if (!zero)
3508      return false;
3509
3510   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3511   if (!index)
3512      return false;
3513
3514   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3515   const struct dxil_value *ptr, *retval;
3516
3517   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3518   if (!ptr)
3519      return false;
3520
3521   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
3522   if (!retval)
3523      return false;
3524
3525   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3526   return true;
3527}
3528
3529static bool
3530emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3531{
3532   const struct dxil_value *zero, *index;
3533   unsigned bit_size = nir_dest_bit_size(intr->dest);
3534   unsigned align = bit_size / 8;
3535
3536   /* All scratch mem accesses should have been lowered to scalar 32bit
3537    * accesses.
3538    */
3539   assert(bit_size == 32);
3540   assert(nir_dest_num_components(intr->dest) == 1);
3541
3542   zero = dxil_module_get_int32_const(&ctx->mod, 0);
3543   if (!zero)
3544      return false;
3545
3546   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3547   if (!index)
3548      return false;
3549
3550   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
3551   const struct dxil_value *ptr, *retval;
3552
3553   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3554   if (!ptr)
3555      return false;
3556
3557   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
3558   if (!retval)
3559      return false;
3560
3561   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3562   return true;
3563}
3564
3565static bool
3566emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3567{
3568   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3569   if (!opcode)
3570      return false;
3571
3572   const struct dxil_value *args[] = {
3573     opcode,
3574     value
3575   };
3576
3577   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
3578   if (!func)
3579      return false;
3580
3581   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3582}
3583
3584static bool
3585emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3586{
3587   const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
3588   if (!value)
3589      return false;
3590
3591   return emit_discard_if_with_value(ctx, value);
3592}
3593
3594static bool
3595emit_discard(struct ntd_context *ctx)
3596{
3597   const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
3598   return emit_discard_if_with_value(ctx, value);
3599}
3600
3601static bool
3602emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3603{
3604   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
3605   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3606   if (!opcode || !stream_id)
3607      return false;
3608
3609   const struct dxil_value *args[] = {
3610     opcode,
3611     stream_id
3612   };
3613
3614   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
3615   if (!func)
3616      return false;
3617
3618   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3619}
3620
3621static bool
3622emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3623{
3624   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
3625   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3626   if (!opcode || !stream_id)
3627      return false;
3628
3629   const struct dxil_value *args[] = {
3630     opcode,
3631     stream_id
3632   };
3633
3634   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
3635   if (!func)
3636      return false;
3637
3638   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3639}
3640
3641static bool
3642emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3643{
3644   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
3645   if (!handle)
3646      return false;
3647
3648   bool is_array = false;
3649   if (intr->intrinsic == nir_intrinsic_image_deref_store)
3650      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
3651   else
3652      is_array = nir_intrinsic_image_array(intr);
3653
3654   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3655   if (!int32_undef)
3656      return false;
3657
3658   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3659   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ?
3660      nir_intrinsic_image_dim(intr) :
3661      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
3662   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3663   if (is_array)
3664      ++num_coords;
3665
3666   assert(num_coords <= nir_src_num_components(intr->src[1]));
3667   for (unsigned i = 0; i < num_coords; ++i) {
3668      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3669      if (!coord[i])
3670         return false;
3671   }
3672
3673   nir_alu_type in_type = nir_intrinsic_src_type(intr);
3674   enum overload_type overload = get_overload(in_type, 32);
3675
3676   assert(nir_src_bit_size(intr->src[3]) == 32);
3677   unsigned num_components = nir_src_num_components(intr->src[3]);
3678   assert(num_components <= 4);
3679   const struct dxil_value *value[4];
3680   for (unsigned i = 0; i < num_components; ++i) {
3681      value[i] = get_src(ctx, &intr->src[3], i, in_type);
3682      if (!value[i])
3683         return false;
3684   }
3685
3686   for (int i = num_components; i < 4; ++i)
3687      value[i] = int32_undef;
3688
3689   const struct dxil_value *write_mask =
3690      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3691   if (!write_mask)
3692      return false;
3693
3694   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
3695      coord[1] = int32_undef;
3696      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3697   } else
3698      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
3699}
3700
3701static bool
3702emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3703{
3704   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
3705   if (!handle)
3706      return false;
3707
3708   bool is_array = false;
3709   if (intr->intrinsic == nir_intrinsic_image_deref_load)
3710      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
3711   else
3712      is_array = nir_intrinsic_image_array(intr);
3713
3714   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3715   if (!int32_undef)
3716      return false;
3717
3718   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3719   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ?
3720      nir_intrinsic_image_dim(intr) :
3721      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
3722   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3723   if (is_array)
3724      ++num_coords;
3725
3726   assert(num_coords <= nir_src_num_components(intr->src[1]));
3727   for (unsigned i = 0; i < num_coords; ++i) {
3728      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3729      if (!coord[i])
3730         return false;
3731   }
3732
3733   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3734   enum overload_type overload = get_overload(out_type, 32);
3735
3736   const struct dxil_value *load_result;
3737   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
3738      coord[1] = int32_undef;
3739      load_result = emit_bufferload_call(ctx, handle, coord, overload);
3740   } else
3741      load_result = emit_textureload_call(ctx, handle, coord, overload);
3742
3743   if (!load_result)
3744      return false;
3745
3746   assert(nir_dest_bit_size(intr->dest) == 32);
3747   unsigned num_components = nir_dest_num_components(intr->dest);
3748   assert(num_components <= 4);
3749   for (unsigned i = 0; i < num_components; ++i) {
3750      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
3751      if (!component)
3752         return false;
3753      store_dest(ctx, &intr->dest, i, component, out_type);
3754   }
3755
3756   /* FIXME: This flag should be set to true when the RWTexture is attached
3757    * a vector, and we always declare a vec4 right now, so it should always be
3758    * true. Might be worth reworking the dxil_module_get_res_type() to use a
3759    * scalar when the image only has one component.
3760    */
3761   ctx->mod.feats.typed_uav_load_additional_formats = true;
3762
3763   return true;
3764}
3765
3766static bool
3767emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
3768                  enum dxil_atomic_op op, nir_alu_type type)
3769{
3770   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
3771   if (!handle)
3772      return false;
3773
3774   bool is_array = false;
3775   nir_deref_instr *src_as_deref = nir_src_as_deref(intr->src[0]);
3776   if (src_as_deref)
3777      is_array = glsl_sampler_type_is_array(src_as_deref->type);
3778   else
3779      is_array = nir_intrinsic_image_array(intr);
3780
3781   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3782   if (!int32_undef)
3783      return false;
3784
3785   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3786   enum glsl_sampler_dim image_dim = src_as_deref ?
3787      glsl_get_sampler_dim(src_as_deref->type) :
3788      nir_intrinsic_image_dim(intr);
3789   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3790   if (is_array)
3791      ++num_coords;
3792
3793   assert(num_coords <= nir_src_num_components(intr->src[1]));
3794   for (unsigned i = 0; i < num_coords; ++i) {
3795      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3796      if (!coord[i])
3797         return false;
3798   }
3799
3800   const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
3801   if (!value)
3802      return false;
3803
3804   const struct dxil_value *retval =
3805      emit_atomic_binop(ctx, handle, op, coord, value);
3806
3807   if (!retval)
3808      return false;
3809
3810   store_dest(ctx, &intr->dest, 0, retval, type);
3811   return true;
3812}
3813
3814static bool
3815emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3816{
3817   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
3818   if (!handle)
3819      return false;
3820
3821   bool is_array = false;
3822   if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
3823      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
3824   else
3825      is_array = nir_intrinsic_image_array(intr);
3826
3827   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3828   if (!int32_undef)
3829      return false;
3830
3831   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3832   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_atomic_comp_swap ?
3833      nir_intrinsic_image_dim(intr) :
3834      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
3835   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3836   if (is_array)
3837      ++num_coords;
3838
3839   assert(num_coords <= nir_src_num_components(intr->src[1]));
3840   for (unsigned i = 0; i < num_coords; ++i) {
3841      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3842      if (!coord[i])
3843         return false;
3844   }
3845
3846   const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
3847   const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
3848   if (!cmpval || !newval)
3849      return false;
3850
3851   const struct dxil_value *retval =
3852      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
3853
3854   if (!retval)
3855      return false;
3856
3857   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3858   return true;
3859}
3860
3861struct texop_parameters {
3862   const struct dxil_value *tex;
3863   const struct dxil_value *sampler;
3864   const struct dxil_value *bias, *lod_or_sample, *min_lod;
3865   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
3866   const struct dxil_value *cmp;
3867   enum overload_type overload;
3868};
3869
3870static const struct dxil_value *
3871emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
3872{
3873   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
3874   if (!func)
3875      return false;
3876
3877   const struct dxil_value *args[] = {
3878      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
3879      params->tex,
3880      params->lod_or_sample
3881   };
3882
3883   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3884}
3885
3886static bool
3887emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3888{
3889   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
3890   if (!handle)
3891      return false;
3892
3893   const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3894   if (!lod)
3895      return false;
3896
3897   struct texop_parameters params = {
3898      .tex = handle,
3899      .lod_or_sample = lod
3900   };
3901   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
3902   if (!dimensions)
3903      return false;
3904
3905   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
3906      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
3907      store_dest(ctx, &intr->dest, i, retval, nir_type_uint);
3908   }
3909
3910   return true;
3911}
3912
3913static bool
3914emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3915{
3916   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
3917   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3918      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3919      if (var && var->data.access & ACCESS_NON_WRITEABLE)
3920         class = DXIL_RESOURCE_CLASS_SRV;
3921   }
3922
3923   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
3924   if (!handle)
3925      return false;
3926
3927   struct texop_parameters params = {
3928      .tex = handle,
3929      .lod_or_sample = dxil_module_get_undef(
3930                        &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
3931   };
3932
3933   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
3934   if (!dimensions)
3935      return false;
3936
3937   const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
3938   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3939
3940   return true;
3941}
3942
3943static bool
3944emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
3945                   enum dxil_atomic_op op, nir_alu_type type)
3946{
3947   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
3948   const struct dxil_value *offset =
3949      get_src(ctx, &intr->src[1], 0, nir_type_uint);
3950   const struct dxil_value *value =
3951      get_src(ctx, &intr->src[2], 0, type);
3952
3953   if (!value || !handle || !offset)
3954      return false;
3955
3956   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3957   if (!int32_undef)
3958      return false;
3959
3960   const struct dxil_value *coord[3] = {
3961      offset, int32_undef, int32_undef
3962   };
3963
3964   const struct dxil_value *retval =
3965      emit_atomic_binop(ctx, handle, op, coord, value);
3966
3967   if (!retval)
3968      return false;
3969
3970   store_dest(ctx, &intr->dest, 0, retval, type);
3971   return true;
3972}
3973
3974static bool
3975emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3976{
3977   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
3978   const struct dxil_value *offset =
3979      get_src(ctx, &intr->src[1], 0, nir_type_uint);
3980   const struct dxil_value *cmpval =
3981      get_src(ctx, &intr->src[2], 0, nir_type_int);
3982   const struct dxil_value *newval =
3983      get_src(ctx, &intr->src[3], 0, nir_type_int);
3984
3985   if (!cmpval || !newval || !handle || !offset)
3986      return false;
3987
3988   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3989   if (!int32_undef)
3990      return false;
3991
3992   const struct dxil_value *coord[3] = {
3993      offset, int32_undef, int32_undef
3994   };
3995
3996   const struct dxil_value *retval =
3997      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
3998
3999   if (!retval)
4000      return false;
4001
4002   store_dest(ctx, &intr->dest, 0, retval, nir_type_int);
4003   return true;
4004}
4005
4006static bool
4007emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
4008                   enum dxil_rmw_op op, nir_alu_type type)
4009{
4010   const struct dxil_value *zero, *index;
4011
4012   assert(nir_src_bit_size(intr->src[1]) == 32);
4013
4014   zero = dxil_module_get_int32_const(&ctx->mod, 0);
4015   if (!zero)
4016      return false;
4017
4018   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
4019   if (!index)
4020      return false;
4021
4022   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
4023   const struct dxil_value *ptr, *value, *retval;
4024
4025   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
4026   if (!ptr)
4027      return false;
4028
4029   value = get_src(ctx, &intr->src[1], 0, type);
4030   if (!value)
4031      return false;
4032
4033   retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false,
4034                                DXIL_ATOMIC_ORDERING_ACQREL,
4035                                DXIL_SYNC_SCOPE_CROSSTHREAD);
4036   if (!retval)
4037      return false;
4038
4039   store_dest(ctx, &intr->dest, 0, retval, type);
4040   return true;
4041}
4042
4043static bool
4044emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4045{
4046   const struct dxil_value *zero, *index;
4047
4048   assert(nir_src_bit_size(intr->src[1]) == 32);
4049
4050   zero = dxil_module_get_int32_const(&ctx->mod, 0);
4051   if (!zero)
4052      return false;
4053
4054   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
4055   if (!index)
4056      return false;
4057
4058   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
4059   const struct dxil_value *ptr, *cmpval, *newval, *retval;
4060
4061   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
4062   if (!ptr)
4063      return false;
4064
4065   cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint);
4066   newval = get_src(ctx, &intr->src[2], 0, nir_type_uint);
4067   if (!cmpval || !newval)
4068      return false;
4069
4070   retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false,
4071                              DXIL_ATOMIC_ORDERING_ACQREL,
4072                              DXIL_SYNC_SCOPE_CROSSTHREAD);
4073   if (!retval)
4074      return false;
4075
4076   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
4077   return true;
4078}
4079
4080static bool
4081emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4082{
4083   unsigned int binding = nir_intrinsic_binding(intr);
4084
4085   bool const_index = nir_src_is_const(intr->src[0]);
4086   if (const_index) {
4087      binding += nir_src_as_const_value(intr->src[0])->u32;
4088   }
4089
4090   const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
4091   if (!index_value)
4092      return false;
4093
4094   if (!const_index) {
4095      const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4096      if (!offset)
4097         return false;
4098
4099      index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
4100      if (!index_value)
4101         return false;
4102   }
4103
4104   store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32);
4105   store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32);
4106   return true;
4107}
4108
4109static bool
4110emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4111{
4112   nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
4113   /* We currently do not support reindex */
4114   assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);
4115
4116   unsigned binding = nir_intrinsic_binding(index);
4117   unsigned space = nir_intrinsic_desc_set(index);
4118
4119   /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
4120   assert(space < 32);
4121
4122   nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4123
4124   const struct dxil_value *handle = NULL;
4125   enum dxil_resource_class resource_class;
4126
4127   switch (nir_intrinsic_desc_type(intr)) {
4128   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4129      resource_class = DXIL_RESOURCE_CLASS_CBV;
4130      break;
4131   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4132      if (var->data.access & ACCESS_NON_WRITEABLE)
4133         resource_class = DXIL_RESOURCE_CLASS_SRV;
4134      else
4135         resource_class = DXIL_RESOURCE_CLASS_UAV;
4136      break;
4137   default:
4138      unreachable("unknown descriptor type");
4139      return false;
4140   }
4141
4142   const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4143   if (!index_value)
4144      return false;
4145
4146   handle = emit_createhandle_call(ctx, resource_class,
4147      get_resource_id(ctx, resource_class, space, binding),
4148      index_value, false);
4149
4150   store_dest_value(ctx, &intr->dest, 0, handle);
4151   store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32);
4152
4153   return true;
4154}
4155
4156static bool
4157emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4158{
4159   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
4160   if (!func)
4161      return false;
4162
4163   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
4164   if (!opcode)
4165      return false;
4166
4167   const struct dxil_value *args[] = {
4168      opcode,
4169      get_src(ctx, &intr->src[0], 0, nir_type_uint32),
4170   };
4171   if (!args[1])
4172      return false;
4173
4174   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4175   if (!v)
4176      return false;
4177
4178   for (unsigned i = 0; i < 2; ++i) {
4179      /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
4180      const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4181         dxil_emit_extractval(&ctx->mod, v, i),
4182         dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
4183      store_dest(ctx, &intr->dest, i, coord, nir_type_float32);
4184   }
4185   return true;
4186}
4187
4188static bool
4189emit_load_layer_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4190{
4191   const struct dxil_value *layer_id = dxil_module_get_int32_const(&ctx->mod, 0);
4192   /* TODO: Properly implement this once multi-view is supported */
4193   store_dest_value(ctx, &intr->dest, 0, layer_id);
4194   return true;
4195}
4196
4197static bool
4198emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4199{
4200   assert(ctx->mod.info.has_per_sample_input ||
4201          intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);
4202
4203   if (ctx->mod.info.has_per_sample_input)
4204      return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
4205                                               DXIL_INTR_SAMPLE_INDEX);
4206
4207   store_dest_value(ctx, &intr->dest, 0, dxil_module_get_int32_const(&ctx->mod, 0));
4208   return true;
4209}
4210
4211static bool
4212emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4213{
4214   switch (intr->intrinsic) {
4215   case nir_intrinsic_load_global_invocation_id:
4216   case nir_intrinsic_load_global_invocation_id_zero_base:
4217      return emit_load_global_invocation_id(ctx, intr);
4218   case nir_intrinsic_load_local_invocation_id:
4219      return emit_load_local_invocation_id(ctx, intr);
4220   case nir_intrinsic_load_local_invocation_index:
4221      return emit_load_local_invocation_index(ctx, intr);
4222   case nir_intrinsic_load_workgroup_id:
4223   case nir_intrinsic_load_workgroup_id_zero_base:
4224      return emit_load_local_workgroup_id(ctx, intr);
4225   case nir_intrinsic_load_ssbo:
4226      return emit_load_ssbo(ctx, intr);
4227   case nir_intrinsic_store_ssbo:
4228      return emit_store_ssbo(ctx, intr);
4229   case nir_intrinsic_store_ssbo_masked_dxil:
4230      return emit_store_ssbo_masked(ctx, intr);
4231   case nir_intrinsic_store_shared_dxil:
4232   case nir_intrinsic_store_shared_masked_dxil:
4233      return emit_store_shared(ctx, intr);
4234   case nir_intrinsic_store_scratch_dxil:
4235      return emit_store_scratch(ctx, intr);
4236   case nir_intrinsic_load_ptr_dxil:
4237      return emit_load_ptr(ctx, intr);
4238   case nir_intrinsic_load_ubo:
4239      return emit_load_ubo(ctx, intr);
4240   case nir_intrinsic_load_ubo_dxil:
4241      return emit_load_ubo_dxil(ctx, intr);
4242   case nir_intrinsic_load_primitive_id:
4243      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
4244                                               DXIL_INTR_PRIMITIVE_ID);
4245   case nir_intrinsic_load_sample_id:
4246   case nir_intrinsic_load_sample_id_no_per_sample:
4247      return emit_load_sample_id(ctx, intr);
4248   case nir_intrinsic_load_invocation_id:
4249      switch (ctx->mod.shader_kind) {
4250      case DXIL_HULL_SHADER:
4251         return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
4252                                                  DXIL_INTR_OUTPUT_CONTROL_POINT_ID);
4253      case DXIL_GEOMETRY_SHADER:
4254         return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
4255                                                  DXIL_INTR_GS_INSTANCE_ID);
4256      default:
4257         unreachable("Unexpected shader kind for invocation ID");
4258      }
4259   case nir_intrinsic_load_sample_mask_in:
4260      return emit_load_sample_mask_in(ctx, intr);
4261   case nir_intrinsic_load_tess_coord:
4262      return emit_load_tess_coord(ctx, intr);
4263   case nir_intrinsic_load_shared_dxil:
4264      return emit_load_shared(ctx, intr);
4265   case nir_intrinsic_load_scratch_dxil:
4266      return emit_load_scratch(ctx, intr);
4267   case nir_intrinsic_discard_if:
4268   case nir_intrinsic_demote_if:
4269      return emit_discard_if(ctx, intr);
4270   case nir_intrinsic_discard:
4271   case nir_intrinsic_demote:
4272      return emit_discard(ctx);
4273   case nir_intrinsic_emit_vertex:
4274      return emit_emit_vertex(ctx, intr);
4275   case nir_intrinsic_end_primitive:
4276      return emit_end_primitive(ctx, intr);
4277   case nir_intrinsic_scoped_barrier:
4278      return emit_barrier(ctx, intr);
4279   case nir_intrinsic_memory_barrier:
4280   case nir_intrinsic_memory_barrier_buffer:
4281   case nir_intrinsic_memory_barrier_image:
4282   case nir_intrinsic_memory_barrier_atomic_counter:
4283      return emit_memory_barrier(ctx, intr);
4284   case nir_intrinsic_memory_barrier_shared:
4285      return emit_memory_barrier_shared(ctx, intr);
4286   case nir_intrinsic_group_memory_barrier:
4287      return emit_group_memory_barrier(ctx, intr);
4288   case nir_intrinsic_control_barrier:
4289      return emit_control_barrier(ctx, intr);
4290   case nir_intrinsic_ssbo_atomic_add:
4291      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
4292   case nir_intrinsic_ssbo_atomic_imin:
4293      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
4294   case nir_intrinsic_ssbo_atomic_umin:
4295      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
4296   case nir_intrinsic_ssbo_atomic_imax:
4297      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
4298   case nir_intrinsic_ssbo_atomic_umax:
4299      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
4300   case nir_intrinsic_ssbo_atomic_and:
4301      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
4302   case nir_intrinsic_ssbo_atomic_or:
4303      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
4304   case nir_intrinsic_ssbo_atomic_xor:
4305      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
4306   case nir_intrinsic_ssbo_atomic_exchange:
4307      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
4308   case nir_intrinsic_ssbo_atomic_comp_swap:
4309      return emit_ssbo_atomic_comp_swap(ctx, intr);
4310   case nir_intrinsic_shared_atomic_add_dxil:
4311      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
4312   case nir_intrinsic_shared_atomic_imin_dxil:
4313      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
4314   case nir_intrinsic_shared_atomic_umin_dxil:
4315      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
4316   case nir_intrinsic_shared_atomic_imax_dxil:
4317      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
4318   case nir_intrinsic_shared_atomic_umax_dxil:
4319      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
4320   case nir_intrinsic_shared_atomic_and_dxil:
4321      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
4322   case nir_intrinsic_shared_atomic_or_dxil:
4323      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
4324   case nir_intrinsic_shared_atomic_xor_dxil:
4325      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
4326   case nir_intrinsic_shared_atomic_exchange_dxil:
4327      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
4328   case nir_intrinsic_shared_atomic_comp_swap_dxil:
4329      return emit_shared_atomic_comp_swap(ctx, intr);
4330   case nir_intrinsic_image_deref_atomic_add:
4331   case nir_intrinsic_image_atomic_add:
4332      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
4333   case nir_intrinsic_image_deref_atomic_imin:
4334   case nir_intrinsic_image_atomic_imin:
4335      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
4336   case nir_intrinsic_image_deref_atomic_umin:
4337   case nir_intrinsic_image_atomic_umin:
4338      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
4339   case nir_intrinsic_image_deref_atomic_imax:
4340   case nir_intrinsic_image_atomic_imax:
4341      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
4342   case nir_intrinsic_image_deref_atomic_umax:
4343   case nir_intrinsic_image_atomic_umax:
4344      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_uint);
4345   case nir_intrinsic_image_deref_atomic_and:
4346   case nir_intrinsic_image_atomic_and:
4347      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
4348   case nir_intrinsic_image_deref_atomic_or:
4349   case nir_intrinsic_image_atomic_or:
4350      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
4351   case nir_intrinsic_image_deref_atomic_xor:
4352   case nir_intrinsic_image_atomic_xor:
4353      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
4354   case nir_intrinsic_image_deref_atomic_exchange:
4355   case nir_intrinsic_image_atomic_exchange:
4356      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_uint);
4357   case nir_intrinsic_image_deref_atomic_comp_swap:
4358   case nir_intrinsic_image_atomic_comp_swap:
4359      return emit_image_atomic_comp_swap(ctx, intr);
4360   case nir_intrinsic_image_store:
4361   case nir_intrinsic_image_deref_store:
4362      return emit_image_store(ctx, intr);
4363   case nir_intrinsic_image_load:
4364   case nir_intrinsic_image_deref_load:
4365      return emit_image_load(ctx, intr);
4366   case nir_intrinsic_image_size:
4367   case nir_intrinsic_image_deref_size:
4368      return emit_image_size(ctx, intr);
4369   case nir_intrinsic_get_ssbo_size:
4370      return emit_get_ssbo_size(ctx, intr);
4371   case nir_intrinsic_load_input:
4372   case nir_intrinsic_load_per_vertex_input:
4373   case nir_intrinsic_load_output:
4374   case nir_intrinsic_load_per_vertex_output:
4375      return emit_load_input_via_intrinsic(ctx, intr);
4376   case nir_intrinsic_store_output:
4377   case nir_intrinsic_store_per_vertex_output:
4378      return emit_store_output_via_intrinsic(ctx, intr);
4379
4380   case nir_intrinsic_load_barycentric_at_offset:
4381   case nir_intrinsic_load_barycentric_at_sample:
4382   case nir_intrinsic_load_barycentric_centroid:
4383   case nir_intrinsic_load_barycentric_pixel:
4384      /* Emit nothing, we only support these as inputs to load_interpolated_input */
4385      return true;
4386   case nir_intrinsic_load_interpolated_input:
4387      return emit_load_interpolated_input(ctx, intr);
4388      break;
4389
4390   case nir_intrinsic_vulkan_resource_index:
4391      return emit_vulkan_resource_index(ctx, intr);
4392   case nir_intrinsic_load_vulkan_descriptor:
4393      return emit_load_vulkan_descriptor(ctx, intr);
4394   case nir_intrinsic_load_layer_id:
4395      return emit_load_layer_id(ctx, intr);
4396
4397   case nir_intrinsic_load_sample_pos_from_id:
4398      return emit_load_sample_pos_from_id(ctx, intr);
4399
4400   case nir_intrinsic_load_num_workgroups:
4401   case nir_intrinsic_load_workgroup_size:
4402   default:
4403      NIR_INSTR_UNSUPPORTED(&intr->instr);
4404      unreachable("Unimplemented intrinsic instruction");
4405      return false;
4406   }
4407}
4408
4409static bool
4410emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
4411{
4412   for (int i = 0; i < load_const->def.num_components; ++i) {
4413      const struct dxil_value *value;
4414      switch (load_const->def.bit_size) {
4415      case 1:
4416         value = dxil_module_get_int1_const(&ctx->mod,
4417                                            load_const->value[i].b);
4418         break;
4419      case 16:
4420         ctx->mod.feats.native_low_precision = true;
4421         value = dxil_module_get_int16_const(&ctx->mod,
4422                                             load_const->value[i].u16);
4423         break;
4424      case 32:
4425         value = dxil_module_get_int32_const(&ctx->mod,
4426                                             load_const->value[i].u32);
4427         break;
4428      case 64:
4429         ctx->mod.feats.int64_ops = true;
4430         value = dxil_module_get_int64_const(&ctx->mod,
4431                                             load_const->value[i].u64);
4432         break;
4433      default:
4434         unreachable("unexpected bit_size");
4435      }
4436      if (!value)
4437         return false;
4438
4439      store_ssa_def(ctx, &load_const->def, i, value);
4440   }
4441   return true;
4442}
4443
4444static bool
4445emit_deref(struct ntd_context* ctx, nir_deref_instr* instr)
4446{
4447   assert(instr->deref_type == nir_deref_type_var ||
4448          instr->deref_type == nir_deref_type_array);
4449
4450   /* In the CL environment, there's nothing to emit. Any references to
4451    * derefs will emit the necessary logic to handle scratch/shared GEP addressing
4452    */
4453   if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
4454      return true;
4455
4456   /* In the Vulkan environment, we don't have cached handles for textures or
4457    * samplers, so let's use the opportunity of walking through the derefs to
4458    * emit those.
4459    */
4460   nir_variable *var = nir_deref_instr_get_variable(instr);
4461   assert(var);
4462
4463   if (!glsl_type_is_sampler(glsl_without_array(var->type)) &&
4464       !glsl_type_is_image(glsl_without_array(var->type)) &&
4465       !glsl_type_is_texture(glsl_without_array(var->type)))
4466      return true;
4467
4468   const struct glsl_type *type = instr->type;
4469   const struct dxil_value *binding;
4470   unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
4471      var->data.driver_location : var->data.binding;
4472
4473   if (instr->deref_type == nir_deref_type_var) {
4474      binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
4475   } else {
4476      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
4477      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
4478      if (!base || !offset)
4479         return false;
4480
4481      if (glsl_type_is_array(instr->type)) {
4482         offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
4483            dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
4484         if (!offset)
4485            return false;
4486      }
4487      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
4488   }
4489
4490   if (!binding)
4491      return false;
4492
4493   /* Haven't finished chasing the deref chain yet, just store the value */
4494   if (glsl_type_is_array(type)) {
4495      store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32);
4496      return true;
4497   }
4498
4499   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
4500   enum dxil_resource_class res_class;
4501   if (glsl_type_is_image(type)) {
4502      if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN &&
4503          (var->data.access & ACCESS_NON_WRITEABLE))
4504         res_class = DXIL_RESOURCE_CLASS_SRV;
4505      else
4506         res_class = DXIL_RESOURCE_CLASS_UAV;
4507   } else if (glsl_type_is_sampler(type)) {
4508      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
4509   } else {
4510      res_class = DXIL_RESOURCE_CLASS_SRV;
4511   }
4512
4513   unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
4514      var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
4515   const struct dxil_value *handle = emit_createhandle_call(ctx, res_class,
4516      get_resource_id(ctx, res_class, descriptor_set, binding_val), binding, false);
4517   if (!handle)
4518      return false;
4519
4520   store_dest_value(ctx, &instr->dest, 0, handle);
4521   return true;
4522}
4523
4524static bool
4525emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
4526                 int true_block, int false_block)
4527{
4528   assert(cond);
4529   assert(true_block >= 0);
4530   assert(false_block >= 0);
4531   return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
4532}
4533
4534static bool
4535emit_branch(struct ntd_context *ctx, int block)
4536{
4537   assert(block >= 0);
4538   return dxil_emit_branch(&ctx->mod, NULL, block, -1);
4539}
4540
4541static bool
4542emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
4543{
4544   switch (instr->type) {
4545   case nir_jump_break:
4546   case nir_jump_continue:
4547      assert(instr->instr.block->successors[0]);
4548      assert(!instr->instr.block->successors[1]);
4549      return emit_branch(ctx, instr->instr.block->successors[0]->index);
4550
4551   default:
4552      unreachable("Unsupported jump type\n");
4553   }
4554}
4555
4556struct phi_block {
4557   unsigned num_components;
4558   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
4559};
4560
4561static bool
4562emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
4563{
4564   unsigned bit_size = nir_dest_bit_size(instr->dest);
4565   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod,
4566                                                           bit_size);
4567
4568   struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
4569   vphi->num_components = nir_dest_num_components(instr->dest);
4570
4571   for (unsigned i = 0; i < vphi->num_components; ++i) {
4572      struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
4573      if (!phi)
4574         return false;
4575      store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi));
4576   }
4577   _mesa_hash_table_insert(ctx->phis, instr, vphi);
4578   return true;
4579}
4580
4581static bool
4582fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
4583          struct phi_block *vphi)
4584{
4585   const struct dxil_value *values[16];
4586   unsigned blocks[16];
4587   for (unsigned i = 0; i < vphi->num_components; ++i) {
4588      size_t num_incoming = 0;
4589      nir_foreach_phi_src(src, instr) {
4590         assert(src->src.is_ssa);
4591         const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
4592         values[num_incoming] = val;
4593         blocks[num_incoming] = src->pred->index;
4594         ++num_incoming;
4595         if (num_incoming == ARRAY_SIZE(values)) {
4596            if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
4597                                       num_incoming))
4598               return false;
4599            num_incoming = 0;
4600         }
4601      }
4602      if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
4603                                                     blocks, num_incoming))
4604         return false;
4605   }
4606   return true;
4607}
4608
4609static unsigned
4610get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
4611          unsigned max_components, nir_tex_src *src, nir_alu_type type)
4612{
4613   unsigned num_components = nir_src_num_components(src->src);
4614   unsigned i = 0;
4615
4616   assert(num_components <= max_components);
4617
4618   for (i = 0; i < num_components; ++i) {
4619      values[i] = get_src(ctx, &src->src, i, type);
4620      if (!values[i])
4621         return 0;
4622   }
4623
4624   return num_components;
4625}
4626
4627#define PAD_SRC(ctx, array, components, undef) \
4628   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
4629      array[i] = undef; \
4630   }
4631
4632static const struct dxil_value *
4633emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
4634{
4635   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
4636   if (!func)
4637      return NULL;
4638
4639   const struct dxil_value *args[11] = {
4640      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
4641      params->tex, params->sampler,
4642      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
4643      params->offset[0], params->offset[1], params->offset[2],
4644      params->min_lod
4645   };
4646
4647   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4648}
4649
4650static const struct dxil_value *
4651emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
4652{
4653   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
4654   if (!func)
4655      return NULL;
4656
4657   assert(params->bias != NULL);
4658
4659   const struct dxil_value *args[12] = {
4660      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
4661      params->tex, params->sampler,
4662      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
4663      params->offset[0], params->offset[1], params->offset[2],
4664      params->bias, params->min_lod
4665   };
4666
4667   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4668}
4669
4670static const struct dxil_value *
4671emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
4672{
4673   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
4674   if (!func)
4675      return NULL;
4676
4677   assert(params->lod_or_sample != NULL);
4678
4679   const struct dxil_value *args[11] = {
4680      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
4681      params->tex, params->sampler,
4682      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
4683      params->offset[0], params->offset[1], params->offset[2],
4684      params->lod_or_sample
4685   };
4686
4687   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4688}
4689
4690static const struct dxil_value *
4691emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
4692{
4693   const struct dxil_func *func;
4694   enum dxil_intr opcode;
4695   int numparam;
4696
4697   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER)  {
4698      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
4699      opcode = DXIL_INTR_SAMPLE_CMP;
4700      numparam = 12;
4701   } else {
4702      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
4703      opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
4704      numparam = 11;
4705   }
4706
4707   if (!func)
4708      return NULL;
4709
4710   const struct dxil_value *args[12] = {
4711      dxil_module_get_int32_const(&ctx->mod, opcode),
4712      params->tex, params->sampler,
4713      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
4714      params->offset[0], params->offset[1], params->offset[2],
4715      params->cmp, params->min_lod
4716   };
4717
4718   return dxil_emit_call(&ctx->mod, func, args, numparam);
4719}
4720
4721static const struct dxil_value *
4722emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
4723{
4724   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
4725   if (!func)
4726      return false;
4727
4728   const struct dxil_value *args[17] = {
4729      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
4730      params->tex, params->sampler,
4731      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
4732      params->offset[0], params->offset[1], params->offset[2],
4733      params->dx[0], params->dx[1], params->dx[2],
4734      params->dy[0], params->dy[1], params->dy[2],
4735      params->min_lod
4736   };
4737
4738   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4739}
4740
4741static const struct dxil_value *
4742emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
4743{
4744   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
4745   if (!func)
4746      return false;
4747
4748   if (!params->lod_or_sample)
4749      params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
4750
4751   const struct dxil_value *args[] = {
4752      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
4753      params->tex,
4754      params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
4755      params->offset[0], params->offset[1], params->offset[2]
4756   };
4757
4758   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4759}
4760
4761static const struct dxil_value *
4762emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
4763{
4764   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
4765   if (!func)
4766      return false;
4767
4768   const struct dxil_value *args[] = {
4769      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
4770      params->tex,
4771      params->sampler,
4772      params->coord[0],
4773      params->coord[1],
4774      params->coord[2],
4775      dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
4776   };
4777
4778   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4779}
4780
4781static const struct dxil_value *
4782emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
4783{
4784   const struct dxil_func *func = dxil_get_function(&ctx->mod,
4785      params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
4786   if (!func)
4787      return false;
4788
4789   const struct dxil_value *args[] = {
4790      dxil_module_get_int32_const(&ctx->mod, params->cmp ?
4791         DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
4792      params->tex,
4793      params->sampler,
4794      params->coord[0],
4795      params->coord[1],
4796      params->coord[2],
4797      params->coord[3],
4798      params->offset[0],
4799      params->offset[1],
4800      dxil_module_get_int32_const(&ctx->mod, component),
4801      params->cmp
4802   };
4803
4804   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
4805}
4806
4807static bool
4808emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
4809{
4810   struct texop_parameters params;
4811   memset(&params, 0, sizeof(struct texop_parameters));
4812   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
4813      params.tex = ctx->srv_handles[instr->texture_index];
4814      params.sampler = ctx->sampler_handles[instr->sampler_index];
4815   }
4816
4817   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
4818   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
4819   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
4820   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);
4821
4822   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
4823   params.overload = get_overload(instr->dest_type, 32);
4824
4825   for (unsigned i = 0; i < instr->num_srcs; i++) {
4826      nir_alu_type type = nir_tex_instr_src_type(instr, i);
4827
4828      switch (instr->src[i].src_type) {
4829      case nir_tex_src_coord:
4830         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
4831                                      &instr->src[i], type);
4832         if (!coord_components)
4833            return false;
4834         break;
4835
4836      case nir_tex_src_offset:
4837         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
4838                                       &instr->src[i],  nir_type_int);
4839         if (!offset_components)
4840            return false;
4841         break;
4842
4843      case nir_tex_src_bias:
4844         assert(instr->op == nir_texop_txb);
4845         assert(nir_src_num_components(instr->src[i].src) == 1);
4846         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
4847         if (!params.bias)
4848            return false;
4849         break;
4850
4851      case nir_tex_src_lod:
4852         assert(nir_src_num_components(instr->src[i].src) == 1);
4853         if (instr->op == nir_texop_txf_ms) {
4854            assert(nir_src_as_int(instr->src[i].src) == 0);
4855            break;
4856         }
4857
4858         /* Buffers don't have a LOD */
4859         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
4860            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
4861         else
4862            params.lod_or_sample = int_undef;
4863         if (!params.lod_or_sample)
4864            return false;
4865         break;
4866
4867      case nir_tex_src_min_lod:
4868         assert(nir_src_num_components(instr->src[i].src) == 1);
4869         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
4870         if (!params.min_lod)
4871            return false;
4872         break;
4873
4874      case nir_tex_src_comparator:
4875         assert(nir_src_num_components(instr->src[i].src) == 1);
4876         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
4877         if (!params.cmp)
4878            return false;
4879         break;
4880
4881      case nir_tex_src_ddx:
4882         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
4883                                   &instr->src[i], nir_type_float);
4884         if (!dx_components)
4885            return false;
4886         break;
4887
4888      case nir_tex_src_ddy:
4889         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
4890                                   &instr->src[i], nir_type_float);
4891         if (!dy_components)
4892            return false;
4893         break;
4894
4895      case nir_tex_src_ms_index:
4896         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
4897         if (!params.lod_or_sample)
4898            return false;
4899         break;
4900
4901      case nir_tex_src_texture_deref:
4902         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
4903         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
4904         break;
4905
4906      case nir_tex_src_sampler_deref:
4907         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
4908         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
4909         break;
4910
4911      case nir_tex_src_texture_offset:
4912         params.tex = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SRV,
4913            get_resource_id(ctx, DXIL_RESOURCE_CLASS_SRV, 0, instr->texture_index),
4914            dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4915               get_src_ssa(ctx, instr->src[i].src.ssa, 0),
4916               dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
4917            instr->texture_non_uniform);
4918         break;
4919
4920      case nir_tex_src_sampler_offset:
4921         if (nir_tex_instr_need_sampler(instr)) {
4922            params.sampler = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
4923               get_resource_id(ctx, DXIL_RESOURCE_CLASS_SAMPLER, 0, instr->sampler_index),
4924               dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4925                  get_src_ssa(ctx, instr->src[i].src.ssa, 0),
4926                  dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
4927               instr->sampler_non_uniform);
4928         }
4929         break;
4930
4931      case nir_tex_src_projector:
4932         unreachable("Texture projector should have been lowered");
4933
4934      default:
4935         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
4936         unreachable("unknown texture source");
4937      }
4938   }
4939
4940   assert(params.tex != NULL);
4941   assert(instr->op == nir_texop_txf ||
4942          instr->op == nir_texop_txf_ms ||
4943          nir_tex_instr_is_query(instr) ||
4944          params.sampler != NULL);
4945
4946   PAD_SRC(ctx, params.coord, coord_components, float_undef);
4947   PAD_SRC(ctx, params.offset, offset_components, int_undef);
4948   if (!params.min_lod) params.min_lod = float_undef;
4949
4950   const struct dxil_value *sample = NULL;
4951   switch (instr->op) {
4952   case nir_texop_txb:
4953      sample = emit_sample_bias(ctx, &params);
4954      break;
4955
4956   case nir_texop_tex:
4957      if (params.cmp != NULL) {
4958         sample = emit_sample_cmp(ctx, &params);
4959         break;
4960      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
4961         sample = emit_sample(ctx, &params);
4962         break;
4963      }
4964      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
4965      FALLTHROUGH;
4966   case nir_texop_txl:
4967      sample = emit_sample_level(ctx, &params);
4968      break;
4969
4970   case nir_texop_txd:
4971      PAD_SRC(ctx, params.dx, dx_components, float_undef);
4972      PAD_SRC(ctx, params.dy, dy_components,float_undef);
4973      sample = emit_sample_grad(ctx, &params);
4974      break;
4975
4976   case nir_texop_txf:
4977   case nir_texop_txf_ms:
4978      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4979         params.coord[1] = int_undef;
4980         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
4981      } else {
4982         PAD_SRC(ctx, params.coord, coord_components, int_undef);
4983         sample = emit_texel_fetch(ctx, &params);
4984      }
4985      break;
4986
4987   case nir_texop_txs:
4988      sample = emit_texture_size(ctx, &params);
4989      break;
4990
4991   case nir_texop_tg4:
4992      sample = emit_texture_gather(ctx, &params, instr->component);
4993      break;
4994
4995   case nir_texop_lod:
4996      sample = emit_texture_lod(ctx, &params, true);
4997      store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type));
4998      sample = emit_texture_lod(ctx, &params, false);
4999      store_dest(ctx, &instr->dest, 1, sample, nir_alu_type_get_base_type(instr->dest_type));
5000      return true;
5001
5002   case nir_texop_query_levels:
5003      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
5004      sample = emit_texture_size(ctx, &params);
5005      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5006      store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type));
5007      return true;
5008
5009   default:
5010      fprintf(stderr, "texture op: %d\n", instr->op);
5011      unreachable("unknown texture op");
5012   }
5013
5014   if (!sample)
5015      return false;
5016
5017   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
5018      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
5019      store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type));
5020   }
5021
5022   return true;
5023}
5024
5025static bool
5026emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef)
5027{
5028   for (unsigned i = 0; i < undef->def.num_components; ++i)
5029      store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
5030   return true;
5031}
5032
5033static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)
5034{
5035   switch (instr->type) {
5036   case nir_instr_type_alu:
5037      return emit_alu(ctx, nir_instr_as_alu(instr));
5038   case nir_instr_type_intrinsic:
5039      return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5040   case nir_instr_type_load_const:
5041      return emit_load_const(ctx, nir_instr_as_load_const(instr));
5042   case nir_instr_type_deref:
5043      return emit_deref(ctx, nir_instr_as_deref(instr));
5044   case nir_instr_type_jump:
5045      return emit_jump(ctx, nir_instr_as_jump(instr));
5046   case nir_instr_type_phi:
5047      return emit_phi(ctx, nir_instr_as_phi(instr));
5048   case nir_instr_type_tex:
5049      return emit_tex(ctx, nir_instr_as_tex(instr));
5050   case nir_instr_type_ssa_undef:
5051      return emit_undefined(ctx, nir_instr_as_ssa_undef(instr));
5052   default:
5053      NIR_INSTR_UNSUPPORTED(instr);
5054      unreachable("Unimplemented instruction type");
5055      return false;
5056   }
5057}
5058
5059
5060static bool
5061emit_block(struct ntd_context *ctx, struct nir_block *block)
5062{
5063   assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
5064   ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;
5065
5066   nir_foreach_instr(instr, block) {
5067      TRACE_CONVERSION(instr);
5068
5069      if (!emit_instr(ctx, instr))  {
5070         return false;
5071      }
5072   }
5073   return true;
5074}
5075
5076static bool
5077emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
5078
5079static bool
5080emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5081{
5082   assert(nir_src_num_components(if_stmt->condition) == 1);
5083   const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5084                                           nir_type_bool);
5085   if (!cond)
5086      return false;
5087
5088   /* prepare blocks */
5089   nir_block *then_block = nir_if_first_then_block(if_stmt);
5090   assert(nir_if_last_then_block(if_stmt)->successors[0]);
5091   assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5092   int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5093
5094   nir_block *else_block = NULL;
5095   int else_succ = -1;
5096   if (!exec_list_is_empty(&if_stmt->else_list)) {
5097      else_block = nir_if_first_else_block(if_stmt);
5098      assert(nir_if_last_else_block(if_stmt)->successors[0]);
5099      assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5100      else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5101   }
5102
5103   if (!emit_cond_branch(ctx, cond, then_block->index,
5104                         else_block ? else_block->index : then_succ))
5105      return false;
5106
5107   /* handle then-block */
5108   if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5109       (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5110        !emit_branch(ctx, then_succ)))
5111      return false;
5112
5113   if (else_block) {
5114      /* handle else-block */
5115      if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5116          (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5117           !emit_branch(ctx, else_succ)))
5118         return false;
5119   }
5120
5121   return true;
5122}
5123
5124static bool
5125emit_loop(struct ntd_context *ctx, nir_loop *loop)
5126{
5127   nir_block *first_block = nir_loop_first_block(loop);
5128
5129   assert(nir_loop_last_block(loop)->successors[0]);
5130   assert(!nir_loop_last_block(loop)->successors[1]);
5131
5132   if (!emit_branch(ctx, first_block->index))
5133      return false;
5134
5135   if (!emit_cf_list(ctx, &loop->body))
5136      return false;
5137
5138   if (!emit_branch(ctx, first_block->index))
5139      return false;
5140
5141   return true;
5142}
5143
5144static bool
5145emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5146{
5147   foreach_list_typed(nir_cf_node, node, node, list) {
5148      switch (node->type) {
5149      case nir_cf_node_block:
5150         if (!emit_block(ctx, nir_cf_node_as_block(node)))
5151            return false;
5152         break;
5153
5154      case nir_cf_node_if:
5155         if (!emit_if(ctx, nir_cf_node_as_if(node)))
5156            return false;
5157         break;
5158
5159      case nir_cf_node_loop:
5160         if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5161            return false;
5162         break;
5163
5164      default:
5165         unreachable("unsupported cf-list node");
5166         break;
5167      }
5168   }
5169   return true;
5170}
5171
5172static void
5173insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5174{
5175   nir_foreach_variable_in_list(var, var_list) {
5176      if (var->data.binding > new_var->data.binding) {
5177         exec_node_insert_node_before(&var->node, &new_var->node);
5178         return;
5179      }
5180   }
5181   exec_list_push_tail(var_list, &new_var->node);
5182}
5183
5184
5185static void
5186sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5187{
5188   struct exec_list new_list;
5189   exec_list_make_empty(&new_list);
5190
5191   nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5192      exec_node_remove(&var->node);
5193      const struct glsl_type *type = glsl_without_array(var->type);
5194      if (!glsl_type_is_struct(type))
5195         insert_sorted_by_binding(&new_list, var);
5196   }
5197   exec_list_append(&s->variables, &new_list);
5198}
5199
5200static void
5201prepare_phi_values(struct ntd_context *ctx, nir_function_impl *impl)
5202{
5203   /* PHI nodes are difficult to get right when tracking the types:
5204    * Since the incoming sources are linked to blocks, we can't bitcast
5205    * on the fly while loading. So scan the shader and insert a typed dummy
5206    * value for each phi source, and when storing we convert if the incoming
5207    * value has a different type then the one expected by the phi node.
5208    * We choose int as default, because it supports more bit sizes.
5209    */
5210   nir_foreach_block(block, impl) {
5211      nir_foreach_instr(instr, block) {
5212         if (instr->type == nir_instr_type_phi) {
5213            nir_phi_instr *ir = nir_instr_as_phi(instr);
5214            unsigned bitsize = nir_dest_bit_size(ir->dest);
5215            const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize);
5216            nir_foreach_phi_src(src, ir) {
5217               for(unsigned int i = 0; i < ir->dest.ssa.num_components; ++i)
5218                  store_ssa_def(ctx, src->src.ssa, i, dummy);
5219            }
5220         }
5221      }
5222   }
5223}
5224
5225static bool
5226emit_cbvs(struct ntd_context *ctx)
5227{
5228   if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5229      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5230         if (!emit_ubo_var(ctx, var))
5231            return false;
5232      }
5233   } else {
5234      if (ctx->shader->info.num_ubos) {
5235         const unsigned ubo_size = 16384 /*4096 vec4's*/;
5236         bool has_ubo0 = !ctx->opts->no_ubo0;
5237         bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5238         unsigned ubo1_array_size = ctx->shader->info.num_ubos -
5239            (has_state_vars ? 2 : 1);
5240
5241         if (has_ubo0 &&
5242             !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
5243            return false;
5244         if (ubo1_array_size &&
5245             !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos"))
5246            return false;
5247         if (has_state_vars &&
5248             !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
5249            return false;
5250      }
5251   }
5252
5253   return true;
5254}
5255
5256static bool
5257emit_scratch(struct ntd_context *ctx)
5258{
5259   if (ctx->shader->scratch_size) {
5260      /*
5261       * We always allocate an u32 array, no matter the actual variable types.
5262       * According to the DXIL spec, the minimum load/store granularity is
5263       * 32-bit, anything smaller requires using a read-extract/read-write-modify
5264       * approach.
5265       */
5266      unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t));
5267      const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
5268      const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
5269      if (!int32 || !array_length)
5270         return false;
5271
5272      const struct dxil_type *type = dxil_module_get_array_type(
5273         &ctx->mod, int32, size / sizeof(uint32_t));
5274      if (!type)
5275         return false;
5276
5277      ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
5278      if (!ctx->scratchvars)
5279         return false;
5280   }
5281
5282   return true;
5283}
5284
5285/* The validator complains if we don't have ops that reference a global variable. */
5286static bool
5287shader_has_shared_ops(struct nir_shader *s)
5288{
5289   nir_foreach_function(func, s) {
5290      if (!func->impl)
5291         continue;
5292      nir_foreach_block(block, func->impl) {
5293         nir_foreach_instr(instr, block) {
5294            if (instr->type != nir_instr_type_intrinsic)
5295               continue;
5296            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
5297            switch (intrin->intrinsic) {
5298            case nir_intrinsic_load_shared_dxil:
5299            case nir_intrinsic_store_shared_dxil:
5300            case nir_intrinsic_shared_atomic_add_dxil:
5301            case nir_intrinsic_shared_atomic_and_dxil:
5302            case nir_intrinsic_shared_atomic_comp_swap_dxil:
5303            case nir_intrinsic_shared_atomic_exchange_dxil:
5304            case nir_intrinsic_shared_atomic_imax_dxil:
5305            case nir_intrinsic_shared_atomic_imin_dxil:
5306            case nir_intrinsic_shared_atomic_or_dxil:
5307            case nir_intrinsic_shared_atomic_umax_dxil:
5308            case nir_intrinsic_shared_atomic_umin_dxil:
5309            case nir_intrinsic_shared_atomic_xor_dxil:
5310               return true;
5311            default: break;
5312            }
5313         }
5314      }
5315   }
5316   return false;
5317}
5318
5319static bool
5320emit_function(struct ntd_context *ctx, nir_function *func)
5321{
5322   assert(func->num_params == 0);
5323   nir_function_impl *impl = func->impl;
5324   if (!impl)
5325      return true;
5326
5327   nir_metadata_require(impl, nir_metadata_block_index);
5328
5329   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
5330   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
5331   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks);
5332   if (!func_def)
5333      return false;
5334
5335   if (func->is_entrypoint)
5336      ctx->main_func_def = func_def;
5337   else if (func == ctx->tess_ctrl_patch_constant_func)
5338      ctx->tess_ctrl_patch_constant_func_def = func_def;
5339
5340   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
5341   if (!ctx->defs)
5342      return false;
5343   ctx->num_defs = impl->ssa_alloc;
5344
5345   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
5346   if (!ctx->phis)
5347      return false;
5348
5349   prepare_phi_values(ctx, impl);
5350
5351   if (!emit_scratch(ctx))
5352      return false;
5353
5354   if (!emit_static_indexing_handles(ctx))
5355      return false;
5356
5357   if (!emit_cf_list(ctx, &impl->body))
5358      return false;
5359
5360   hash_table_foreach(ctx->phis, entry) {
5361      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
5362                     (struct phi_block *)entry->data))
5363         return false;
5364   }
5365
5366   if (!dxil_emit_ret_void(&ctx->mod))
5367      return false;
5368
5369   ralloc_free(ctx->defs);
5370   ctx->defs = NULL;
5371   _mesa_hash_table_destroy(ctx->phis, NULL);
5372   return true;
5373}
5374
5375static bool
5376emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
5377{
5378   /* The validator forces us to emit resources in a specific order:
5379    * CBVs, Samplers, SRVs, UAVs. While we are at it also remove
5380    * stale struct uniforms, they are lowered but might not have been removed */
5381   sort_uniforms_by_binding_and_remove_structs(ctx->shader);
5382
5383   /* CBVs */
5384   if (!emit_cbvs(ctx))
5385      return false;
5386
5387   /* Samplers */
5388   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
5389      unsigned count = glsl_type_get_sampler_count(var->type);
5390      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
5391      if (count > 0 && !emit_sampler(ctx, var, count))
5392         return false;
5393   }
5394
5395   /* SRVs */
5396   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
5397      if (glsl_type_is_texture(glsl_without_array(var->type)) &&
5398          !emit_srv(ctx, var, glsl_type_get_texture_count(var->type)))
5399         return false;
5400   }
5401
5402   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
5403      nir_foreach_image_variable(var, ctx->shader) {
5404         if ((var->data.access & ACCESS_NON_WRITEABLE) &&
5405             !emit_srv(ctx, var, glsl_type_get_image_count(var->type)))
5406            return false;
5407      }
5408   }
5409
5410   /* Handle read-only SSBOs as SRVs */
5411   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
5412      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
5413         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
5414            unsigned count = 1;
5415            if (glsl_type_is_array(var->type))
5416               count = glsl_get_length(var->type);
5417            if (!emit_srv(ctx, var, count))
5418               return false;
5419         }
5420      }
5421   }
5422
5423   if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) {
5424      const struct dxil_type *type;
5425      unsigned size;
5426
5427     /*
5428      * We always allocate an u32 array, no matter the actual variable types.
5429      * According to the DXIL spec, the minimum load/store granularity is
5430      * 32-bit, anything smaller requires using a read-extract/read-write-modify
5431      * approach. Non-atomic 64-bit accesses are allowed, but the
5432      * GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *))
5433      * sequences don't seem to be accepted by the DXIL validator when the
5434      * pointer is in the groupshared address space, making the 32-bit -> 64-bit
5435      * pointer cast impossible.
5436      */
5437      size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t));
5438      type = dxil_module_get_array_type(&ctx->mod,
5439                                        dxil_module_get_int_type(&ctx->mod, 32),
5440                                        size / sizeof(uint32_t));
5441      ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
5442                                                DXIL_AS_GROUPSHARED,
5443                                                ffs(sizeof(uint64_t)),
5444                                                NULL);
5445   }
5446
5447   /* UAVs */
5448   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
5449      if (!emit_globals(ctx, opts->num_kernel_globals))
5450         return false;
5451
5452      ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
5453      if (!ctx->consts)
5454         return false;
5455      if (!emit_global_consts(ctx))
5456         return false;
5457   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
5458      /* Handle read/write SSBOs as UAVs */
5459      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
5460         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
5461            unsigned count = 1;
5462            if (glsl_type_is_array(var->type))
5463               count = glsl_get_length(var->type);
5464            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
5465                        count, DXIL_COMP_TYPE_INVALID,
5466                        DXIL_RESOURCE_KIND_RAW_BUFFER, var->name))
5467               return false;
5468
5469         }
5470      }
5471   } else {
5472      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
5473         char name[64];
5474         snprintf(name, sizeof(name), "__ssbo%d", i);
5475         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID,
5476                       DXIL_RESOURCE_KIND_RAW_BUFFER, name))
5477            return false;
5478      }
5479      /* To work around a WARP bug, bind these descriptors a second time in descriptor
5480       * space 2. Space 0 will be used for static indexing, while space 2 will be used
5481       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
5482       * space 2 will be a single array.
5483       */
5484      if (ctx->shader->info.num_ssbos &&
5485          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID,
5486                    DXIL_RESOURCE_KIND_RAW_BUFFER, "__ssbo_dynamic"))
5487         return false;
5488   }
5489
5490   nir_foreach_image_variable(var, ctx->shader) {
5491      if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN &&
5492          var && (var->data.access & ACCESS_NON_WRITEABLE))
5493         continue; // already handled in SRV
5494
5495      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
5496         return false;
5497   }
5498
5499   ctx->mod.info.has_per_sample_input =
5500      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
5501   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
5502      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
5503         if (var->data.sample) {
5504            ctx->mod.info.has_per_sample_input = true;
5505            break;
5506         }
5507      }
5508   }
5509
5510   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
5511      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
5512   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);
5513
5514   nir_foreach_function(func, ctx->shader) {
5515      if (!emit_function(ctx, func))
5516         return false;
5517   }
5518
5519   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
5520      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
5521         if (var->data.location == FRAG_RESULT_STENCIL) {
5522            ctx->mod.feats.stencil_ref = true;
5523         }
5524      }
5525   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
5526              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5527      if (ctx->shader->info.outputs_written &
5528          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
5529         ctx->mod.feats.array_layer_from_vs_or_ds = true;
5530   }
5531
5532   if (ctx->mod.feats.native_low_precision)
5533      ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2);
5534
5535   return emit_metadata(ctx) &&
5536          dxil_emit_module(&ctx->mod);
5537}
5538
5539static unsigned int
5540get_dxil_shader_kind(struct nir_shader *s)
5541{
5542   switch (s->info.stage) {
5543   case MESA_SHADER_VERTEX:
5544      return DXIL_VERTEX_SHADER;
5545   case MESA_SHADER_TESS_CTRL:
5546      return DXIL_HULL_SHADER;
5547   case MESA_SHADER_TESS_EVAL:
5548      return DXIL_DOMAIN_SHADER;
5549   case MESA_SHADER_GEOMETRY:
5550      return DXIL_GEOMETRY_SHADER;
5551   case MESA_SHADER_FRAGMENT:
5552      return DXIL_PIXEL_SHADER;
5553   case MESA_SHADER_KERNEL:
5554   case MESA_SHADER_COMPUTE:
5555      return DXIL_COMPUTE_SHADER;
5556   default:
5557      unreachable("unknown shader stage in nir_to_dxil");
5558      return DXIL_COMPUTE_SHADER;
5559   }
5560}
5561
5562static unsigned
5563lower_bit_size_callback(const nir_instr* instr, void *data)
5564{
5565   if (instr->type != nir_instr_type_alu)
5566      return 0;
5567   const nir_alu_instr *alu = nir_instr_as_alu(instr);
5568
5569   if (nir_op_infos[alu->op].is_conversion)
5570      return 0;
5571
5572   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
5573   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
5574   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
5575
5576   unsigned ret = 0;
5577   for (unsigned i = 0; i < num_inputs; i++) {
5578      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
5579      if (bit_size != 1 && bit_size < min_bit_size)
5580         ret = min_bit_size;
5581   }
5582
5583   return ret;
5584}
5585
5586static void
5587optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
5588{
5589   bool progress;
5590   do {
5591      progress = false;
5592      NIR_PASS_V(s, nir_lower_vars_to_ssa);
5593      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX);
5594      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
5595      NIR_PASS(progress, s, nir_copy_prop);
5596      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
5597      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
5598      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
5599      if (opts->lower_int16)
5600         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
5601      NIR_PASS(progress, s, nir_opt_remove_phis);
5602      NIR_PASS(progress, s, nir_opt_dce);
5603      NIR_PASS(progress, s, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
5604      NIR_PASS(progress, s, nir_opt_dead_cf);
5605      NIR_PASS(progress, s, nir_opt_cse);
5606      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
5607      NIR_PASS(progress, s, nir_opt_algebraic);
5608      NIR_PASS(progress, s, dxil_nir_lower_x2b);
5609      if (s->options->lower_int64_options)
5610         NIR_PASS(progress, s, nir_lower_int64);
5611      NIR_PASS(progress, s, nir_lower_alu);
5612      NIR_PASS(progress, s, nir_opt_constant_folding);
5613      NIR_PASS(progress, s, nir_opt_undef);
5614      NIR_PASS(progress, s, nir_lower_undef_to_zero);
5615      NIR_PASS(progress, s, nir_opt_deref);
5616      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
5617      NIR_PASS(progress, s, nir_lower_64bit_phis);
5618      NIR_PASS_V(s, nir_lower_system_values);
5619   } while (progress);
5620
5621   do {
5622      progress = false;
5623      NIR_PASS(progress, s, nir_opt_algebraic_late);
5624   } while (progress);
5625}
5626
5627static
5628void dxil_fill_validation_state(struct ntd_context *ctx,
5629                                struct dxil_validation_state *state)
5630{
5631   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
5632      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
5633   state->num_resources = ctx->resources.size / resource_element_size;
5634   state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data;
5635   state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
5636   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
5637   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
5638   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
5639   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;
5640
5641   switch (ctx->mod.shader_kind) {
5642   case DXIL_VERTEX_SHADER:
5643      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
5644      break;
5645   case DXIL_PIXEL_SHADER:
5646      /* TODO: handle depth outputs */
5647      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
5648      state->state.psv1.psv0.ps.sample_frequency =
5649         ctx->mod.info.has_per_sample_input;
5650      break;
5651   case DXIL_COMPUTE_SHADER:
5652      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
5653      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
5654      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
5655      break;
5656   case DXIL_GEOMETRY_SHADER:
5657      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
5658      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
5659      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
5660      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
5661      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
5662      break;
5663   case DXIL_HULL_SHADER:
5664      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
5665      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
5666      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
5667      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
5668      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
5669      break;
5670   case DXIL_DOMAIN_SHADER:
5671      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
5672      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
5673      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
5674      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
5675      break;
5676   default:
5677      assert(0 && "Shader type not (yet) supported");
5678   }
5679}
5680
5681static nir_variable *
5682add_sysvalue(struct ntd_context *ctx,
5683              uint8_t value, char *name,
5684              int driver_location)
5685{
5686
5687   nir_variable *var = rzalloc(ctx->shader, nir_variable);
5688   if (!var)
5689      return NULL;
5690   var->data.driver_location = driver_location;
5691   var->data.location = value;
5692   var->type = glsl_uint_type();
5693   var->name = name;
5694   var->data.mode = nir_var_system_value;
5695   var->data.interpolation = INTERP_MODE_FLAT;
5696   return var;
5697}
5698
5699static bool
5700append_input_or_sysvalue(struct ntd_context *ctx,
5701                         int input_loc,  int sv_slot,
5702                         char *name, int driver_location)
5703{
5704   if (input_loc >= 0) {
5705      /* Check inputs whether a variable is available the corresponds
5706       * to the sysvalue */
5707      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
5708         if (var->data.location == input_loc) {
5709            ctx->system_value[sv_slot] = var;
5710            return true;
5711         }
5712      }
5713   }
5714
5715   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
5716   if (!ctx->system_value[sv_slot])
5717      return false;
5718
5719   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
5720   return true;
5721}
5722
5723struct sysvalue_name {
5724   gl_system_value value;
5725   int slot;
5726   char *name;
5727   gl_shader_stage only_in_shader;
5728} possible_sysvalues[] = {
5729   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
5730   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
5731   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
5732   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
5733   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
5734};
5735
5736static bool
5737allocate_sysvalues(struct ntd_context *ctx)
5738{
5739   unsigned driver_location = 0;
5740   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
5741      driver_location++;
5742   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
5743      driver_location++;
5744
5745   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
5746       ctx->shader->info.inputs_read &&
5747       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
5748      bool need_sample_id = true;
5749
5750      /* "var->data.sample = true" sometimes just mean, "I want per-sample
5751       * shading", which explains why we can end up with vars having flat
5752       * interpolation with the per-sample bit set. If there's only such
5753       * type of variables, we need to tell DXIL that we read SV_SampleIndex
5754       * to make DXIL validation happy.
5755       */
5756      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
5757         if (!var->data.sample || var->data.interpolation != INTERP_MODE_FLAT) {
5758            need_sample_id = false;
5759            break;
5760         }
5761      }
5762
5763      if (need_sample_id)
5764         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
5765   }
5766
5767   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
5768      struct sysvalue_name *info = &possible_sysvalues[i];
5769      if (info->only_in_shader != MESA_SHADER_NONE &&
5770          info->only_in_shader != ctx->shader->info.stage)
5771         continue;
5772      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
5773         if (!append_input_or_sysvalue(ctx, info->slot,
5774                                       info->value, info->name,
5775                                       driver_location++))
5776            return false;
5777      }
5778   }
5779   return true;
5780}
5781
5782static int
5783type_size_vec4(const struct glsl_type *type, bool bindless)
5784{
5785   return glsl_count_attribute_slots(type, false);
5786}
5787
5788static bool
5789dxil_validator_can_validate_shader_model(unsigned sm_minor, unsigned val_minor)
5790{
5791   /* Currently the validators are versioned such that val 1.x is needed for SM6.x */
5792   return sm_minor <= val_minor;
5793}
5794
5795static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
5796static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_7;
5797
5798bool
5799nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
5800            struct blob *blob)
5801{
5802   assert(opts);
5803   bool retval = true;
5804   debug_dxil = (int)debug_get_option_debug_dxil();
5805   blob_init(blob);
5806
5807   if (opts->shader_model_max < SHADER_MODEL_6_1) {
5808      debug_printf("D3D12: cannot support emitting shader model 6.0 or lower\n");
5809      return false;
5810   }
5811
5812   if (opts->validator_version_max != NO_DXIL_VALIDATION &&
5813       opts->validator_version_max < dxil_validator_min_capable_version) {
5814      debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
5815         opts->validator_version_max >> 16,
5816         opts->validator_version_max & 0xffff);
5817      return false;
5818   }
5819
5820   /* If no validation, write a blob as if it was going to be validated by the newest understood validator.
5821    * Same if the validator is newer than we know how to write for.
5822    */
5823   uint32_t validator_version =
5824      opts->validator_version_max == NO_DXIL_VALIDATION ||
5825      opts->validator_version_max > dxil_validator_max_capable_version ?
5826      dxil_validator_max_capable_version : opts->validator_version_max;
5827
5828   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
5829   if (!ctx)
5830      return false;
5831
5832   ctx->opts = opts;
5833   ctx->shader = s;
5834
5835   ctx->ralloc_ctx = ralloc_context(NULL);
5836   if (!ctx->ralloc_ctx) {
5837      retval = false;
5838      goto out;
5839   }
5840
5841   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
5842   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
5843   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
5844   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
5845   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
5846   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
5847   ctx->mod.shader_kind = get_dxil_shader_kind(s);
5848   ctx->mod.major_version = 6;
5849   ctx->mod.minor_version = 1;
5850   ctx->mod.major_validator = validator_version >> 16;
5851   ctx->mod.minor_validator = validator_version & 0xffff;
5852
5853   if (s->info.stage <= MESA_SHADER_FRAGMENT) {
5854      uint64_t in_mask =
5855         s->info.stage == MESA_SHADER_VERTEX ?
5856         0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
5857      uint64_t out_mask =
5858         s->info.stage == MESA_SHADER_FRAGMENT ?
5859         ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
5860         (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
5861
5862      NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
5863   }
5864
5865   NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
5866   NIR_PASS_V(s, nir_lower_frexp);
5867   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
5868   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
5869   NIR_PASS_V(s, dxil_nir_ensure_position_writes);
5870   NIR_PASS_V(s, nir_lower_pack);
5871   NIR_PASS_V(s, dxil_nir_lower_system_values);
5872   NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out);
5873
5874   if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
5875      NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);
5876
5877   if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
5878       ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
5879      /* Make sure any derefs are gone after lower_io before updating tess level vars */
5880      NIR_PASS_V(s, nir_opt_dce);
5881      NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
5882   }
5883
5884   optimize_nir(s, opts);
5885
5886   NIR_PASS_V(s, nir_remove_dead_variables,
5887              nir_var_function_temp | nir_var_shader_temp, NULL);
5888
5889   if (!allocate_sysvalues(ctx))
5890      return false;
5891
5892   NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
5893   NIR_PASS_V(s, nir_opt_dce);
5894
5895   if (debug_dxil & DXIL_DEBUG_VERBOSE)
5896      nir_print_shader(s, stderr);
5897
5898   if (!emit_module(ctx, opts)) {
5899      debug_printf("D3D12: dxil_container_add_module failed\n");
5900      retval = false;
5901      goto out;
5902   }
5903
5904   assert(ctx->mod.major_version == 6 && ctx->mod.minor_version >= 1);
5905   if ((ctx->mod.major_version << 16 | ctx->mod.minor_version) > opts->shader_model_max) {
5906      debug_printf("D3D12: max shader model exceeded\n");
5907      retval = false;
5908      goto out;
5909   }
5910
5911   assert(ctx->mod.major_validator == 1);
5912   if (!dxil_validator_can_validate_shader_model(ctx->mod.minor_version, ctx->mod.minor_validator)) {
5913      debug_printf("D3D12: shader model exceeds max that can be validated\n");
5914      retval = false;
5915      goto out;
5916   }
5917
5918   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
5919      struct dxil_dumper *dumper = dxil_dump_create();
5920      dxil_dump_module(dumper, &ctx->mod);
5921      fprintf(stderr, "\n");
5922      dxil_dump_buf_to_file(dumper, stderr);
5923      fprintf(stderr, "\n\n");
5924      dxil_dump_free(dumper);
5925   }
5926
5927   struct dxil_container container;
5928   dxil_container_init(&container);
5929   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
5930      debug_printf("D3D12: dxil_container_add_features failed\n");
5931      retval = false;
5932      goto out;
5933   }
5934
5935   if (!dxil_container_add_io_signature(&container,
5936                                        DXIL_ISG1,
5937                                        ctx->mod.num_sig_inputs,
5938                                        ctx->mod.inputs,
5939                                        ctx->mod.minor_validator >= 7)) {
5940      debug_printf("D3D12: failed to write input signature\n");
5941      retval = false;
5942      goto out;
5943   }
5944
5945   if (!dxil_container_add_io_signature(&container,
5946                                        DXIL_OSG1,
5947                                        ctx->mod.num_sig_outputs,
5948                                        ctx->mod.outputs,
5949                                        ctx->mod.minor_validator >= 7)) {
5950      debug_printf("D3D12: failed to write output signature\n");
5951      retval = false;
5952      goto out;
5953   }
5954
5955   if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
5956        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
5957       !dxil_container_add_io_signature(&container,
5958                                        DXIL_PSG1,
5959                                        ctx->mod.num_sig_patch_consts,
5960                                        ctx->mod.patch_consts,
5961                                        ctx->mod.minor_validator >= 7)) {
5962      debug_printf("D3D12: failed to write patch constant signature\n");
5963      retval = false;
5964      goto out;
5965   }
5966
5967   struct dxil_validation_state validation_state;
5968   memset(&validation_state, 0, sizeof(validation_state));
5969   dxil_fill_validation_state(ctx, &validation_state);
5970
5971   if (!dxil_container_add_state_validation(&container,&ctx->mod,
5972                                            &validation_state)) {
5973      debug_printf("D3D12: failed to write state-validation\n");
5974      retval = false;
5975      goto out;
5976   }
5977
5978   if (!dxil_container_add_module(&container, &ctx->mod)) {
5979      debug_printf("D3D12: failed to write module\n");
5980      retval = false;
5981      goto out;
5982   }
5983
5984   if (!dxil_container_write(&container, blob)) {
5985      debug_printf("D3D12: dxil_container_write failed\n");
5986      retval = false;
5987      goto out;
5988   }
5989   dxil_container_finish(&container);
5990
5991   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
5992      static int shader_id = 0;
5993      char buffer[64];
5994      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
5995               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
5996      debug_printf("Try to write blob to %s\n", buffer);
5997      FILE *f = fopen(buffer, "wb");
5998      if (f) {
5999         fwrite(blob->data, 1, blob->size, f);
6000         fclose(f);
6001      }
6002   }
6003
6004out:
6005   dxil_module_release(&ctx->mod);
6006   ralloc_free(ctx->ralloc_ctx);
6007   free(ctx);
6008   return retval;
6009}
6010
6011enum dxil_sysvalue_type
6012nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
6013{
6014   switch (var->data.location) {
6015   case VARYING_SLOT_FACE:
6016      return DXIL_GENERATED_SYSVALUE;
6017   case VARYING_SLOT_POS:
6018   case VARYING_SLOT_PRIMITIVE_ID:
6019   case VARYING_SLOT_CLIP_DIST0:
6020   case VARYING_SLOT_CLIP_DIST1:
6021   case VARYING_SLOT_PSIZ:
6022   case VARYING_SLOT_TESS_LEVEL_INNER:
6023   case VARYING_SLOT_TESS_LEVEL_OUTER:
6024   case VARYING_SLOT_VIEWPORT:
6025   case VARYING_SLOT_LAYER:
6026      if (!((1ull << var->data.location) & other_stage_mask))
6027         return DXIL_SYSVALUE;
6028      FALLTHROUGH;
6029   default:
6030      return DXIL_NO_SYSVALUE;
6031   }
6032}
6033