1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#include "d3d12_nir_passes.h"
25#include "d3d12_compiler.h"
26#include "nir_builder.h"
27#include "nir_builtin_builder.h"
28#include "nir_deref.h"
29#include "nir_format_convert.h"
30#include "program/prog_instruction.h"
31#include "dxil_nir.h"
32
33/**
34 * Lower Y Flip:
35 *
36 * We can't do a Y flip simply by negating the viewport height,
37 * so we need to lower the flip into the NIR shader.
38 */
39
40nir_ssa_def *
41d3d12_get_state_var(nir_builder *b,
42                    enum d3d12_state_var var_enum,
43                    const char *var_name,
44                    const struct glsl_type *var_type,
45                    nir_variable **out_var)
46{
47   const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER, var_enum };
48   if (*out_var == NULL) {
49      nir_variable *var = nir_variable_create(b->shader,
50                                              nir_var_uniform,
51                                              var_type,
52                                              var_name);
53
54      var->num_state_slots = 1;
55      var->state_slots = ralloc_array(var, nir_state_slot, 1);
56      memcpy(var->state_slots[0].tokens, tokens,
57             sizeof(var->state_slots[0].tokens));
58      var->data.how_declared = nir_var_hidden;
59      b->shader->num_uniforms++;
60      *out_var = var;
61   }
62   return nir_load_var(b, *out_var);
63}
64
65static void
66lower_pos_write(nir_builder *b, struct nir_instr *instr, nir_variable **flip)
67{
68   if (instr->type != nir_instr_type_intrinsic)
69      return;
70
71   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
72   if (intr->intrinsic != nir_intrinsic_store_deref)
73      return;
74
75   nir_variable *var = nir_intrinsic_get_var(intr, 0);
76   if (var->data.mode != nir_var_shader_out ||
77       var->data.location != VARYING_SLOT_POS)
78      return;
79
80   b->cursor = nir_before_instr(&intr->instr);
81
82   nir_ssa_def *pos = nir_ssa_for_src(b, intr->src[1], 4);
83   nir_ssa_def *flip_y = d3d12_get_state_var(b, D3D12_STATE_VAR_Y_FLIP, "d3d12_FlipY",
84                                             glsl_float_type(), flip);
85   nir_ssa_def *def = nir_vec4(b,
86                               nir_channel(b, pos, 0),
87                               nir_fmul(b, nir_channel(b, pos, 1), flip_y),
88                               nir_channel(b, pos, 2),
89                               nir_channel(b, pos, 3));
90   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
91}
92
93void
94d3d12_lower_yflip(nir_shader *nir)
95{
96   nir_variable *flip = NULL;
97
98   if (nir->info.stage != MESA_SHADER_VERTEX &&
99       nir->info.stage != MESA_SHADER_TESS_EVAL &&
100       nir->info.stage != MESA_SHADER_GEOMETRY)
101      return;
102
103   nir_foreach_function(function, nir) {
104      if (function->impl) {
105         nir_builder b;
106         nir_builder_init(&b, function->impl);
107
108         nir_foreach_block(block, function->impl) {
109            nir_foreach_instr_safe(instr, block) {
110               lower_pos_write(&b, instr, &flip);
111            }
112         }
113
114         nir_metadata_preserve(function->impl, nir_metadata_block_index |
115                                               nir_metadata_dominance);
116      }
117   }
118}
119
120static void
121lower_load_face(nir_builder *b, struct nir_instr *instr, nir_variable *var)
122{
123   if (instr->type != nir_instr_type_intrinsic)
124      return;
125
126   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
127   if (intr->intrinsic != nir_intrinsic_load_front_face)
128      return;
129
130   b->cursor = nir_before_instr(&intr->instr);
131
132   nir_ssa_def *load = nir_load_var(b, var);
133
134   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
135   nir_instr_remove(instr);
136}
137
138void
139d3d12_forward_front_face(nir_shader *nir)
140{
141   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
142
143   nir_variable *var = nir_variable_create(nir, nir_var_shader_in,
144                                           glsl_bool_type(),
145                                           "gl_FrontFacing");
146   var->data.location = VARYING_SLOT_VAR12;
147   var->data.interpolation = INTERP_MODE_FLAT;
148
149
150   nir_foreach_function(function, nir) {
151      if (function->impl) {
152         nir_builder b;
153         nir_builder_init(&b, function->impl);
154
155         nir_foreach_block(block, function->impl) {
156            nir_foreach_instr_safe(instr, block) {
157               lower_load_face(&b, instr, var);
158            }
159         }
160
161         nir_metadata_preserve(function->impl, nir_metadata_block_index |
162                                               nir_metadata_dominance);
163      }
164   }
165}
166
167static void
168lower_pos_read(nir_builder *b, struct nir_instr *instr,
169               nir_variable **depth_transform_var)
170{
171   if (instr->type != nir_instr_type_intrinsic)
172      return;
173
174   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
175   if (intr->intrinsic != nir_intrinsic_load_deref)
176      return;
177
178   nir_variable *var = nir_intrinsic_get_var(intr, 0);
179   if (var->data.mode != nir_var_shader_in ||
180       var->data.location != VARYING_SLOT_POS)
181      return;
182
183   b->cursor = nir_after_instr(instr);
184
185   nir_ssa_def *pos = nir_instr_ssa_def(instr);
186   nir_ssa_def *depth = nir_channel(b, pos, 2);
187
188   assert(depth_transform_var);
189   nir_ssa_def *depth_transform = d3d12_get_state_var(b, D3D12_STATE_VAR_DEPTH_TRANSFORM,
190                                                      "d3d12_DepthTransform",
191                                                      glsl_vec_type(2),
192                                                      depth_transform_var);
193   depth = nir_fmad(b, depth, nir_channel(b, depth_transform, 0),
194                              nir_channel(b, depth_transform, 1));
195
196   pos = nir_vector_insert_imm(b, pos, depth, 2);
197
198   assert(intr->dest.is_ssa);
199   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, pos,
200                                  pos->parent_instr);
201}
202
203void
204d3d12_lower_depth_range(nir_shader *nir)
205{
206   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
207   nir_variable *depth_transform = NULL;
208   nir_foreach_function(function, nir) {
209      if (function->impl) {
210         nir_builder b;
211         nir_builder_init(&b, function->impl);
212
213         nir_foreach_block(block, function->impl) {
214            nir_foreach_instr_safe(instr, block) {
215               lower_pos_read(&b, instr, &depth_transform);
216            }
217         }
218
219         nir_metadata_preserve(function->impl, nir_metadata_block_index |
220                                               nir_metadata_dominance);
221      }
222   }
223}
224
/* Cache of the state variables created while lowering compute intrinsics. */
struct compute_state_vars {
   /* Uniform that replaces load_num_workgroups; NULL until first needed */
   nir_variable *num_workgroups;
};
228
229static bool
230lower_compute_state_vars(nir_builder *b, nir_instr *instr, void *_state)
231{
232   if (instr->type != nir_instr_type_intrinsic)
233      return false;
234
235   b->cursor = nir_after_instr(instr);
236   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
237   struct compute_state_vars *vars = _state;
238   nir_ssa_def *result = NULL;
239   switch (intr->intrinsic) {
240   case nir_intrinsic_load_num_workgroups:
241      result = d3d12_get_state_var(b, D3D12_STATE_VAR_NUM_WORKGROUPS, "d3d12_NumWorkgroups",
242         glsl_vec_type(3), &vars->num_workgroups);
243      break;
244   default:
245      return false;
246   }
247
248   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
249   nir_instr_remove(instr);
250   return true;
251}
252
253bool
254d3d12_lower_compute_state_vars(nir_shader *nir)
255{
256   assert(nir->info.stage == MESA_SHADER_COMPUTE);
257   struct compute_state_vars vars = { 0 };
258   return nir_shader_instructions_pass(nir, lower_compute_state_vars,
259      nir_metadata_block_index | nir_metadata_dominance, &vars);
260}
261
262static bool
263is_color_output(nir_variable *var)
264{
265   return (var->data.mode == nir_var_shader_out &&
266           (var->data.location == FRAG_RESULT_COLOR ||
267            var->data.location >= FRAG_RESULT_DATA0));
268}
269
270static void
271lower_uint_color_write(nir_builder *b, struct nir_instr *instr, bool is_signed)
272{
273   const unsigned NUM_BITS = 8;
274   const unsigned bits[4] = { NUM_BITS, NUM_BITS, NUM_BITS, NUM_BITS };
275
276   if (instr->type != nir_instr_type_intrinsic)
277      return;
278
279   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
280   if (intr->intrinsic != nir_intrinsic_store_deref)
281      return;
282
283   nir_variable *var = nir_intrinsic_get_var(intr, 0);
284   if (!is_color_output(var))
285      return;
286
287   b->cursor = nir_before_instr(&intr->instr);
288
289   nir_ssa_def *col = nir_ssa_for_src(b, intr->src[1], intr->num_components);
290   nir_ssa_def *def = is_signed ? nir_format_float_to_snorm(b, col, bits) :
291                                  nir_format_float_to_unorm(b, col, bits);
292   if (is_signed)
293      def = nir_bcsel(b, nir_ilt(b, def, nir_imm_int(b, 0)),
294                      nir_iadd(b, def, nir_imm_int(b, 1 << NUM_BITS)),
295                      def);
296   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
297}
298
299void
300d3d12_lower_uint_cast(nir_shader *nir, bool is_signed)
301{
302   if (nir->info.stage != MESA_SHADER_FRAGMENT)
303      return;
304
305   nir_foreach_function(function, nir) {
306      if (function->impl) {
307         nir_builder b;
308         nir_builder_init(&b, function->impl);
309
310         nir_foreach_block(block, function->impl) {
311            nir_foreach_instr_safe(instr, block) {
312               lower_uint_color_write(&b, instr, is_signed);
313            }
314         }
315
316         nir_metadata_preserve(function->impl, nir_metadata_block_index |
317                                               nir_metadata_dominance);
318      }
319   }
320}
321
322static bool
323lower_load_draw_params(nir_builder *b, nir_instr *instr, void *draw_params)
324{
325   if (instr->type != nir_instr_type_intrinsic)
326      return false;
327
328   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
329
330   if (intr->intrinsic != nir_intrinsic_load_first_vertex &&
331       intr->intrinsic != nir_intrinsic_load_base_instance &&
332       intr->intrinsic != nir_intrinsic_load_draw_id &&
333       intr->intrinsic != nir_intrinsic_load_is_indexed_draw)
334      return false;
335
336   b->cursor = nir_before_instr(&intr->instr);
337
338   nir_ssa_def *load = d3d12_get_state_var(b, D3D12_STATE_VAR_DRAW_PARAMS, "d3d12_DrawParams",
339                                           glsl_uvec4_type(), draw_params);
340   unsigned channel = intr->intrinsic == nir_intrinsic_load_first_vertex ? 0 :
341      intr->intrinsic == nir_intrinsic_load_base_instance ? 1 :
342      intr->intrinsic == nir_intrinsic_load_draw_id ? 2 : 3;
343   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_channel(b, load, channel));
344   nir_instr_remove(instr);
345
346   return true;
347}
348
349bool
350d3d12_lower_load_draw_params(struct nir_shader *nir)
351{
352   nir_variable *draw_params = NULL;
353   if (nir->info.stage != MESA_SHADER_VERTEX)
354      return false;
355
356   return nir_shader_instructions_pass(nir, lower_load_draw_params,
357      nir_metadata_block_index | nir_metadata_dominance, &draw_params);
358}
359
360static bool
361lower_load_patch_vertices_in(nir_builder *b, nir_instr *instr, void *_state)
362{
363   if (instr->type != nir_instr_type_intrinsic)
364      return false;
365   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
366   if (intr->intrinsic != nir_intrinsic_load_patch_vertices_in)
367      return false;
368
369   b->cursor = nir_before_instr(&intr->instr);
370   nir_ssa_def *load = b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
371      d3d12_get_state_var(b, D3D12_STATE_VAR_PATCH_VERTICES_IN, "d3d12_FirstVertex", glsl_uint_type(), _state) :
372      nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
373   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
374   nir_instr_remove(instr);
375   return true;
376}
377
378bool
379d3d12_lower_load_patch_vertices_in(struct nir_shader *nir)
380{
381   nir_variable *var = NULL;
382
383   if (nir->info.stage != MESA_SHADER_TESS_CTRL &&
384       nir->info.stage != MESA_SHADER_TESS_EVAL)
385      return false;
386
387   return nir_shader_instructions_pass(nir, lower_load_patch_vertices_in,
388      nir_metadata_block_index | nir_metadata_dominance, &var);
389}
390
/* State carried through the depth inversion pass.
 *
 * NOTE: d3d12_nir_invert_depth() initializes this struct positionally, so
 * the first two members must stay viewport_mask and clip_halfz, in order.
 */
struct invert_depth_state
{
   /* Tested against (1 << viewport index) to decide whether to invert */
   unsigned viewport_mask;
   /* When set, the inverted depth is 1.0 - z instead of -z */
   bool clip_halfz;
   /* Value of the last seen VARYING_SLOT_VIEWPORT store, or NULL */
   nir_ssa_def *viewport_index;
   /* Last seen VARYING_SLOT_POS store that still needs rewriting, or NULL */
   nir_instr *store_pos_instr;
};
398
/**
 * Rewrite the pending position store recorded in state so that its depth
 * component is inverted.
 *
 * The new depth is -z, or 1.0 - z when state->clip_halfz is set.  When a
 * viewport index store was recorded as well, the position store is first
 * moved to the builder's cursor (which the caller must have set), and the
 * inversion is wrapped in an if testing bit (1 << viewport_index) against
 * state->viewport_mask; the unmodified position is selected otherwise.
 *
 * state->viewport_index and state->store_pos_instr are both cleared before
 * returning.
 */
static void
invert_depth_impl(nir_builder *b, struct invert_depth_state *state)
{
   assert(state->store_pos_instr);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(state->store_pos_instr);
   if (state->viewport_index) {
      /* Cursor is assigned before calling. Make sure that storing pos comes
       * after computing the viewport.
       */
      nir_instr_move(b->cursor, &intr->instr);
   }

   /* Everything below is emitted right in front of the position store */
   b->cursor = nir_before_instr(&intr->instr);

   nir_ssa_def *pos = nir_ssa_for_src(b, intr->src[1], 4);

   if (state->viewport_index) {
      /* Only invert when the written viewport index is part of the mask */
      nir_push_if(b, nir_test_mask(b, nir_ishl(b, nir_imm_int(b, 1), state->viewport_index), state->viewport_mask));
   }
   nir_ssa_def *old_depth = nir_channel(b, pos, 2);
   nir_ssa_def *new_depth = nir_fneg(b, old_depth);
   if (state->clip_halfz)
      new_depth = nir_fadd_imm(b, new_depth, 1.0);
   nir_ssa_def *def = nir_vec4(b,
                               nir_channel(b, pos, 0),
                               nir_channel(b, pos, 1),
                               new_depth,
                               nir_channel(b, pos, 3));
   if (state->viewport_index) {
      nir_pop_if(b, NULL);
      /* Inverted position from the then-branch, original one otherwise */
      def = nir_if_phi(b, def, pos);
   }
   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));

   /* The recorded stores are consumed; start over for the next vertex */
   state->viewport_index = NULL;
   state->store_pos_instr = NULL;
}
437
438static void
439invert_depth_instr(nir_builder *b, struct nir_instr *instr, struct invert_depth_state *state)
440{
441   if (instr->type != nir_instr_type_intrinsic)
442      return;
443
444   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
445   if (intr->intrinsic == nir_intrinsic_store_deref) {
446      nir_variable *var = nir_intrinsic_get_var(intr, 0);
447      if (var->data.mode != nir_var_shader_out)
448         return;
449
450      if (var->data.location == VARYING_SLOT_VIEWPORT)
451         state->viewport_index = intr->src[1].ssa;
452      if (var->data.location == VARYING_SLOT_POS)
453         state->store_pos_instr = instr;
454   } else if (intr->intrinsic == nir_intrinsic_emit_vertex) {
455      b->cursor = nir_before_instr(instr);
456      invert_depth_impl(b, state);
457   }
458}
459
460/* In OpenGL the windows space depth value z_w is evaluated according to "s * z_d + b"
461 * with  "s = (far - near) / 2" (depth clip:minus_one_to_one) [OpenGL 3.3, 2.13.1].
462 * When we switch the far and near value to satisfy DirectX requirements we have
463 * to compensate by inverting "z_d' = -z_d" with this lowering pass.
464 * When depth clip is set zero_to_one, we compensate with "z_d' = 1.0f - z_d" instead.
465 */
466void
467d3d12_nir_invert_depth(nir_shader *shader, unsigned viewport_mask, bool clip_halfz)
468{
469   if (shader->info.stage != MESA_SHADER_VERTEX &&
470       shader->info.stage != MESA_SHADER_TESS_EVAL &&
471       shader->info.stage != MESA_SHADER_GEOMETRY)
472      return;
473
474   struct invert_depth_state state = { viewport_mask, clip_halfz };
475   nir_foreach_function(function, shader) {
476      if (function->impl) {
477         nir_builder b;
478         nir_builder_init(&b, function->impl);
479
480         nir_foreach_block(block, function->impl) {
481            nir_foreach_instr_safe(instr, block) {
482               invert_depth_instr(&b, instr, &state);
483            }
484         }
485
486         if (state.store_pos_instr) {
487            b.cursor = nir_after_block(function->impl->end_block);
488            invert_depth_impl(&b, &state);
489         }
490
491         nir_metadata_preserve(function->impl, nir_metadata_block_index |
492                                               nir_metadata_dominance);
493      }
494   }
495}
496
497
498/**
499 * Lower State Vars:
500 *
501 * All uniforms related to internal D3D12 variables are
502 * condensed into a UBO that is appended at the end of the
503 * current ones.
504 */
505
506static unsigned
507get_state_var_offset(struct d3d12_shader *shader, enum d3d12_state_var var)
508{
509   for (unsigned i = 0; i < shader->num_state_vars; ++i) {
510      if (shader->state_vars[i].var == var)
511         return shader->state_vars[i].offset;
512   }
513
514   unsigned offset = shader->state_vars_size;
515   shader->state_vars[shader->num_state_vars].offset = offset;
516   shader->state_vars[shader->num_state_vars].var = var;
517   shader->state_vars_size += 4; /* Use 4-words slots no matter the variable size */
518   shader->num_state_vars++;
519
520   return offset;
521}
522
/**
 * Replace a load of an internal driver state variable with a UBO load.
 *
 * Handles load_uniform (the variable is looked up by driver_location) and
 * load_deref.  The instruction is only lowered when the variable carries
 * exactly one state slot whose first token is STATE_INTERNAL_DRIVER; the
 * second token identifies the state variable.  The replacement reads from
 * UBO `binding` at the offset get_state_var_offset() assigns to the variable.
 *
 * Returns true when the instruction was replaced and removed.
 */
static bool
lower_instr(nir_intrinsic_instr *instr, nir_builder *b,
            struct d3d12_shader *shader, unsigned binding)
{
   nir_variable *variable = NULL;
   nir_deref_instr *deref = NULL;

   b->cursor = nir_before_instr(&instr->instr);

   if (instr->intrinsic == nir_intrinsic_load_uniform) {
      /* Find the uniform whose driver_location matches the load's base */
      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
         if (var->data.driver_location == nir_intrinsic_base(instr)) {
            variable = var;
            break;
         }
      }
   } else if (instr->intrinsic == nir_intrinsic_load_deref) {
      deref = nir_src_as_deref(instr->src[0]);
      variable = nir_intrinsic_get_var(instr, 0);
   }

   /* Anything that is not a driver state variable is left untouched */
   if (variable == NULL ||
       variable->num_state_slots != 1 ||
       variable->state_slots[0].tokens[0] != STATE_INTERNAL_DRIVER)
      return false;

   enum d3d12_state_var var = variable->state_slots[0].tokens[1];
   nir_ssa_def *ubo_idx = nir_imm_int(b, binding);
   /* NOTE(review): the "* 4" presumably converts a 32-bit word offset into
    * bytes -- confirm against the users of state_vars[].offset.
    */
   nir_ssa_def *ubo_offset =  nir_imm_int(b, get_state_var_offset(shader, var) * 4);
   nir_ssa_def *load =
      nir_load_ubo(b, instr->num_components, instr->dest.ssa.bit_size,
                   ubo_idx, ubo_offset,
                   .align_mul = instr->dest.ssa.bit_size / 8,
                   .align_offset = 0,
                   .range_base = 0,
                   .range = ~0,
                   );

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);

   /* Remove the old load_* instruction and any parent derefs */
   nir_instr_remove(&instr->instr);
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      assert(d->dest.is_ssa);
      if (!list_is_empty(&d->dest.ssa.uses))
         break;

      nir_instr_remove(&d->instr);
   }

   return true;
}
576
577bool
578d3d12_lower_state_vars(nir_shader *nir, struct d3d12_shader *shader)
579{
580   bool progress = false;
581
582   /* The state var UBO is added after all the other UBOs if it already
583    * exists it will be replaced by using the same binding.
584    * In the event there are no other UBO's, use binding slot 1 to
585    * be consistent with other non-default UBO's */
586   unsigned binding = MAX2(nir->info.num_ubos, 1);
587
588   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
589      if (var->num_state_slots == 1 &&
590          var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
591         if (var->data.mode == nir_var_mem_ubo) {
592            binding = var->data.binding;
593         }
594      }
595   }
596
597   nir_foreach_function(function, nir) {
598      if (function->impl) {
599         nir_builder builder;
600         nir_builder_init(&builder, function->impl);
601         nir_foreach_block(block, function->impl) {
602            nir_foreach_instr_safe(instr, block) {
603               if (instr->type == nir_instr_type_intrinsic)
604                  progress |= lower_instr(nir_instr_as_intrinsic(instr),
605                                          &builder,
606                                          shader,
607                                          binding);
608            }
609         }
610
611         nir_metadata_preserve(function->impl, nir_metadata_block_index |
612                                               nir_metadata_dominance);
613      }
614   }
615
616   if (progress) {
617      assert(shader->num_state_vars > 0);
618
619      shader->state_vars_used = true;
620
621      /* Remove state variables */
622      nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
623         if (var->num_state_slots == 1 &&
624             var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
625            exec_node_remove(&var->node);
626            nir->num_uniforms--;
627         }
628      }
629
630      const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER };
631      const struct glsl_type *type = glsl_array_type(glsl_vec4_type(),
632                                                     shader->state_vars_size / 4, 0);
633      nir_variable *ubo = nir_variable_create(nir, nir_var_mem_ubo, type,
634                                                  "d3d12_state_vars");
635      if (binding >= nir->info.num_ubos)
636         nir->info.num_ubos = binding + 1;
637      ubo->data.binding = binding;
638      ubo->num_state_slots = 1;
639      ubo->state_slots = ralloc_array(ubo, nir_state_slot, 1);
640      memcpy(ubo->state_slots[0].tokens, tokens,
641              sizeof(ubo->state_slots[0].tokens));
642
643      struct glsl_struct_field field = {
644          .type = type,
645          .name = "data",
646          .location = -1,
647      };
648      ubo->interface_type =
649              glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
650                                  false, "__d3d12_state_vars_interface");
651   }
652
653   return progress;
654}
655
656void
657d3d12_add_missing_dual_src_target(struct nir_shader *s,
658                                  unsigned missing_mask)
659{
660   assert(missing_mask != 0);
661   nir_builder b;
662   nir_function_impl *impl = nir_shader_get_entrypoint(s);
663   nir_builder_init(&b, impl);
664   b.cursor = nir_before_cf_list(&impl->body);
665
666   nir_ssa_def *zero = nir_imm_zero(&b, 4, 32);
667   for (unsigned i = 0; i < 2; ++i) {
668
669      if (!(missing_mask & (1u << i)))
670         continue;
671
672      const char *name = i == 0 ? "gl_FragData[0]" :
673                                  "gl_SecondaryFragDataEXT[0]";
674      nir_variable *out = nir_variable_create(s, nir_var_shader_out,
675                                              glsl_vec4_type(), name);
676      out->data.location = FRAG_RESULT_DATA0;
677      out->data.driver_location = i;
678      out->data.index = i;
679
680      nir_store_var(&b, out, zero, 0xf);
681   }
682   nir_metadata_preserve(impl, nir_metadata_block_index |
683                               nir_metadata_dominance);
684}
685
686static bool
687lower_load_ubo_packed_filter(const nir_instr *instr,
688                             UNUSED const void *_options) {
689   if (instr->type != nir_instr_type_intrinsic)
690      return false;
691
692   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
693
694   return intr->intrinsic == nir_intrinsic_load_ubo;
695}
696
697static nir_ssa_def *
698lower_load_ubo_packed_impl(nir_builder *b, nir_instr *instr,
699                              UNUSED void *_options) {
700   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
701
702   nir_ssa_def *buffer = intr->src[0].ssa;
703   nir_ssa_def *offset = intr->src[1].ssa;
704
705   nir_ssa_def *result =
706      build_load_ubo_dxil(b, buffer,
707                          offset,
708                          nir_dest_num_components(intr->dest),
709                          nir_dest_bit_size(intr->dest));
710   return result;
711}
712
713bool
714nir_lower_packed_ubo_loads(nir_shader *nir) {
715   return nir_shader_lower_instructions(nir,
716                                        lower_load_ubo_packed_filter,
717                                        lower_load_ubo_packed_impl,
718                                        NULL);
719}
720
721void
722d3d12_lower_primitive_id(nir_shader *shader)
723{
724   nir_builder b;
725   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
726   nir_ssa_def *primitive_id;
727   nir_builder_init(&b, impl);
728
729   nir_variable *primitive_id_var = nir_variable_create(shader, nir_var_shader_out,
730                                                        glsl_uint_type(), "primitive_id");
731   primitive_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
732   primitive_id_var->data.interpolation = INTERP_MODE_FLAT;
733
734   nir_foreach_block(block, impl) {
735      b.cursor = nir_before_block(block);
736      primitive_id = nir_load_primitive_id(&b);
737
738      nir_foreach_instr_safe(instr, block) {
739         if (instr->type != nir_instr_type_intrinsic ||
740             nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_emit_vertex)
741            continue;
742
743         b.cursor = nir_before_instr(instr);
744         nir_store_var(&b, primitive_id_var, primitive_id, 0x1);
745      }
746   }
747
748   nir_metadata_preserve(impl, nir_metadata_none);
749}
750
751static void
752lower_triangle_strip_store(nir_builder *b, nir_intrinsic_instr *intr,
753                           nir_variable *vertex_count_var,
754                           nir_variable **varyings)
755{
756   /**
757    * tmp_varying[slot][min(vertex_count, 2)] = src
758    */
759   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
760   nir_ssa_def *index = nir_imin(b, vertex_count, nir_imm_int(b, 2));
761   nir_variable *var = nir_intrinsic_get_var(intr, 0);
762
763   if (var->data.mode != nir_var_shader_out)
764      return;
765
766   nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, varyings[var->data.location]), index);
767   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1], intr->num_components);
768   nir_store_deref(b, deref, value, 0xf);
769   nir_instr_remove(&intr->instr);
770}
771
/**
 * Replace an emit_vertex of a triangle strip with the emission of a whole
 * triangle built from the per-slot temporary arrays.
 *
 * varyings holds the temporary array variable of each slot (NULL for unused
 * slots) and out_varyings the matching shader output.  The caller must have
 * set the builder cursor; the original instruction is removed.
 */
static void
lower_triangle_strip_emit_vertex(nir_builder *b, nir_intrinsic_instr *intr,
                                 nir_variable *vertex_count_var,
                                 nir_variable **varyings,
                                 nir_variable **out_varyings)
{
   // TODO xfb + flat shading + last_pv
   /**
    * if (vertex_count >= 2) {
    *    for (i = 0; i < 3; i++) {
    *       foreach(slot)
    *          out[slot] = tmp_varying[slot][i];
    *       EmitVertex();
    *    }
    *    EndPrimitive();
    *    foreach(slot)
    *       tmp_varying[slot][vertex_count % 2] = tmp_varying[slot][2];
    * }
    * vertex_count++;
    */

   nir_ssa_def *two = nir_imm_int(b, 2);
   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
   nir_ssa_def *count_cmp = nir_uge(b, vertex_count, two);
   nir_if *count_check = nir_push_if(b, count_cmp);

   /* Emit the three vertices of the triangle from the temporaries */
   for (int j = 0; j < 3; ++j) {
      for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
         if (!varyings[i])
            continue;
         nir_copy_deref(b, nir_build_deref_var(b, out_varyings[i]),
                        nir_build_deref_array_imm(b, nir_build_deref_var(b, varyings[i]), j));
      }
      nir_emit_vertex(b, 0);
   }

   /* Keep the newest vertex around for the next triangle.  Note that this
    * copy is emitted in front of the end_primitive, unlike in the
    * pseudo-code above.
    */
   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
      if (!varyings[i])
         continue;
      nir_copy_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), nir_umod(b, vertex_count, two)),
                        nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), two));
   }

   nir_end_primitive(b, .stream_id = 0);

   nir_pop_if(b, count_check);

   /* The counter is incremented whether or not a triangle was emitted */
   vertex_count = nir_iadd(b, vertex_count, nir_imm_int(b, 1));
   nir_store_var(b, vertex_count_var, vertex_count, 0x1);

   nir_instr_remove(&intr->instr);
}
824
825static void
826lower_triangle_strip_end_primitive(nir_builder *b, nir_intrinsic_instr *intr,
827                                   nir_variable *vertex_count_var)
828{
829   /**
830    * vertex_count = 0;
831    */
832   nir_store_var(b, vertex_count_var, nir_imm_int(b, 0), 0x1);
833   nir_instr_remove(&intr->instr);
834}
835
/**
 * Lower the triangle strip output of a geometry shader's entry point into
 * individually emitted triangles.
 *
 * Each shader output gets a three-element temporary array.  Output stores
 * are redirected into these arrays, and emit_vertex / end_primitive
 * (including their _with_counter variants) are rewritten around a local
 * "vertex_count" counter.  gs.vertices_out is updated to
 * (vertices_out - 2) * 3, and the variable copies created by the lowering
 * are lowered through nir_lower_var_copies at the end.
 */
void
d3d12_lower_triangle_strip(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   /* Both arrays are indexed by output location; unused slots stay NULL */
   nir_variable *tmp_vars[VARYING_SLOT_MAX] = {0};
   nir_variable *out_vars[VARYING_SLOT_MAX] = {0};
   nir_builder_init(&b, impl);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out - 2) * 3;

   nir_variable *vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");

   nir_block *first = nir_start_block(impl);
   b.cursor = nir_before_block(first);
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      const struct glsl_type *type = glsl_array_type(var->type, 3, 0);
      tmp_vars[var->data.location] =  nir_local_variable_create(impl, type, "tmp_var");
      out_vars[var->data.location] = var;
   }
   /* The counter starts at zero at the top of the entry point */
   nir_store_var(&b, vertex_count_var, nir_imm_int(&b, 0), 1);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_deref:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_store(&b, intrin, vertex_count_var, tmp_vars);
            break;
         case nir_intrinsic_emit_vertex_with_counter:
         case nir_intrinsic_emit_vertex:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_emit_vertex(&b, intrin, vertex_count_var,
                                             tmp_vars, out_vars);
            break;
         case nir_intrinsic_end_primitive:
         case nir_intrinsic_end_primitive_with_counter:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_end_primitive(&b, intrin, vertex_count_var);
            break;
         default:
            break;
         }
      }
   }

   /* Control flow was added, so no metadata can be kept */
   nir_metadata_preserve(impl, nir_metadata_none);
   NIR_PASS_V(shader, nir_lower_var_copies);
}
890
891static bool
892is_sample_pos(const nir_instr *instr, const void *_data)
893{
894   if (instr->type != nir_instr_type_intrinsic)
895      return false;
896   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
897   return intr->intrinsic == nir_intrinsic_load_sample_pos;
898}
899
900static nir_ssa_def *
901lower_sample_pos(nir_builder *b, nir_instr *instr, void *_data)
902{
903   return nir_load_sample_pos_from_id(b, 32, nir_load_sample_id(b));
904}
905
906bool
907d3d12_lower_sample_pos(nir_shader *s)
908{
909   return nir_shader_lower_instructions(s, is_sample_pos, lower_sample_pos, NULL);
910}
911
912static bool
913is_multisampling_instr(const nir_instr *instr, const void *_data)
914{
915   if (instr->type != nir_instr_type_intrinsic)
916      return false;
917   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
918   if (intr->intrinsic == nir_intrinsic_store_output) {
919      nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
920      return semantics.location == FRAG_RESULT_SAMPLE_MASK;
921   } else if (intr->intrinsic == nir_intrinsic_store_deref) {
922      nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
923      return var->data.location == FRAG_RESULT_SAMPLE_MASK;
924   } else if (intr->intrinsic == nir_intrinsic_load_sample_id ||
925              intr->intrinsic == nir_intrinsic_load_sample_mask_in)
926      return true;
927   return false;
928}
929
930static nir_ssa_def *
931lower_multisampling_instr(nir_builder *b, nir_instr *instr, void *_data)
932{
933   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
934   switch (intr->intrinsic) {
935   case nir_intrinsic_store_output:
936   case nir_intrinsic_store_deref:
937      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
938   case nir_intrinsic_load_sample_id:
939      return nir_imm_int(b, 0);
940   case nir_intrinsic_load_sample_mask_in:
941      return nir_imm_int(b, 1);
942   default:
943      unreachable("Invalid intrinsic");
944   }
945}
946
947bool
948d3d12_disable_multisampling(nir_shader *s)
949{
950   if (s->info.stage != MESA_SHADER_FRAGMENT)
951      return false;
952   bool progress = nir_shader_lower_instructions(s, is_multisampling_instr, lower_multisampling_instr, NULL);
953
954   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
955      if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
956         exec_node_remove(&var->node);
957         s->info.outputs_written &= ~(1ull << FRAG_RESULT_SAMPLE_MASK);
958         progress = true;
959      }
960   }
961   nir_foreach_variable_with_modes_safe(var, s, nir_var_system_value) {
962      if (var->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN ||
963          var->data.location == SYSTEM_VALUE_SAMPLE_ID) {
964         exec_node_remove(&var->node);
965         progress = true;
966      }
967      var->data.sample = false;
968   }
969   BITSET_CLEAR(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
970   return progress;
971}
972
973struct multistream_subvar_state {
974   nir_variable *var;
975   uint8_t stream;
976   uint8_t num_components;
977};
978struct multistream_var_state {
979   unsigned num_subvars;
980   struct multistream_subvar_state subvars[4];
981};
982struct multistream_state {
983   struct multistream_var_state vars[VARYING_SLOT_MAX];
984};
985
986static bool
987split_multistream_varying_stores(nir_builder *b, nir_instr *instr, void *_state)
988{
989   if (instr->type != nir_instr_type_intrinsic)
990      return false;
991   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
992   if (intr->intrinsic != nir_intrinsic_store_deref)
993      return false;
994
995   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
996   if (!nir_deref_mode_is(deref, nir_var_shader_out))
997      return false;
998
999   nir_variable *var = nir_deref_instr_get_variable(deref);
1000   assert(var);
1001
1002   struct multistream_state *state = _state;
1003   struct multistream_var_state *var_state = &state->vars[var->data.location];
1004   if (var_state->num_subvars <= 1)
1005      return false;
1006
1007   nir_deref_path path;
1008   nir_deref_path_init(&path, deref, b->shader);
1009   assert(path.path[0]->deref_type == nir_deref_type_var && path.path[0]->var == var);
1010
1011   unsigned first_channel = 0;
1012   for (unsigned subvar = 0; subvar < var_state->num_subvars; ++subvar) {
1013      b->cursor = nir_after_instr(&path.path[0]->instr);
1014      nir_deref_instr *new_path = nir_build_deref_var(b, var_state->subvars[subvar].var);
1015
1016      for (unsigned i = 1; path.path[i]; ++i) {
1017         b->cursor = nir_after_instr(&path.path[i]->instr);
1018         new_path = nir_build_deref_follower(b, new_path, path.path[i]);
1019      }
1020
1021      b->cursor = nir_before_instr(instr);
1022      unsigned mask_num_channels = (1 << var_state->subvars[subvar].num_components) - 1;
1023      unsigned orig_write_mask = nir_intrinsic_write_mask(intr);
1024      nir_ssa_def *sub_value = nir_channels(b, intr->src[1].ssa, mask_num_channels << first_channel);
1025
1026      first_channel += var_state->subvars[subvar].num_components;
1027
1028      unsigned new_write_mask = (orig_write_mask >> first_channel) & mask_num_channels;
1029      nir_build_store_deref(b, &new_path->dest.ssa, sub_value, new_write_mask, nir_intrinsic_access(intr));
1030   }
1031
1032   nir_deref_path_finish(&path);
1033   nir_instr_free_and_dce(instr);
1034   return true;
1035}
1036
1037bool
1038d3d12_split_multistream_varyings(nir_shader *s)
1039{
1040   if (s->info.stage != MESA_SHADER_GEOMETRY)
1041      return false;
1042
1043   struct multistream_state state;
1044   memset(&state, 0, sizeof(state));
1045
1046   bool progress = false;
1047   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
1048      if ((var->data.stream & NIR_STREAM_PACKED) == 0)
1049         continue;
1050
1051      struct multistream_var_state *var_state = &state.vars[var->data.location];
1052      struct multistream_subvar_state *subvars = var_state->subvars;
1053      for (unsigned i = 0; i < glsl_get_vector_elements(var->type); ++i) {
1054         unsigned stream = (var->data.stream >> (2 * (i + var->data.location_frac))) & 0x3;
1055         if (var_state->num_subvars == 0 || stream != subvars[var_state->num_subvars - 1].stream) {
1056            subvars[var_state->num_subvars].stream = stream;
1057            subvars[var_state->num_subvars].num_components = 1;
1058            var_state->num_subvars++;
1059         } else {
1060            subvars[var_state->num_subvars - 1].num_components++;
1061         }
1062      }
1063
1064      var->data.stream = subvars[0].stream;
1065      if (var_state->num_subvars == 1)
1066         continue;
1067
1068      progress = true;
1069
1070      subvars[0].var = var;
1071      var->type = glsl_vector_type(glsl_get_base_type(var->type), subvars[0].num_components);
1072      unsigned location_frac = var->data.location_frac + subvars[0].num_components;
1073      for (unsigned subvar = 1; subvar < var_state->num_subvars; ++subvar) {
1074         char *name = ralloc_asprintf(s, "unpacked:%s_stream%d", var->name, subvars[subvar].stream);
1075         nir_variable *new_var = nir_variable_create(s, nir_var_shader_out,
1076            glsl_vector_type(glsl_get_base_type(var->type), subvars[subvar].num_components),
1077            name);
1078
1079         new_var->data = var->data;
1080         new_var->data.stream = subvars[subvar].stream;
1081         new_var->data.location_frac = location_frac;
1082         location_frac += subvars[subvar].num_components;
1083         subvars[subvar].var = new_var;
1084      }
1085   }
1086
1087   if (progress) {
1088      nir_shader_instructions_pass(s, split_multistream_varying_stores,
1089         nir_metadata_block_index | nir_metadata_dominance, &state);
1090   } else {
1091      nir_shader_preserve_all_metadata(s);
1092   }
1093
1094   return progress;
1095}
1096
1097static void
1098write_0(nir_builder *b, nir_deref_instr *deref)
1099{
1100   if (glsl_type_is_array_or_matrix(deref->type)) {
1101      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
1102         write_0(b, nir_build_deref_array_imm(b, deref, i));
1103   } else if (glsl_type_is_struct(deref->type)) {
1104      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
1105         write_0(b, nir_build_deref_struct(b, deref, i));
1106   } else {
1107      nir_ssa_def *scalar = nir_imm_intN_t(b, 0, glsl_get_bit_size(deref->type));
1108      nir_ssa_def *scalar_arr[NIR_MAX_VEC_COMPONENTS];
1109      unsigned num_comps = glsl_get_components(deref->type);
1110      unsigned writemask = (1 << num_comps) - 1;
1111      for (unsigned i = 0; i < num_comps; ++i)
1112         scalar_arr[i] = scalar;
1113      nir_ssa_def *zero_val = nir_vec(b, scalar_arr, num_comps);
1114      nir_store_deref(b, deref, zero_val, writemask);
1115   }
1116}
1117
1118void
1119d3d12_write_0_to_new_varying(nir_shader *s, nir_variable *var)
1120{
1121   /* Skip per-vertex HS outputs */
1122   if (s->info.stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
1123      return;
1124
1125   nir_foreach_function(func, s) {
1126      if (!func->impl)
1127         continue;
1128
1129      nir_builder b;
1130      nir_builder_init(&b, func->impl);
1131
1132      nir_foreach_block(block, func->impl) {
1133         b.cursor = nir_before_block(block);
1134         if (s->info.stage != MESA_SHADER_GEOMETRY) {
1135            write_0(&b, nir_build_deref_var(&b, var));
1136            break;
1137         }
1138
1139         nir_foreach_instr_safe(instr, block) {
1140            if (instr->type != nir_instr_type_intrinsic)
1141               continue;
1142            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1143            if (intr->intrinsic != nir_intrinsic_emit_vertex)
1144               continue;
1145
1146            b.cursor = nir_before_instr(instr);
1147            write_0(&b, nir_build_deref_var(&b, var));
1148         }
1149      }
1150
1151      nir_metadata_preserve(func->impl, nir_metadata_block_index | nir_metadata_dominance);
1152   }
1153}
1154