1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © Microsoft Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "d3d12_nir_passes.h"
25bf215546Sopenharmony_ci#include "d3d12_compiler.h"
26bf215546Sopenharmony_ci#include "nir_builder.h"
27bf215546Sopenharmony_ci#include "nir_builtin_builder.h"
28bf215546Sopenharmony_ci#include "nir_deref.h"
29bf215546Sopenharmony_ci#include "nir_format_convert.h"
30bf215546Sopenharmony_ci#include "program/prog_instruction.h"
31bf215546Sopenharmony_ci#include "dxil_nir.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci/**
34bf215546Sopenharmony_ci * Lower Y Flip:
35bf215546Sopenharmony_ci *
36bf215546Sopenharmony_ci * We can't do a Y flip simply by negating the viewport height,
37bf215546Sopenharmony_ci * so we need to lower the flip into the NIR shader.
38bf215546Sopenharmony_ci */
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cinir_ssa_def *
41bf215546Sopenharmony_cid3d12_get_state_var(nir_builder *b,
42bf215546Sopenharmony_ci                    enum d3d12_state_var var_enum,
43bf215546Sopenharmony_ci                    const char *var_name,
44bf215546Sopenharmony_ci                    const struct glsl_type *var_type,
45bf215546Sopenharmony_ci                    nir_variable **out_var)
46bf215546Sopenharmony_ci{
47bf215546Sopenharmony_ci   const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER, var_enum };
48bf215546Sopenharmony_ci   if (*out_var == NULL) {
49bf215546Sopenharmony_ci      nir_variable *var = nir_variable_create(b->shader,
50bf215546Sopenharmony_ci                                              nir_var_uniform,
51bf215546Sopenharmony_ci                                              var_type,
52bf215546Sopenharmony_ci                                              var_name);
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci      var->num_state_slots = 1;
55bf215546Sopenharmony_ci      var->state_slots = ralloc_array(var, nir_state_slot, 1);
56bf215546Sopenharmony_ci      memcpy(var->state_slots[0].tokens, tokens,
57bf215546Sopenharmony_ci             sizeof(var->state_slots[0].tokens));
58bf215546Sopenharmony_ci      var->data.how_declared = nir_var_hidden;
59bf215546Sopenharmony_ci      b->shader->num_uniforms++;
60bf215546Sopenharmony_ci      *out_var = var;
61bf215546Sopenharmony_ci   }
62bf215546Sopenharmony_ci   return nir_load_var(b, *out_var);
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistatic void
66bf215546Sopenharmony_cilower_pos_write(nir_builder *b, struct nir_instr *instr, nir_variable **flip)
67bf215546Sopenharmony_ci{
68bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
69bf215546Sopenharmony_ci      return;
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
72bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_store_deref)
73bf215546Sopenharmony_ci      return;
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   nir_variable *var = nir_intrinsic_get_var(intr, 0);
76bf215546Sopenharmony_ci   if (var->data.mode != nir_var_shader_out ||
77bf215546Sopenharmony_ci       var->data.location != VARYING_SLOT_POS)
78bf215546Sopenharmony_ci      return;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   nir_ssa_def *pos = nir_ssa_for_src(b, intr->src[1], 4);
83bf215546Sopenharmony_ci   nir_ssa_def *flip_y = d3d12_get_state_var(b, D3D12_STATE_VAR_Y_FLIP, "d3d12_FlipY",
84bf215546Sopenharmony_ci                                             glsl_float_type(), flip);
85bf215546Sopenharmony_ci   nir_ssa_def *def = nir_vec4(b,
86bf215546Sopenharmony_ci                               nir_channel(b, pos, 0),
87bf215546Sopenharmony_ci                               nir_fmul(b, nir_channel(b, pos, 1), flip_y),
88bf215546Sopenharmony_ci                               nir_channel(b, pos, 2),
89bf215546Sopenharmony_ci                               nir_channel(b, pos, 3));
90bf215546Sopenharmony_ci   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
91bf215546Sopenharmony_ci}
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_civoid
94bf215546Sopenharmony_cid3d12_lower_yflip(nir_shader *nir)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   nir_variable *flip = NULL;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   if (nir->info.stage != MESA_SHADER_VERTEX &&
99bf215546Sopenharmony_ci       nir->info.stage != MESA_SHADER_TESS_EVAL &&
100bf215546Sopenharmony_ci       nir->info.stage != MESA_SHADER_GEOMETRY)
101bf215546Sopenharmony_ci      return;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   nir_foreach_function(function, nir) {
104bf215546Sopenharmony_ci      if (function->impl) {
105bf215546Sopenharmony_ci         nir_builder b;
106bf215546Sopenharmony_ci         nir_builder_init(&b, function->impl);
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
109bf215546Sopenharmony_ci            nir_foreach_instr_safe(instr, block) {
110bf215546Sopenharmony_ci               lower_pos_write(&b, instr, &flip);
111bf215546Sopenharmony_ci            }
112bf215546Sopenharmony_ci         }
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
115bf215546Sopenharmony_ci                                               nir_metadata_dominance);
116bf215546Sopenharmony_ci      }
117bf215546Sopenharmony_ci   }
118bf215546Sopenharmony_ci}
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_cistatic void
121bf215546Sopenharmony_cilower_load_face(nir_builder *b, struct nir_instr *instr, nir_variable *var)
122bf215546Sopenharmony_ci{
123bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
124bf215546Sopenharmony_ci      return;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
127bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_front_face)
128bf215546Sopenharmony_ci      return;
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   nir_ssa_def *load = nir_load_var(b, var);
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
135bf215546Sopenharmony_ci   nir_instr_remove(instr);
136bf215546Sopenharmony_ci}
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_civoid
139bf215546Sopenharmony_cid3d12_forward_front_face(nir_shader *nir)
140bf215546Sopenharmony_ci{
141bf215546Sopenharmony_ci   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   nir_variable *var = nir_variable_create(nir, nir_var_shader_in,
144bf215546Sopenharmony_ci                                           glsl_bool_type(),
145bf215546Sopenharmony_ci                                           "gl_FrontFacing");
146bf215546Sopenharmony_ci   var->data.location = VARYING_SLOT_VAR12;
147bf215546Sopenharmony_ci   var->data.interpolation = INTERP_MODE_FLAT;
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   nir_foreach_function(function, nir) {
151bf215546Sopenharmony_ci      if (function->impl) {
152bf215546Sopenharmony_ci         nir_builder b;
153bf215546Sopenharmony_ci         nir_builder_init(&b, function->impl);
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
156bf215546Sopenharmony_ci            nir_foreach_instr_safe(instr, block) {
157bf215546Sopenharmony_ci               lower_load_face(&b, instr, var);
158bf215546Sopenharmony_ci            }
159bf215546Sopenharmony_ci         }
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
162bf215546Sopenharmony_ci                                               nir_metadata_dominance);
163bf215546Sopenharmony_ci      }
164bf215546Sopenharmony_ci   }
165bf215546Sopenharmony_ci}
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_cistatic void
168bf215546Sopenharmony_cilower_pos_read(nir_builder *b, struct nir_instr *instr,
169bf215546Sopenharmony_ci               nir_variable **depth_transform_var)
170bf215546Sopenharmony_ci{
171bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
172bf215546Sopenharmony_ci      return;
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
175bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_deref)
176bf215546Sopenharmony_ci      return;
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   nir_variable *var = nir_intrinsic_get_var(intr, 0);
179bf215546Sopenharmony_ci   if (var->data.mode != nir_var_shader_in ||
180bf215546Sopenharmony_ci       var->data.location != VARYING_SLOT_POS)
181bf215546Sopenharmony_ci      return;
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   nir_ssa_def *pos = nir_instr_ssa_def(instr);
186bf215546Sopenharmony_ci   nir_ssa_def *depth = nir_channel(b, pos, 2);
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   assert(depth_transform_var);
189bf215546Sopenharmony_ci   nir_ssa_def *depth_transform = d3d12_get_state_var(b, D3D12_STATE_VAR_DEPTH_TRANSFORM,
190bf215546Sopenharmony_ci                                                      "d3d12_DepthTransform",
191bf215546Sopenharmony_ci                                                      glsl_vec_type(2),
192bf215546Sopenharmony_ci                                                      depth_transform_var);
193bf215546Sopenharmony_ci   depth = nir_fmad(b, depth, nir_channel(b, depth_transform, 0),
194bf215546Sopenharmony_ci                              nir_channel(b, depth_transform, 1));
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   pos = nir_vector_insert_imm(b, pos, depth, 2);
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   assert(intr->dest.is_ssa);
199bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, pos,
200bf215546Sopenharmony_ci                                  pos->parent_instr);
201bf215546Sopenharmony_ci}
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_civoid
204bf215546Sopenharmony_cid3d12_lower_depth_range(nir_shader *nir)
205bf215546Sopenharmony_ci{
206bf215546Sopenharmony_ci   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
207bf215546Sopenharmony_ci   nir_variable *depth_transform = NULL;
208bf215546Sopenharmony_ci   nir_foreach_function(function, nir) {
209bf215546Sopenharmony_ci      if (function->impl) {
210bf215546Sopenharmony_ci         nir_builder b;
211bf215546Sopenharmony_ci         nir_builder_init(&b, function->impl);
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
214bf215546Sopenharmony_ci            nir_foreach_instr_safe(instr, block) {
215bf215546Sopenharmony_ci               lower_pos_read(&b, instr, &depth_transform);
216bf215546Sopenharmony_ci            }
217bf215546Sopenharmony_ci         }
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
220bf215546Sopenharmony_ci                                               nir_metadata_dominance);
221bf215546Sopenharmony_ci      }
222bf215546Sopenharmony_ci   }
223bf215546Sopenharmony_ci}
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_cistruct compute_state_vars {
226bf215546Sopenharmony_ci   nir_variable *num_workgroups;
227bf215546Sopenharmony_ci};
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_cistatic bool
230bf215546Sopenharmony_cilower_compute_state_vars(nir_builder *b, nir_instr *instr, void *_state)
231bf215546Sopenharmony_ci{
232bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
233bf215546Sopenharmony_ci      return false;
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
236bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
237bf215546Sopenharmony_ci   struct compute_state_vars *vars = _state;
238bf215546Sopenharmony_ci   nir_ssa_def *result = NULL;
239bf215546Sopenharmony_ci   switch (intr->intrinsic) {
240bf215546Sopenharmony_ci   case nir_intrinsic_load_num_workgroups:
241bf215546Sopenharmony_ci      result = d3d12_get_state_var(b, D3D12_STATE_VAR_NUM_WORKGROUPS, "d3d12_NumWorkgroups",
242bf215546Sopenharmony_ci         glsl_vec_type(3), &vars->num_workgroups);
243bf215546Sopenharmony_ci      break;
244bf215546Sopenharmony_ci   default:
245bf215546Sopenharmony_ci      return false;
246bf215546Sopenharmony_ci   }
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
249bf215546Sopenharmony_ci   nir_instr_remove(instr);
250bf215546Sopenharmony_ci   return true;
251bf215546Sopenharmony_ci}
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_cibool
254bf215546Sopenharmony_cid3d12_lower_compute_state_vars(nir_shader *nir)
255bf215546Sopenharmony_ci{
256bf215546Sopenharmony_ci   assert(nir->info.stage == MESA_SHADER_COMPUTE);
257bf215546Sopenharmony_ci   struct compute_state_vars vars = { 0 };
258bf215546Sopenharmony_ci   return nir_shader_instructions_pass(nir, lower_compute_state_vars,
259bf215546Sopenharmony_ci      nir_metadata_block_index | nir_metadata_dominance, &vars);
260bf215546Sopenharmony_ci}
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_cistatic bool
263bf215546Sopenharmony_ciis_color_output(nir_variable *var)
264bf215546Sopenharmony_ci{
265bf215546Sopenharmony_ci   return (var->data.mode == nir_var_shader_out &&
266bf215546Sopenharmony_ci           (var->data.location == FRAG_RESULT_COLOR ||
267bf215546Sopenharmony_ci            var->data.location >= FRAG_RESULT_DATA0));
268bf215546Sopenharmony_ci}
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_cistatic void
271bf215546Sopenharmony_cilower_uint_color_write(nir_builder *b, struct nir_instr *instr, bool is_signed)
272bf215546Sopenharmony_ci{
273bf215546Sopenharmony_ci   const unsigned NUM_BITS = 8;
274bf215546Sopenharmony_ci   const unsigned bits[4] = { NUM_BITS, NUM_BITS, NUM_BITS, NUM_BITS };
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
277bf215546Sopenharmony_ci      return;
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
280bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_store_deref)
281bf215546Sopenharmony_ci      return;
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci   nir_variable *var = nir_intrinsic_get_var(intr, 0);
284bf215546Sopenharmony_ci   if (!is_color_output(var))
285bf215546Sopenharmony_ci      return;
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   nir_ssa_def *col = nir_ssa_for_src(b, intr->src[1], intr->num_components);
290bf215546Sopenharmony_ci   nir_ssa_def *def = is_signed ? nir_format_float_to_snorm(b, col, bits) :
291bf215546Sopenharmony_ci                                  nir_format_float_to_unorm(b, col, bits);
292bf215546Sopenharmony_ci   if (is_signed)
293bf215546Sopenharmony_ci      def = nir_bcsel(b, nir_ilt(b, def, nir_imm_int(b, 0)),
294bf215546Sopenharmony_ci                      nir_iadd(b, def, nir_imm_int(b, 1 << NUM_BITS)),
295bf215546Sopenharmony_ci                      def);
296bf215546Sopenharmony_ci   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
297bf215546Sopenharmony_ci}
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_civoid
300bf215546Sopenharmony_cid3d12_lower_uint_cast(nir_shader *nir, bool is_signed)
301bf215546Sopenharmony_ci{
302bf215546Sopenharmony_ci   if (nir->info.stage != MESA_SHADER_FRAGMENT)
303bf215546Sopenharmony_ci      return;
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci   nir_foreach_function(function, nir) {
306bf215546Sopenharmony_ci      if (function->impl) {
307bf215546Sopenharmony_ci         nir_builder b;
308bf215546Sopenharmony_ci         nir_builder_init(&b, function->impl);
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
311bf215546Sopenharmony_ci            nir_foreach_instr_safe(instr, block) {
312bf215546Sopenharmony_ci               lower_uint_color_write(&b, instr, is_signed);
313bf215546Sopenharmony_ci            }
314bf215546Sopenharmony_ci         }
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
317bf215546Sopenharmony_ci                                               nir_metadata_dominance);
318bf215546Sopenharmony_ci      }
319bf215546Sopenharmony_ci   }
320bf215546Sopenharmony_ci}
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_cistatic bool
323bf215546Sopenharmony_cilower_load_draw_params(nir_builder *b, nir_instr *instr, void *draw_params)
324bf215546Sopenharmony_ci{
325bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
326bf215546Sopenharmony_ci      return false;
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_first_vertex &&
331bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_load_base_instance &&
332bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_load_draw_id &&
333bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_load_is_indexed_draw)
334bf215546Sopenharmony_ci      return false;
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   nir_ssa_def *load = d3d12_get_state_var(b, D3D12_STATE_VAR_DRAW_PARAMS, "d3d12_DrawParams",
339bf215546Sopenharmony_ci                                           glsl_uvec4_type(), draw_params);
340bf215546Sopenharmony_ci   unsigned channel = intr->intrinsic == nir_intrinsic_load_first_vertex ? 0 :
341bf215546Sopenharmony_ci      intr->intrinsic == nir_intrinsic_load_base_instance ? 1 :
342bf215546Sopenharmony_ci      intr->intrinsic == nir_intrinsic_load_draw_id ? 2 : 3;
343bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_channel(b, load, channel));
344bf215546Sopenharmony_ci   nir_instr_remove(instr);
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   return true;
347bf215546Sopenharmony_ci}
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_cibool
350bf215546Sopenharmony_cid3d12_lower_load_draw_params(struct nir_shader *nir)
351bf215546Sopenharmony_ci{
352bf215546Sopenharmony_ci   nir_variable *draw_params = NULL;
353bf215546Sopenharmony_ci   if (nir->info.stage != MESA_SHADER_VERTEX)
354bf215546Sopenharmony_ci      return false;
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   return nir_shader_instructions_pass(nir, lower_load_draw_params,
357bf215546Sopenharmony_ci      nir_metadata_block_index | nir_metadata_dominance, &draw_params);
358bf215546Sopenharmony_ci}
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_cistatic bool
361bf215546Sopenharmony_cilower_load_patch_vertices_in(nir_builder *b, nir_instr *instr, void *_state)
362bf215546Sopenharmony_ci{
363bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
364bf215546Sopenharmony_ci      return false;
365bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
366bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_patch_vertices_in)
367bf215546Sopenharmony_ci      return false;
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
370bf215546Sopenharmony_ci   nir_ssa_def *load = b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
371bf215546Sopenharmony_ci      d3d12_get_state_var(b, D3D12_STATE_VAR_PATCH_VERTICES_IN, "d3d12_FirstVertex", glsl_uint_type(), _state) :
372bf215546Sopenharmony_ci      nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
373bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
374bf215546Sopenharmony_ci   nir_instr_remove(instr);
375bf215546Sopenharmony_ci   return true;
376bf215546Sopenharmony_ci}
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_cibool
379bf215546Sopenharmony_cid3d12_lower_load_patch_vertices_in(struct nir_shader *nir)
380bf215546Sopenharmony_ci{
381bf215546Sopenharmony_ci   nir_variable *var = NULL;
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   if (nir->info.stage != MESA_SHADER_TESS_CTRL &&
384bf215546Sopenharmony_ci       nir->info.stage != MESA_SHADER_TESS_EVAL)
385bf215546Sopenharmony_ci      return false;
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci   return nir_shader_instructions_pass(nir, lower_load_patch_vertices_in,
388bf215546Sopenharmony_ci      nir_metadata_block_index | nir_metadata_dominance, &var);
389bf215546Sopenharmony_ci}
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_cistruct invert_depth_state
392bf215546Sopenharmony_ci{
393bf215546Sopenharmony_ci   unsigned viewport_mask;
394bf215546Sopenharmony_ci   bool clip_halfz;
395bf215546Sopenharmony_ci   nir_ssa_def *viewport_index;
396bf215546Sopenharmony_ci   nir_instr *store_pos_instr;
397bf215546Sopenharmony_ci};
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_cistatic void
400bf215546Sopenharmony_ciinvert_depth_impl(nir_builder *b, struct invert_depth_state *state)
401bf215546Sopenharmony_ci{
402bf215546Sopenharmony_ci   assert(state->store_pos_instr);
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(state->store_pos_instr);
405bf215546Sopenharmony_ci   if (state->viewport_index) {
406bf215546Sopenharmony_ci      /* Cursor is assigned before calling. Make sure that storing pos comes
407bf215546Sopenharmony_ci       * after computing the viewport.
408bf215546Sopenharmony_ci       */
409bf215546Sopenharmony_ci      nir_instr_move(b->cursor, &intr->instr);
410bf215546Sopenharmony_ci   }
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   nir_ssa_def *pos = nir_ssa_for_src(b, intr->src[1], 4);
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci   if (state->viewport_index) {
417bf215546Sopenharmony_ci      nir_push_if(b, nir_test_mask(b, nir_ishl(b, nir_imm_int(b, 1), state->viewport_index), state->viewport_mask));
418bf215546Sopenharmony_ci   }
419bf215546Sopenharmony_ci   nir_ssa_def *old_depth = nir_channel(b, pos, 2);
420bf215546Sopenharmony_ci   nir_ssa_def *new_depth = nir_fneg(b, old_depth);
421bf215546Sopenharmony_ci   if (state->clip_halfz)
422bf215546Sopenharmony_ci      new_depth = nir_fadd_imm(b, new_depth, 1.0);
423bf215546Sopenharmony_ci   nir_ssa_def *def = nir_vec4(b,
424bf215546Sopenharmony_ci                               nir_channel(b, pos, 0),
425bf215546Sopenharmony_ci                               nir_channel(b, pos, 1),
426bf215546Sopenharmony_ci                               new_depth,
427bf215546Sopenharmony_ci                               nir_channel(b, pos, 3));
428bf215546Sopenharmony_ci   if (state->viewport_index) {
429bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
430bf215546Sopenharmony_ci      def = nir_if_phi(b, def, pos);
431bf215546Sopenharmony_ci   }
432bf215546Sopenharmony_ci   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
433bf215546Sopenharmony_ci
434bf215546Sopenharmony_ci   state->viewport_index = NULL;
435bf215546Sopenharmony_ci   state->store_pos_instr = NULL;
436bf215546Sopenharmony_ci}
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_cistatic void
439bf215546Sopenharmony_ciinvert_depth_instr(nir_builder *b, struct nir_instr *instr, struct invert_depth_state *state)
440bf215546Sopenharmony_ci{
441bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
442bf215546Sopenharmony_ci      return;
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
445bf215546Sopenharmony_ci   if (intr->intrinsic == nir_intrinsic_store_deref) {
446bf215546Sopenharmony_ci      nir_variable *var = nir_intrinsic_get_var(intr, 0);
447bf215546Sopenharmony_ci      if (var->data.mode != nir_var_shader_out)
448bf215546Sopenharmony_ci         return;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci      if (var->data.location == VARYING_SLOT_VIEWPORT)
451bf215546Sopenharmony_ci         state->viewport_index = intr->src[1].ssa;
452bf215546Sopenharmony_ci      if (var->data.location == VARYING_SLOT_POS)
453bf215546Sopenharmony_ci         state->store_pos_instr = instr;
454bf215546Sopenharmony_ci   } else if (intr->intrinsic == nir_intrinsic_emit_vertex) {
455bf215546Sopenharmony_ci      b->cursor = nir_before_instr(instr);
456bf215546Sopenharmony_ci      invert_depth_impl(b, state);
457bf215546Sopenharmony_ci   }
458bf215546Sopenharmony_ci}
459bf215546Sopenharmony_ci
/* In OpenGL the window-space depth value z_w is evaluated according to "s * z_d + b"
 * with "s = (far - near) / 2" (depth clip: minus_one_to_one) [OpenGL 3.3, 2.13.1].
 * When we swap the far and near values to satisfy DirectX requirements, we have
 * to compensate by inverting "z_d' = -z_d" with this lowering pass.
 * When depth clip is set to zero_to_one, we compensate with "z_d' = 1.0f - z_d" instead.
 */
466bf215546Sopenharmony_civoid
467bf215546Sopenharmony_cid3d12_nir_invert_depth(nir_shader *shader, unsigned viewport_mask, bool clip_halfz)
468bf215546Sopenharmony_ci{
469bf215546Sopenharmony_ci   if (shader->info.stage != MESA_SHADER_VERTEX &&
470bf215546Sopenharmony_ci       shader->info.stage != MESA_SHADER_TESS_EVAL &&
471bf215546Sopenharmony_ci       shader->info.stage != MESA_SHADER_GEOMETRY)
472bf215546Sopenharmony_ci      return;
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   struct invert_depth_state state = { viewport_mask, clip_halfz };
475bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
476bf215546Sopenharmony_ci      if (function->impl) {
477bf215546Sopenharmony_ci         nir_builder b;
478bf215546Sopenharmony_ci         nir_builder_init(&b, function->impl);
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
481bf215546Sopenharmony_ci            nir_foreach_instr_safe(instr, block) {
482bf215546Sopenharmony_ci               invert_depth_instr(&b, instr, &state);
483bf215546Sopenharmony_ci            }
484bf215546Sopenharmony_ci         }
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci         if (state.store_pos_instr) {
487bf215546Sopenharmony_ci            b.cursor = nir_after_block(function->impl->end_block);
488bf215546Sopenharmony_ci            invert_depth_impl(&b, &state);
489bf215546Sopenharmony_ci         }
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
492bf215546Sopenharmony_ci                                               nir_metadata_dominance);
493bf215546Sopenharmony_ci      }
494bf215546Sopenharmony_ci   }
495bf215546Sopenharmony_ci}
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci/**
499bf215546Sopenharmony_ci * Lower State Vars:
500bf215546Sopenharmony_ci *
501bf215546Sopenharmony_ci * All uniforms related to internal D3D12 variables are
502bf215546Sopenharmony_ci * condensed into a UBO that is appended at the end of the
503bf215546Sopenharmony_ci * current ones.
504bf215546Sopenharmony_ci */
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_cistatic unsigned
507bf215546Sopenharmony_ciget_state_var_offset(struct d3d12_shader *shader, enum d3d12_state_var var)
508bf215546Sopenharmony_ci{
509bf215546Sopenharmony_ci   for (unsigned i = 0; i < shader->num_state_vars; ++i) {
510bf215546Sopenharmony_ci      if (shader->state_vars[i].var == var)
511bf215546Sopenharmony_ci         return shader->state_vars[i].offset;
512bf215546Sopenharmony_ci   }
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   unsigned offset = shader->state_vars_size;
515bf215546Sopenharmony_ci   shader->state_vars[shader->num_state_vars].offset = offset;
516bf215546Sopenharmony_ci   shader->state_vars[shader->num_state_vars].var = var;
517bf215546Sopenharmony_ci   shader->state_vars_size += 4; /* Use 4-words slots no matter the variable size */
518bf215546Sopenharmony_ci   shader->num_state_vars++;
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci   return offset;
521bf215546Sopenharmony_ci}
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_cistatic bool
524bf215546Sopenharmony_cilower_instr(nir_intrinsic_instr *instr, nir_builder *b,
525bf215546Sopenharmony_ci            struct d3d12_shader *shader, unsigned binding)
526bf215546Sopenharmony_ci{
527bf215546Sopenharmony_ci   nir_variable *variable = NULL;
528bf215546Sopenharmony_ci   nir_deref_instr *deref = NULL;
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&instr->instr);
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   if (instr->intrinsic == nir_intrinsic_load_uniform) {
533bf215546Sopenharmony_ci      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
534bf215546Sopenharmony_ci         if (var->data.driver_location == nir_intrinsic_base(instr)) {
535bf215546Sopenharmony_ci            variable = var;
536bf215546Sopenharmony_ci            break;
537bf215546Sopenharmony_ci         }
538bf215546Sopenharmony_ci      }
539bf215546Sopenharmony_ci   } else if (instr->intrinsic == nir_intrinsic_load_deref) {
540bf215546Sopenharmony_ci      deref = nir_src_as_deref(instr->src[0]);
541bf215546Sopenharmony_ci      variable = nir_intrinsic_get_var(instr, 0);
542bf215546Sopenharmony_ci   }
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci   if (variable == NULL ||
545bf215546Sopenharmony_ci       variable->num_state_slots != 1 ||
546bf215546Sopenharmony_ci       variable->state_slots[0].tokens[0] != STATE_INTERNAL_DRIVER)
547bf215546Sopenharmony_ci      return false;
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci   enum d3d12_state_var var = variable->state_slots[0].tokens[1];
550bf215546Sopenharmony_ci   nir_ssa_def *ubo_idx = nir_imm_int(b, binding);
551bf215546Sopenharmony_ci   nir_ssa_def *ubo_offset =  nir_imm_int(b, get_state_var_offset(shader, var) * 4);
552bf215546Sopenharmony_ci   nir_ssa_def *load =
553bf215546Sopenharmony_ci      nir_load_ubo(b, instr->num_components, instr->dest.ssa.bit_size,
554bf215546Sopenharmony_ci                   ubo_idx, ubo_offset,
555bf215546Sopenharmony_ci                   .align_mul = instr->dest.ssa.bit_size / 8,
556bf215546Sopenharmony_ci                   .align_offset = 0,
557bf215546Sopenharmony_ci                   .range_base = 0,
558bf215546Sopenharmony_ci                   .range = ~0,
559bf215546Sopenharmony_ci                   );
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   /* Remove the old load_* instruction and any parent derefs */
564bf215546Sopenharmony_ci   nir_instr_remove(&instr->instr);
565bf215546Sopenharmony_ci   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
566bf215546Sopenharmony_ci      /* If anyone is using this deref, leave it alone */
567bf215546Sopenharmony_ci      assert(d->dest.is_ssa);
568bf215546Sopenharmony_ci      if (!list_is_empty(&d->dest.ssa.uses))
569bf215546Sopenharmony_ci         break;
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci      nir_instr_remove(&d->instr);
572bf215546Sopenharmony_ci   }
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci   return true;
575bf215546Sopenharmony_ci}
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_cibool
578bf215546Sopenharmony_cid3d12_lower_state_vars(nir_shader *nir, struct d3d12_shader *shader)
579bf215546Sopenharmony_ci{
580bf215546Sopenharmony_ci   bool progress = false;
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci   /* The state var UBO is added after all the other UBOs if it already
583bf215546Sopenharmony_ci    * exists it will be replaced by using the same binding.
584bf215546Sopenharmony_ci    * In the event there are no other UBO's, use binding slot 1 to
585bf215546Sopenharmony_ci    * be consistent with other non-default UBO's */
586bf215546Sopenharmony_ci   unsigned binding = MAX2(nir->info.num_ubos, 1);
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
589bf215546Sopenharmony_ci      if (var->num_state_slots == 1 &&
590bf215546Sopenharmony_ci          var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
591bf215546Sopenharmony_ci         if (var->data.mode == nir_var_mem_ubo) {
592bf215546Sopenharmony_ci            binding = var->data.binding;
593bf215546Sopenharmony_ci         }
594bf215546Sopenharmony_ci      }
595bf215546Sopenharmony_ci   }
596bf215546Sopenharmony_ci
597bf215546Sopenharmony_ci   nir_foreach_function(function, nir) {
598bf215546Sopenharmony_ci      if (function->impl) {
599bf215546Sopenharmony_ci         nir_builder builder;
600bf215546Sopenharmony_ci         nir_builder_init(&builder, function->impl);
601bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
602bf215546Sopenharmony_ci            nir_foreach_instr_safe(instr, block) {
603bf215546Sopenharmony_ci               if (instr->type == nir_instr_type_intrinsic)
604bf215546Sopenharmony_ci                  progress |= lower_instr(nir_instr_as_intrinsic(instr),
605bf215546Sopenharmony_ci                                          &builder,
606bf215546Sopenharmony_ci                                          shader,
607bf215546Sopenharmony_ci                                          binding);
608bf215546Sopenharmony_ci            }
609bf215546Sopenharmony_ci         }
610bf215546Sopenharmony_ci
611bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
612bf215546Sopenharmony_ci                                               nir_metadata_dominance);
613bf215546Sopenharmony_ci      }
614bf215546Sopenharmony_ci   }
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   if (progress) {
617bf215546Sopenharmony_ci      assert(shader->num_state_vars > 0);
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci      shader->state_vars_used = true;
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci      /* Remove state variables */
622bf215546Sopenharmony_ci      nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
623bf215546Sopenharmony_ci         if (var->num_state_slots == 1 &&
624bf215546Sopenharmony_ci             var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
625bf215546Sopenharmony_ci            exec_node_remove(&var->node);
626bf215546Sopenharmony_ci            nir->num_uniforms--;
627bf215546Sopenharmony_ci         }
628bf215546Sopenharmony_ci      }
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci      const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER };
631bf215546Sopenharmony_ci      const struct glsl_type *type = glsl_array_type(glsl_vec4_type(),
632bf215546Sopenharmony_ci                                                     shader->state_vars_size / 4, 0);
633bf215546Sopenharmony_ci      nir_variable *ubo = nir_variable_create(nir, nir_var_mem_ubo, type,
634bf215546Sopenharmony_ci                                                  "d3d12_state_vars");
635bf215546Sopenharmony_ci      if (binding >= nir->info.num_ubos)
636bf215546Sopenharmony_ci         nir->info.num_ubos = binding + 1;
637bf215546Sopenharmony_ci      ubo->data.binding = binding;
638bf215546Sopenharmony_ci      ubo->num_state_slots = 1;
639bf215546Sopenharmony_ci      ubo->state_slots = ralloc_array(ubo, nir_state_slot, 1);
640bf215546Sopenharmony_ci      memcpy(ubo->state_slots[0].tokens, tokens,
641bf215546Sopenharmony_ci              sizeof(ubo->state_slots[0].tokens));
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci      struct glsl_struct_field field = {
644bf215546Sopenharmony_ci          .type = type,
645bf215546Sopenharmony_ci          .name = "data",
646bf215546Sopenharmony_ci          .location = -1,
647bf215546Sopenharmony_ci      };
648bf215546Sopenharmony_ci      ubo->interface_type =
649bf215546Sopenharmony_ci              glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
650bf215546Sopenharmony_ci                                  false, "__d3d12_state_vars_interface");
651bf215546Sopenharmony_ci   }
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci   return progress;
654bf215546Sopenharmony_ci}
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_civoid
657bf215546Sopenharmony_cid3d12_add_missing_dual_src_target(struct nir_shader *s,
658bf215546Sopenharmony_ci                                  unsigned missing_mask)
659bf215546Sopenharmony_ci{
660bf215546Sopenharmony_ci   assert(missing_mask != 0);
661bf215546Sopenharmony_ci   nir_builder b;
662bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(s);
663bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
664bf215546Sopenharmony_ci   b.cursor = nir_before_cf_list(&impl->body);
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci   nir_ssa_def *zero = nir_imm_zero(&b, 4, 32);
667bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; ++i) {
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci      if (!(missing_mask & (1u << i)))
670bf215546Sopenharmony_ci         continue;
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci      const char *name = i == 0 ? "gl_FragData[0]" :
673bf215546Sopenharmony_ci                                  "gl_SecondaryFragDataEXT[0]";
674bf215546Sopenharmony_ci      nir_variable *out = nir_variable_create(s, nir_var_shader_out,
675bf215546Sopenharmony_ci                                              glsl_vec4_type(), name);
676bf215546Sopenharmony_ci      out->data.location = FRAG_RESULT_DATA0;
677bf215546Sopenharmony_ci      out->data.driver_location = i;
678bf215546Sopenharmony_ci      out->data.index = i;
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci      nir_store_var(&b, out, zero, 0xf);
681bf215546Sopenharmony_ci   }
682bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_block_index |
683bf215546Sopenharmony_ci                               nir_metadata_dominance);
684bf215546Sopenharmony_ci}
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_cistatic bool
687bf215546Sopenharmony_cilower_load_ubo_packed_filter(const nir_instr *instr,
688bf215546Sopenharmony_ci                             UNUSED const void *_options) {
689bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
690bf215546Sopenharmony_ci      return false;
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci   return intr->intrinsic == nir_intrinsic_load_ubo;
695bf215546Sopenharmony_ci}
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_cistatic nir_ssa_def *
698bf215546Sopenharmony_cilower_load_ubo_packed_impl(nir_builder *b, nir_instr *instr,
699bf215546Sopenharmony_ci                              UNUSED void *_options) {
700bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci   nir_ssa_def *buffer = intr->src[0].ssa;
703bf215546Sopenharmony_ci   nir_ssa_def *offset = intr->src[1].ssa;
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci   nir_ssa_def *result =
706bf215546Sopenharmony_ci      build_load_ubo_dxil(b, buffer,
707bf215546Sopenharmony_ci                          offset,
708bf215546Sopenharmony_ci                          nir_dest_num_components(intr->dest),
709bf215546Sopenharmony_ci                          nir_dest_bit_size(intr->dest));
710bf215546Sopenharmony_ci   return result;
711bf215546Sopenharmony_ci}
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_cibool
714bf215546Sopenharmony_cinir_lower_packed_ubo_loads(nir_shader *nir) {
715bf215546Sopenharmony_ci   return nir_shader_lower_instructions(nir,
716bf215546Sopenharmony_ci                                        lower_load_ubo_packed_filter,
717bf215546Sopenharmony_ci                                        lower_load_ubo_packed_impl,
718bf215546Sopenharmony_ci                                        NULL);
719bf215546Sopenharmony_ci}
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_civoid
722bf215546Sopenharmony_cid3d12_lower_primitive_id(nir_shader *shader)
723bf215546Sopenharmony_ci{
724bf215546Sopenharmony_ci   nir_builder b;
725bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
726bf215546Sopenharmony_ci   nir_ssa_def *primitive_id;
727bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
728bf215546Sopenharmony_ci
729bf215546Sopenharmony_ci   nir_variable *primitive_id_var = nir_variable_create(shader, nir_var_shader_out,
730bf215546Sopenharmony_ci                                                        glsl_uint_type(), "primitive_id");
731bf215546Sopenharmony_ci   primitive_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
732bf215546Sopenharmony_ci   primitive_id_var->data.interpolation = INTERP_MODE_FLAT;
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
735bf215546Sopenharmony_ci      b.cursor = nir_before_block(block);
736bf215546Sopenharmony_ci      primitive_id = nir_load_primitive_id(&b);
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci      nir_foreach_instr_safe(instr, block) {
739bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic ||
740bf215546Sopenharmony_ci             nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_emit_vertex)
741bf215546Sopenharmony_ci            continue;
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_ci         b.cursor = nir_before_instr(instr);
744bf215546Sopenharmony_ci         nir_store_var(&b, primitive_id_var, primitive_id, 0x1);
745bf215546Sopenharmony_ci      }
746bf215546Sopenharmony_ci   }
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_none);
749bf215546Sopenharmony_ci}
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_cistatic void
752bf215546Sopenharmony_cilower_triangle_strip_store(nir_builder *b, nir_intrinsic_instr *intr,
753bf215546Sopenharmony_ci                           nir_variable *vertex_count_var,
754bf215546Sopenharmony_ci                           nir_variable **varyings)
755bf215546Sopenharmony_ci{
756bf215546Sopenharmony_ci   /**
757bf215546Sopenharmony_ci    * tmp_varying[slot][min(vertex_count, 2)] = src
758bf215546Sopenharmony_ci    */
759bf215546Sopenharmony_ci   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
760bf215546Sopenharmony_ci   nir_ssa_def *index = nir_imin(b, vertex_count, nir_imm_int(b, 2));
761bf215546Sopenharmony_ci   nir_variable *var = nir_intrinsic_get_var(intr, 0);
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci   if (var->data.mode != nir_var_shader_out)
764bf215546Sopenharmony_ci      return;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, varyings[var->data.location]), index);
767bf215546Sopenharmony_ci   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1], intr->num_components);
768bf215546Sopenharmony_ci   nir_store_deref(b, deref, value, 0xf);
769bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
770bf215546Sopenharmony_ci}
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_cistatic void
773bf215546Sopenharmony_cilower_triangle_strip_emit_vertex(nir_builder *b, nir_intrinsic_instr *intr,
774bf215546Sopenharmony_ci                                 nir_variable *vertex_count_var,
775bf215546Sopenharmony_ci                                 nir_variable **varyings,
776bf215546Sopenharmony_ci                                 nir_variable **out_varyings)
777bf215546Sopenharmony_ci{
778bf215546Sopenharmony_ci   // TODO xfb + flat shading + last_pv
779bf215546Sopenharmony_ci   /**
780bf215546Sopenharmony_ci    * if (vertex_count >= 2) {
781bf215546Sopenharmony_ci    *    for (i = 0; i < 3; i++) {
782bf215546Sopenharmony_ci    *       foreach(slot)
783bf215546Sopenharmony_ci    *          out[slot] = tmp_varying[slot][i];
784bf215546Sopenharmony_ci    *       EmitVertex();
785bf215546Sopenharmony_ci    *    }
786bf215546Sopenharmony_ci    *    EndPrimitive();
787bf215546Sopenharmony_ci    *    foreach(slot)
788bf215546Sopenharmony_ci    *       tmp_varying[slot][vertex_count % 2] = tmp_varying[slot][2];
789bf215546Sopenharmony_ci    * }
790bf215546Sopenharmony_ci    * vertex_count++;
791bf215546Sopenharmony_ci    */
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci   nir_ssa_def *two = nir_imm_int(b, 2);
794bf215546Sopenharmony_ci   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
795bf215546Sopenharmony_ci   nir_ssa_def *count_cmp = nir_uge(b, vertex_count, two);
796bf215546Sopenharmony_ci   nir_if *count_check = nir_push_if(b, count_cmp);
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   for (int j = 0; j < 3; ++j) {
799bf215546Sopenharmony_ci      for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
800bf215546Sopenharmony_ci         if (!varyings[i])
801bf215546Sopenharmony_ci            continue;
802bf215546Sopenharmony_ci         nir_copy_deref(b, nir_build_deref_var(b, out_varyings[i]),
803bf215546Sopenharmony_ci                        nir_build_deref_array_imm(b, nir_build_deref_var(b, varyings[i]), j));
804bf215546Sopenharmony_ci      }
805bf215546Sopenharmony_ci      nir_emit_vertex(b, 0);
806bf215546Sopenharmony_ci   }
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_ci   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
809bf215546Sopenharmony_ci      if (!varyings[i])
810bf215546Sopenharmony_ci         continue;
811bf215546Sopenharmony_ci      nir_copy_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), nir_umod(b, vertex_count, two)),
812bf215546Sopenharmony_ci                        nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), two));
813bf215546Sopenharmony_ci   }
814bf215546Sopenharmony_ci
815bf215546Sopenharmony_ci   nir_end_primitive(b, .stream_id = 0);
816bf215546Sopenharmony_ci
817bf215546Sopenharmony_ci   nir_pop_if(b, count_check);
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci   vertex_count = nir_iadd(b, vertex_count, nir_imm_int(b, 1));
820bf215546Sopenharmony_ci   nir_store_var(b, vertex_count_var, vertex_count, 0x1);
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
823bf215546Sopenharmony_ci}
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_cistatic void
826bf215546Sopenharmony_cilower_triangle_strip_end_primitive(nir_builder *b, nir_intrinsic_instr *intr,
827bf215546Sopenharmony_ci                                   nir_variable *vertex_count_var)
828bf215546Sopenharmony_ci{
829bf215546Sopenharmony_ci   /**
830bf215546Sopenharmony_ci    * vertex_count = 0;
831bf215546Sopenharmony_ci    */
832bf215546Sopenharmony_ci   nir_store_var(b, vertex_count_var, nir_imm_int(b, 0), 0x1);
833bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
834bf215546Sopenharmony_ci}
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_civoid
837bf215546Sopenharmony_cid3d12_lower_triangle_strip(nir_shader *shader)
838bf215546Sopenharmony_ci{
839bf215546Sopenharmony_ci   nir_builder b;
840bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
841bf215546Sopenharmony_ci   nir_variable *tmp_vars[VARYING_SLOT_MAX] = {0};
842bf215546Sopenharmony_ci   nir_variable *out_vars[VARYING_SLOT_MAX] = {0};
843bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_ci   shader->info.gs.vertices_out = (shader->info.gs.vertices_out - 2) * 3;
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci   nir_variable *vertex_count_var =
848bf215546Sopenharmony_ci      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_ci   nir_block *first = nir_start_block(impl);
851bf215546Sopenharmony_ci   b.cursor = nir_before_block(first);
852bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
853bf215546Sopenharmony_ci      const struct glsl_type *type = glsl_array_type(var->type, 3, 0);
854bf215546Sopenharmony_ci      tmp_vars[var->data.location] =  nir_local_variable_create(impl, type, "tmp_var");
855bf215546Sopenharmony_ci      out_vars[var->data.location] = var;
856bf215546Sopenharmony_ci   }
857bf215546Sopenharmony_ci   nir_store_var(&b, vertex_count_var, nir_imm_int(&b, 0), 1);
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
860bf215546Sopenharmony_ci      nir_foreach_instr_safe(instr, block) {
861bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
862bf215546Sopenharmony_ci            continue;
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
865bf215546Sopenharmony_ci         switch (intrin->intrinsic) {
866bf215546Sopenharmony_ci         case nir_intrinsic_store_deref:
867bf215546Sopenharmony_ci            b.cursor = nir_before_instr(instr);
868bf215546Sopenharmony_ci            lower_triangle_strip_store(&b, intrin, vertex_count_var, tmp_vars);
869bf215546Sopenharmony_ci            break;
870bf215546Sopenharmony_ci         case nir_intrinsic_emit_vertex_with_counter:
871bf215546Sopenharmony_ci         case nir_intrinsic_emit_vertex:
872bf215546Sopenharmony_ci            b.cursor = nir_before_instr(instr);
873bf215546Sopenharmony_ci            lower_triangle_strip_emit_vertex(&b, intrin, vertex_count_var,
874bf215546Sopenharmony_ci                                             tmp_vars, out_vars);
875bf215546Sopenharmony_ci            break;
876bf215546Sopenharmony_ci         case nir_intrinsic_end_primitive:
877bf215546Sopenharmony_ci         case nir_intrinsic_end_primitive_with_counter:
878bf215546Sopenharmony_ci            b.cursor = nir_before_instr(instr);
879bf215546Sopenharmony_ci            lower_triangle_strip_end_primitive(&b, intrin, vertex_count_var);
880bf215546Sopenharmony_ci            break;
881bf215546Sopenharmony_ci         default:
882bf215546Sopenharmony_ci            break;
883bf215546Sopenharmony_ci         }
884bf215546Sopenharmony_ci      }
885bf215546Sopenharmony_ci   }
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_none);
888bf215546Sopenharmony_ci   NIR_PASS_V(shader, nir_lower_var_copies);
889bf215546Sopenharmony_ci}
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_cistatic bool
892bf215546Sopenharmony_ciis_sample_pos(const nir_instr *instr, const void *_data)
893bf215546Sopenharmony_ci{
894bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
895bf215546Sopenharmony_ci      return false;
896bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
897bf215546Sopenharmony_ci   return intr->intrinsic == nir_intrinsic_load_sample_pos;
898bf215546Sopenharmony_ci}
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_cistatic nir_ssa_def *
901bf215546Sopenharmony_cilower_sample_pos(nir_builder *b, nir_instr *instr, void *_data)
902bf215546Sopenharmony_ci{
903bf215546Sopenharmony_ci   return nir_load_sample_pos_from_id(b, 32, nir_load_sample_id(b));
904bf215546Sopenharmony_ci}
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_cibool
907bf215546Sopenharmony_cid3d12_lower_sample_pos(nir_shader *s)
908bf215546Sopenharmony_ci{
909bf215546Sopenharmony_ci   return nir_shader_lower_instructions(s, is_sample_pos, lower_sample_pos, NULL);
910bf215546Sopenharmony_ci}
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_cistatic bool
913bf215546Sopenharmony_ciis_multisampling_instr(const nir_instr *instr, const void *_data)
914bf215546Sopenharmony_ci{
915bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
916bf215546Sopenharmony_ci      return false;
917bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
918bf215546Sopenharmony_ci   if (intr->intrinsic == nir_intrinsic_store_output) {
919bf215546Sopenharmony_ci      nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
920bf215546Sopenharmony_ci      return semantics.location == FRAG_RESULT_SAMPLE_MASK;
921bf215546Sopenharmony_ci   } else if (intr->intrinsic == nir_intrinsic_store_deref) {
922bf215546Sopenharmony_ci      nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
923bf215546Sopenharmony_ci      return var->data.location == FRAG_RESULT_SAMPLE_MASK;
924bf215546Sopenharmony_ci   } else if (intr->intrinsic == nir_intrinsic_load_sample_id ||
925bf215546Sopenharmony_ci              intr->intrinsic == nir_intrinsic_load_sample_mask_in)
926bf215546Sopenharmony_ci      return true;
927bf215546Sopenharmony_ci   return false;
928bf215546Sopenharmony_ci}
929bf215546Sopenharmony_ci
930bf215546Sopenharmony_cistatic nir_ssa_def *
931bf215546Sopenharmony_cilower_multisampling_instr(nir_builder *b, nir_instr *instr, void *_data)
932bf215546Sopenharmony_ci{
933bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
934bf215546Sopenharmony_ci   switch (intr->intrinsic) {
935bf215546Sopenharmony_ci   case nir_intrinsic_store_output:
936bf215546Sopenharmony_ci   case nir_intrinsic_store_deref:
937bf215546Sopenharmony_ci      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
938bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_id:
939bf215546Sopenharmony_ci      return nir_imm_int(b, 0);
940bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_mask_in:
941bf215546Sopenharmony_ci      return nir_imm_int(b, 1);
942bf215546Sopenharmony_ci   default:
943bf215546Sopenharmony_ci      unreachable("Invalid intrinsic");
944bf215546Sopenharmony_ci   }
945bf215546Sopenharmony_ci}
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_cibool
948bf215546Sopenharmony_cid3d12_disable_multisampling(nir_shader *s)
949bf215546Sopenharmony_ci{
950bf215546Sopenharmony_ci   if (s->info.stage != MESA_SHADER_FRAGMENT)
951bf215546Sopenharmony_ci      return false;
952bf215546Sopenharmony_ci   bool progress = nir_shader_lower_instructions(s, is_multisampling_instr, lower_multisampling_instr, NULL);
953bf215546Sopenharmony_ci
954bf215546Sopenharmony_ci   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
955bf215546Sopenharmony_ci      if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
956bf215546Sopenharmony_ci         exec_node_remove(&var->node);
957bf215546Sopenharmony_ci         s->info.outputs_written &= ~(1ull << FRAG_RESULT_SAMPLE_MASK);
958bf215546Sopenharmony_ci         progress = true;
959bf215546Sopenharmony_ci      }
960bf215546Sopenharmony_ci   }
961bf215546Sopenharmony_ci   nir_foreach_variable_with_modes_safe(var, s, nir_var_system_value) {
962bf215546Sopenharmony_ci      if (var->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN ||
963bf215546Sopenharmony_ci          var->data.location == SYSTEM_VALUE_SAMPLE_ID) {
964bf215546Sopenharmony_ci         exec_node_remove(&var->node);
965bf215546Sopenharmony_ci         progress = true;
966bf215546Sopenharmony_ci      }
967bf215546Sopenharmony_ci      var->data.sample = false;
968bf215546Sopenharmony_ci   }
969bf215546Sopenharmony_ci   BITSET_CLEAR(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
970bf215546Sopenharmony_ci   return progress;
971bf215546Sopenharmony_ci}
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_cistruct multistream_subvar_state {
974bf215546Sopenharmony_ci   nir_variable *var;
975bf215546Sopenharmony_ci   uint8_t stream;
976bf215546Sopenharmony_ci   uint8_t num_components;
977bf215546Sopenharmony_ci};
978bf215546Sopenharmony_cistruct multistream_var_state {
979bf215546Sopenharmony_ci   unsigned num_subvars;
980bf215546Sopenharmony_ci   struct multistream_subvar_state subvars[4];
981bf215546Sopenharmony_ci};
982bf215546Sopenharmony_cistruct multistream_state {
983bf215546Sopenharmony_ci   struct multistream_var_state vars[VARYING_SLOT_MAX];
984bf215546Sopenharmony_ci};
985bf215546Sopenharmony_ci
986bf215546Sopenharmony_cistatic bool
987bf215546Sopenharmony_cisplit_multistream_varying_stores(nir_builder *b, nir_instr *instr, void *_state)
988bf215546Sopenharmony_ci{
989bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
990bf215546Sopenharmony_ci      return false;
991bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
992bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_store_deref)
993bf215546Sopenharmony_ci      return false;
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
996bf215546Sopenharmony_ci   if (!nir_deref_mode_is(deref, nir_var_shader_out))
997bf215546Sopenharmony_ci      return false;
998bf215546Sopenharmony_ci
999bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(deref);
1000bf215546Sopenharmony_ci   assert(var);
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ci   struct multistream_state *state = _state;
1003bf215546Sopenharmony_ci   struct multistream_var_state *var_state = &state->vars[var->data.location];
1004bf215546Sopenharmony_ci   if (var_state->num_subvars <= 1)
1005bf215546Sopenharmony_ci      return false;
1006bf215546Sopenharmony_ci
1007bf215546Sopenharmony_ci   nir_deref_path path;
1008bf215546Sopenharmony_ci   nir_deref_path_init(&path, deref, b->shader);
1009bf215546Sopenharmony_ci   assert(path.path[0]->deref_type == nir_deref_type_var && path.path[0]->var == var);
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_ci   unsigned first_channel = 0;
1012bf215546Sopenharmony_ci   for (unsigned subvar = 0; subvar < var_state->num_subvars; ++subvar) {
1013bf215546Sopenharmony_ci      b->cursor = nir_after_instr(&path.path[0]->instr);
1014bf215546Sopenharmony_ci      nir_deref_instr *new_path = nir_build_deref_var(b, var_state->subvars[subvar].var);
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_ci      for (unsigned i = 1; path.path[i]; ++i) {
1017bf215546Sopenharmony_ci         b->cursor = nir_after_instr(&path.path[i]->instr);
1018bf215546Sopenharmony_ci         new_path = nir_build_deref_follower(b, new_path, path.path[i]);
1019bf215546Sopenharmony_ci      }
1020bf215546Sopenharmony_ci
1021bf215546Sopenharmony_ci      b->cursor = nir_before_instr(instr);
1022bf215546Sopenharmony_ci      unsigned mask_num_channels = (1 << var_state->subvars[subvar].num_components) - 1;
1023bf215546Sopenharmony_ci      unsigned orig_write_mask = nir_intrinsic_write_mask(intr);
1024bf215546Sopenharmony_ci      nir_ssa_def *sub_value = nir_channels(b, intr->src[1].ssa, mask_num_channels << first_channel);
1025bf215546Sopenharmony_ci
1026bf215546Sopenharmony_ci      first_channel += var_state->subvars[subvar].num_components;
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci      unsigned new_write_mask = (orig_write_mask >> first_channel) & mask_num_channels;
1029bf215546Sopenharmony_ci      nir_build_store_deref(b, &new_path->dest.ssa, sub_value, new_write_mask, nir_intrinsic_access(intr));
1030bf215546Sopenharmony_ci   }
1031bf215546Sopenharmony_ci
1032bf215546Sopenharmony_ci   nir_deref_path_finish(&path);
1033bf215546Sopenharmony_ci   nir_instr_free_and_dce(instr);
1034bf215546Sopenharmony_ci   return true;
1035bf215546Sopenharmony_ci}
1036bf215546Sopenharmony_ci
1037bf215546Sopenharmony_cibool
1038bf215546Sopenharmony_cid3d12_split_multistream_varyings(nir_shader *s)
1039bf215546Sopenharmony_ci{
1040bf215546Sopenharmony_ci   if (s->info.stage != MESA_SHADER_GEOMETRY)
1041bf215546Sopenharmony_ci      return false;
1042bf215546Sopenharmony_ci
1043bf215546Sopenharmony_ci   struct multistream_state state;
1044bf215546Sopenharmony_ci   memset(&state, 0, sizeof(state));
1045bf215546Sopenharmony_ci
1046bf215546Sopenharmony_ci   bool progress = false;
1047bf215546Sopenharmony_ci   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
1048bf215546Sopenharmony_ci      if ((var->data.stream & NIR_STREAM_PACKED) == 0)
1049bf215546Sopenharmony_ci         continue;
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci      struct multistream_var_state *var_state = &state.vars[var->data.location];
1052bf215546Sopenharmony_ci      struct multistream_subvar_state *subvars = var_state->subvars;
1053bf215546Sopenharmony_ci      for (unsigned i = 0; i < glsl_get_vector_elements(var->type); ++i) {
1054bf215546Sopenharmony_ci         unsigned stream = (var->data.stream >> (2 * (i + var->data.location_frac))) & 0x3;
1055bf215546Sopenharmony_ci         if (var_state->num_subvars == 0 || stream != subvars[var_state->num_subvars - 1].stream) {
1056bf215546Sopenharmony_ci            subvars[var_state->num_subvars].stream = stream;
1057bf215546Sopenharmony_ci            subvars[var_state->num_subvars].num_components = 1;
1058bf215546Sopenharmony_ci            var_state->num_subvars++;
1059bf215546Sopenharmony_ci         } else {
1060bf215546Sopenharmony_ci            subvars[var_state->num_subvars - 1].num_components++;
1061bf215546Sopenharmony_ci         }
1062bf215546Sopenharmony_ci      }
1063bf215546Sopenharmony_ci
1064bf215546Sopenharmony_ci      var->data.stream = subvars[0].stream;
1065bf215546Sopenharmony_ci      if (var_state->num_subvars == 1)
1066bf215546Sopenharmony_ci         continue;
1067bf215546Sopenharmony_ci
1068bf215546Sopenharmony_ci      progress = true;
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci      subvars[0].var = var;
1071bf215546Sopenharmony_ci      var->type = glsl_vector_type(glsl_get_base_type(var->type), subvars[0].num_components);
1072bf215546Sopenharmony_ci      unsigned location_frac = var->data.location_frac + subvars[0].num_components;
1073bf215546Sopenharmony_ci      for (unsigned subvar = 1; subvar < var_state->num_subvars; ++subvar) {
1074bf215546Sopenharmony_ci         char *name = ralloc_asprintf(s, "unpacked:%s_stream%d", var->name, subvars[subvar].stream);
1075bf215546Sopenharmony_ci         nir_variable *new_var = nir_variable_create(s, nir_var_shader_out,
1076bf215546Sopenharmony_ci            glsl_vector_type(glsl_get_base_type(var->type), subvars[subvar].num_components),
1077bf215546Sopenharmony_ci            name);
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci         new_var->data = var->data;
1080bf215546Sopenharmony_ci         new_var->data.stream = subvars[subvar].stream;
1081bf215546Sopenharmony_ci         new_var->data.location_frac = location_frac;
1082bf215546Sopenharmony_ci         location_frac += subvars[subvar].num_components;
1083bf215546Sopenharmony_ci         subvars[subvar].var = new_var;
1084bf215546Sopenharmony_ci      }
1085bf215546Sopenharmony_ci   }
1086bf215546Sopenharmony_ci
1087bf215546Sopenharmony_ci   if (progress) {
1088bf215546Sopenharmony_ci      nir_shader_instructions_pass(s, split_multistream_varying_stores,
1089bf215546Sopenharmony_ci         nir_metadata_block_index | nir_metadata_dominance, &state);
1090bf215546Sopenharmony_ci   } else {
1091bf215546Sopenharmony_ci      nir_shader_preserve_all_metadata(s);
1092bf215546Sopenharmony_ci   }
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci   return progress;
1095bf215546Sopenharmony_ci}
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_cistatic void
1098bf215546Sopenharmony_ciwrite_0(nir_builder *b, nir_deref_instr *deref)
1099bf215546Sopenharmony_ci{
1100bf215546Sopenharmony_ci   if (glsl_type_is_array_or_matrix(deref->type)) {
1101bf215546Sopenharmony_ci      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
1102bf215546Sopenharmony_ci         write_0(b, nir_build_deref_array_imm(b, deref, i));
1103bf215546Sopenharmony_ci   } else if (glsl_type_is_struct(deref->type)) {
1104bf215546Sopenharmony_ci      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
1105bf215546Sopenharmony_ci         write_0(b, nir_build_deref_struct(b, deref, i));
1106bf215546Sopenharmony_ci   } else {
1107bf215546Sopenharmony_ci      nir_ssa_def *scalar = nir_imm_intN_t(b, 0, glsl_get_bit_size(deref->type));
1108bf215546Sopenharmony_ci      nir_ssa_def *scalar_arr[NIR_MAX_VEC_COMPONENTS];
1109bf215546Sopenharmony_ci      unsigned num_comps = glsl_get_components(deref->type);
1110bf215546Sopenharmony_ci      unsigned writemask = (1 << num_comps) - 1;
1111bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_comps; ++i)
1112bf215546Sopenharmony_ci         scalar_arr[i] = scalar;
1113bf215546Sopenharmony_ci      nir_ssa_def *zero_val = nir_vec(b, scalar_arr, num_comps);
1114bf215546Sopenharmony_ci      nir_store_deref(b, deref, zero_val, writemask);
1115bf215546Sopenharmony_ci   }
1116bf215546Sopenharmony_ci}
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_civoid
1119bf215546Sopenharmony_cid3d12_write_0_to_new_varying(nir_shader *s, nir_variable *var)
1120bf215546Sopenharmony_ci{
1121bf215546Sopenharmony_ci   /* Skip per-vertex HS outputs */
1122bf215546Sopenharmony_ci   if (s->info.stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
1123bf215546Sopenharmony_ci      return;
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_ci   nir_foreach_function(func, s) {
1126bf215546Sopenharmony_ci      if (!func->impl)
1127bf215546Sopenharmony_ci         continue;
1128bf215546Sopenharmony_ci
1129bf215546Sopenharmony_ci      nir_builder b;
1130bf215546Sopenharmony_ci      nir_builder_init(&b, func->impl);
1131bf215546Sopenharmony_ci
1132bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
1133bf215546Sopenharmony_ci         b.cursor = nir_before_block(block);
1134bf215546Sopenharmony_ci         if (s->info.stage != MESA_SHADER_GEOMETRY) {
1135bf215546Sopenharmony_ci            write_0(&b, nir_build_deref_var(&b, var));
1136bf215546Sopenharmony_ci            break;
1137bf215546Sopenharmony_ci         }
1138bf215546Sopenharmony_ci
1139bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
1140bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
1141bf215546Sopenharmony_ci               continue;
1142bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1143bf215546Sopenharmony_ci            if (intr->intrinsic != nir_intrinsic_emit_vertex)
1144bf215546Sopenharmony_ci               continue;
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci            b.cursor = nir_before_instr(instr);
1147bf215546Sopenharmony_ci            write_0(&b, nir_build_deref_var(&b, var));
1148bf215546Sopenharmony_ci         }
1149bf215546Sopenharmony_ci      }
1150bf215546Sopenharmony_ci
1151bf215546Sopenharmony_ci      nir_metadata_preserve(func->impl, nir_metadata_block_index | nir_metadata_dominance);
1152bf215546Sopenharmony_ci   }
1153bf215546Sopenharmony_ci}
1154