1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © Microsoft Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "d3d12_compiler.h"
25bf215546Sopenharmony_ci#include "d3d12_context.h"
26bf215546Sopenharmony_ci#include "d3d12_debug.h"
27bf215546Sopenharmony_ci#include "d3d12_screen.h"
28bf215546Sopenharmony_ci#include "d3d12_nir_passes.h"
29bf215546Sopenharmony_ci#include "nir_to_dxil.h"
30bf215546Sopenharmony_ci#include "dxil_nir.h"
31bf215546Sopenharmony_ci#include "dxil_nir_lower_int_cubemaps.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#include "pipe/p_state.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci#include "nir.h"
36bf215546Sopenharmony_ci#include "nir/nir_draw_helpers.h"
37bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h"
38bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h"
39bf215546Sopenharmony_ci#include "tgsi/tgsi_from_mesa.h"
40bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h"
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci#include "util/hash_table.h"
43bf215546Sopenharmony_ci#include "util/u_memory.h"
44bf215546Sopenharmony_ci#include "util/u_prim.h"
45bf215546Sopenharmony_ci#include "util/u_simple_shaders.h"
46bf215546Sopenharmony_ci#include "util/u_dl.h"
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci#include <dxguids/dxguids.h>
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ciextern "C" {
51bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h"
52bf215546Sopenharmony_ci#include "tgsi/tgsi_point_sprite.h"
53bf215546Sopenharmony_ci}
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci#ifdef _WIN32
56bf215546Sopenharmony_ci#include "dxil_validator.h"
57bf215546Sopenharmony_ci#endif
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ciconst void *
60bf215546Sopenharmony_cid3d12_get_compiler_options(struct pipe_screen *screen,
61bf215546Sopenharmony_ci                           enum pipe_shader_ir ir,
62bf215546Sopenharmony_ci                           enum pipe_shader_type shader)
63bf215546Sopenharmony_ci{
64bf215546Sopenharmony_ci   assert(ir == PIPE_SHADER_IR_NIR);
65bf215546Sopenharmony_ci   return &d3d12_screen(screen)->nir_options;
66bf215546Sopenharmony_ci}
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_cistatic uint32_t
69bf215546Sopenharmony_ciresource_dimension(enum glsl_sampler_dim dim)
70bf215546Sopenharmony_ci{
71bf215546Sopenharmony_ci   switch (dim) {
72bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_1D:
73bf215546Sopenharmony_ci      return RESOURCE_DIMENSION_TEXTURE1D;
74bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_2D:
75bf215546Sopenharmony_ci      return RESOURCE_DIMENSION_TEXTURE2D;
76bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_3D:
77bf215546Sopenharmony_ci      return RESOURCE_DIMENSION_TEXTURE3D;
78bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_CUBE:
79bf215546Sopenharmony_ci      return RESOURCE_DIMENSION_TEXTURECUBE;
80bf215546Sopenharmony_ci   default:
81bf215546Sopenharmony_ci      return RESOURCE_DIMENSION_UNKNOWN;
82bf215546Sopenharmony_ci   }
83bf215546Sopenharmony_ci}
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_cistatic bool
86bf215546Sopenharmony_cican_remove_dead_sampler(nir_variable *var, void *data)
87bf215546Sopenharmony_ci{
88bf215546Sopenharmony_ci   const struct glsl_type *base_type = glsl_without_array(var->type);
89bf215546Sopenharmony_ci   return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
90bf215546Sopenharmony_ci}
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_cistatic struct d3d12_shader *
93bf215546Sopenharmony_cicompile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
94bf215546Sopenharmony_ci            struct d3d12_shader_key *key, struct nir_shader *nir)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
97bf215546Sopenharmony_ci   struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
98bf215546Sopenharmony_ci   shader->key = *key;
99bf215546Sopenharmony_ci   shader->nir = nir;
100bf215546Sopenharmony_ci   sel->current = shader;
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_samplers);
103bf215546Sopenharmony_ci   NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_dce);
106bf215546Sopenharmony_ci   struct nir_remove_dead_variables_options dead_var_opts = {};
107bf215546Sopenharmony_ci   dead_var_opts.can_remove_var = can_remove_dead_sampler;
108bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   if (key->samples_int_textures)
111bf215546Sopenharmony_ci      NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
112bf215546Sopenharmony_ci                 key->tex_wrap_states, key->swizzle_state,
113bf215546Sopenharmony_ci                 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   if (key->vs.needs_format_emulation)
116bf215546Sopenharmony_ci      dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
119bf215546Sopenharmony_ci   uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
120bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
121bf215546Sopenharmony_ci   shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
122bf215546Sopenharmony_ci                              nir->info.num_ubos > num_ubos_before_lower_to_ubo;
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   if (key->last_vertex_processing_stage) {
125bf215546Sopenharmony_ci      if (key->invert_depth)
126bf215546Sopenharmony_ci         NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
127bf215546Sopenharmony_ci      if (!key->halfz)
128bf215546Sopenharmony_ci         NIR_PASS_V(nir, nir_lower_clip_halfz);
129bf215546Sopenharmony_ci      NIR_PASS_V(nir, d3d12_lower_yflip);
130bf215546Sopenharmony_ci   }
131bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
132bf215546Sopenharmony_ci   NIR_PASS_V(nir, d3d12_lower_load_draw_params);
133bf215546Sopenharmony_ci   NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
134bf215546Sopenharmony_ci   NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
135bf215546Sopenharmony_ci   NIR_PASS_V(nir, dxil_nir_lower_bool_input);
136bf215546Sopenharmony_ci   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
137bf215546Sopenharmony_ci   NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
138bf215546Sopenharmony_ci   NIR_PASS_V(nir, dxil_nir_lower_double_math);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   if (key->fs.multisample_disabled)
141bf215546Sopenharmony_ci      NIR_PASS_V(nir, d3d12_disable_multisampling);
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   struct nir_to_dxil_options opts = {};
144bf215546Sopenharmony_ci   opts.interpolate_at_vertex = screen->have_load_at_vertex;
145bf215546Sopenharmony_ci   opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
146bf215546Sopenharmony_ci   opts.no_ubo0 = !shader->has_default_ubo0;
147bf215546Sopenharmony_ci   opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
148bf215546Sopenharmony_ci   opts.provoking_vertex = key->fs.provoking_vertex;
149bf215546Sopenharmony_ci   opts.input_clip_size = key->input_clip_size;
150bf215546Sopenharmony_ci   opts.environment = DXIL_ENVIRONMENT_GL;
151bf215546Sopenharmony_ci   opts.shader_model_max = SHADER_MODEL_6_2;
152bf215546Sopenharmony_ci#ifdef _WIN32
153bf215546Sopenharmony_ci   opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
154bf215546Sopenharmony_ci#endif
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   struct blob tmp;
157bf215546Sopenharmony_ci   if (!nir_to_dxil(nir, &opts, &tmp)) {
158bf215546Sopenharmony_ci      debug_printf("D3D12: nir_to_dxil failed\n");
159bf215546Sopenharmony_ci      return NULL;
160bf215546Sopenharmony_ci   }
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   // Non-ubo variables
163bf215546Sopenharmony_ci   shader->begin_srv_binding = (UINT_MAX);
164bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
165bf215546Sopenharmony_ci      auto type_no_array = glsl_without_array(var->type);
166bf215546Sopenharmony_ci      if (glsl_type_is_texture(type_no_array)) {
167bf215546Sopenharmony_ci         unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
168bf215546Sopenharmony_ci         for (unsigned i = 0; i < count; ++i) {
169bf215546Sopenharmony_ci            shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
170bf215546Sopenharmony_ci         }
171bf215546Sopenharmony_ci         shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
172bf215546Sopenharmony_ci         shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
173bf215546Sopenharmony_ci      }
174bf215546Sopenharmony_ci   }
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci   nir_foreach_image_variable(var, nir) {
177bf215546Sopenharmony_ci      auto type_no_array = glsl_without_array(var->type);
178bf215546Sopenharmony_ci      unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
179bf215546Sopenharmony_ci      for (unsigned i = 0; i < count; ++i) {
180bf215546Sopenharmony_ci         shader->uav_bindings[var->data.driver_location + i].format = var->data.image.format;
181bf215546Sopenharmony_ci         shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
182bf215546Sopenharmony_ci      }
183bf215546Sopenharmony_ci   }
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   // Ubo variables
186bf215546Sopenharmony_ci   if(nir->info.num_ubos) {
187bf215546Sopenharmony_ci      // Ignore state_vars ubo as it is bound as root constants
188bf215546Sopenharmony_ci      unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
189bf215546Sopenharmony_ci      for(unsigned i = shader->has_default_ubo0 ? 0 : 1; i < num_ubo_bindings; ++i) {
190bf215546Sopenharmony_ci         shader->cb_bindings[shader->num_cb_bindings++].binding = i;
191bf215546Sopenharmony_ci      }
192bf215546Sopenharmony_ci   }
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci#ifdef _WIN32
195bf215546Sopenharmony_ci   if (ctx->dxil_validator) {
196bf215546Sopenharmony_ci      if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
197bf215546Sopenharmony_ci         char *err;
198bf215546Sopenharmony_ci         if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
199bf215546Sopenharmony_ci                                   tmp.size, &err) && err) {
200bf215546Sopenharmony_ci            debug_printf(
201bf215546Sopenharmony_ci               "== VALIDATION ERROR =============================================\n"
202bf215546Sopenharmony_ci               "%s\n"
203bf215546Sopenharmony_ci               "== END ==========================================================\n",
204bf215546Sopenharmony_ci               err);
205bf215546Sopenharmony_ci            ralloc_free(err);
206bf215546Sopenharmony_ci         }
207bf215546Sopenharmony_ci      }
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci      if (d3d12_debug & D3D12_DEBUG_DISASS) {
210bf215546Sopenharmony_ci         char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
211bf215546Sopenharmony_ci                                        tmp.size);
212bf215546Sopenharmony_ci         fprintf(stderr,
213bf215546Sopenharmony_ci                 "== BEGIN SHADER ============================================\n"
214bf215546Sopenharmony_ci                 "%s\n"
215bf215546Sopenharmony_ci                 "== END SHADER ==============================================\n",
216bf215546Sopenharmony_ci               str);
217bf215546Sopenharmony_ci         ralloc_free(str);
218bf215546Sopenharmony_ci      }
219bf215546Sopenharmony_ci   }
220bf215546Sopenharmony_ci#endif
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   if (d3d12_debug & D3D12_DEBUG_DXIL) {
225bf215546Sopenharmony_ci      char buf[256];
226bf215546Sopenharmony_ci      static int i;
227bf215546Sopenharmony_ci      snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
228bf215546Sopenharmony_ci      FILE *fp = fopen(buf, "wb");
229bf215546Sopenharmony_ci      fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
230bf215546Sopenharmony_ci      fclose(fp);
231bf215546Sopenharmony_ci      fprintf(stderr, "wrote '%s'...\n", buf);
232bf215546Sopenharmony_ci   }
233bf215546Sopenharmony_ci   return shader;
234bf215546Sopenharmony_ci}
235bf215546Sopenharmony_ci
/**
 * Per-selection scratch state gathered while choosing shader variants.
 *
 * NOTE(review): several fields share their name with a static helper in this
 * file (needs_point_sprite_lowering(), fill_mode_lowered(), ...); presumably
 * each field caches the result of the matching helper -- confirm against the
 * code that fills this struct.
 */
struct d3d12_selection_context {
   struct d3d12_context *ctx;          /* context the selection is done for */
   bool needs_point_sprite_lowering;
   bool needs_vertex_reordering;
   unsigned provoking_vertex;          /* read and reset to 0 by needs_vertex_reordering() */
   bool alternate_tri;                 /* read by needs_vertex_reordering() */
   unsigned fill_mode_lowered;
   unsigned cull_mode_lowered;
   bool manual_depth_range;
   unsigned missing_dual_src_outputs;
   unsigned frag_result_color_lowering;
   const unsigned *variable_workgroup_size;
};
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_cistatic unsigned
251bf215546Sopenharmony_cimissing_dual_src_outputs(struct d3d12_context *ctx)
252bf215546Sopenharmony_ci{
253bf215546Sopenharmony_ci   if (!ctx->gfx_pipeline_state.blend->is_dual_src)
254bf215546Sopenharmony_ci      return 0;
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
257bf215546Sopenharmony_ci   nir_shader *s = fs->initial;
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   unsigned indices_seen = 0;
260bf215546Sopenharmony_ci   nir_foreach_function(function, s) {
261bf215546Sopenharmony_ci      if (function->impl) {
262bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
263bf215546Sopenharmony_ci            nir_foreach_instr(instr, block) {
264bf215546Sopenharmony_ci               if (instr->type != nir_instr_type_intrinsic)
265bf215546Sopenharmony_ci                  continue;
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
268bf215546Sopenharmony_ci               if (intr->intrinsic != nir_intrinsic_store_deref)
269bf215546Sopenharmony_ci                  continue;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci               nir_variable *var = nir_intrinsic_get_var(intr, 0);
272bf215546Sopenharmony_ci               if (var->data.mode != nir_var_shader_out)
273bf215546Sopenharmony_ci                  continue;
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci               unsigned index = var->data.index;
276bf215546Sopenharmony_ci               if (var->data.location > FRAG_RESULT_DATA0)
277bf215546Sopenharmony_ci                  index = var->data.location - FRAG_RESULT_DATA0;
278bf215546Sopenharmony_ci               else if (var->data.location != FRAG_RESULT_COLOR &&
279bf215546Sopenharmony_ci                        var->data.location != FRAG_RESULT_DATA0)
280bf215546Sopenharmony_ci                  continue;
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci               indices_seen |= 1u << index;
283bf215546Sopenharmony_ci               if ((indices_seen & 3) == 3)
284bf215546Sopenharmony_ci                  return 0;
285bf215546Sopenharmony_ci            }
286bf215546Sopenharmony_ci         }
287bf215546Sopenharmony_ci      }
288bf215546Sopenharmony_ci   }
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci   return 3 & ~indices_seen;
291bf215546Sopenharmony_ci}
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_cistatic unsigned
294bf215546Sopenharmony_cifrag_result_color_lowering(struct d3d12_context *ctx)
295bf215546Sopenharmony_ci{
296bf215546Sopenharmony_ci   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
297bf215546Sopenharmony_ci   assert(fs);
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci   if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
300bf215546Sopenharmony_ci      return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   return 0;
303bf215546Sopenharmony_ci}
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_cistatic bool
306bf215546Sopenharmony_cimanual_depth_range(struct d3d12_context *ctx)
307bf215546Sopenharmony_ci{
308bf215546Sopenharmony_ci   if (!d3d12_need_zero_one_depth_range(ctx))
309bf215546Sopenharmony_ci      return false;
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   /**
312bf215546Sopenharmony_ci    * If we can't use the D3D12 zero-one depth-range, we might have to apply
313bf215546Sopenharmony_ci    * depth-range ourselves.
314bf215546Sopenharmony_ci    *
315bf215546Sopenharmony_ci    * Because we only need to override the depth-range to zero-one range in
316bf215546Sopenharmony_ci    * the case where we write frag-depth, we only need to apply manual
317bf215546Sopenharmony_ci    * depth-range to gl_FragCoord.z.
318bf215546Sopenharmony_ci    *
319bf215546Sopenharmony_ci    * No extra care is needed to be taken in the case where gl_FragDepth is
320bf215546Sopenharmony_ci    * written conditionally, because the GLSL 4.60 spec states:
321bf215546Sopenharmony_ci    *
322bf215546Sopenharmony_ci    *    If a shader statically assigns a value to gl_FragDepth, and there
323bf215546Sopenharmony_ci    *    is an execution path through the shader that does not set
324bf215546Sopenharmony_ci    *    gl_FragDepth, then the value of the fragment’s depth may be
325bf215546Sopenharmony_ci    *    undefined for executions of the shader that take that path. That
326bf215546Sopenharmony_ci    *    is, if the set of linked fragment shaders statically contain a
327bf215546Sopenharmony_ci    *    write to gl_FragDepth, then it is responsible for always writing
328bf215546Sopenharmony_ci    *    it.
329bf215546Sopenharmony_ci    */
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_ci   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
332bf215546Sopenharmony_ci   return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
333bf215546Sopenharmony_ci}
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_cistatic bool
336bf215546Sopenharmony_cineeds_edge_flag_fix(enum pipe_prim_type mode)
337bf215546Sopenharmony_ci{
338bf215546Sopenharmony_ci   return (mode == PIPE_PRIM_QUADS ||
339bf215546Sopenharmony_ci           mode == PIPE_PRIM_QUAD_STRIP ||
340bf215546Sopenharmony_ci           mode == PIPE_PRIM_POLYGON);
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_cistatic unsigned
344bf215546Sopenharmony_cifill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
345bf215546Sopenharmony_ci{
346bf215546Sopenharmony_ci   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
349bf215546Sopenharmony_ci        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
350bf215546Sopenharmony_ci       ctx->gfx_pipeline_state.rast == NULL ||
351bf215546Sopenharmony_ci       (dinfo->mode != PIPE_PRIM_TRIANGLES &&
352bf215546Sopenharmony_ci        dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
353bf215546Sopenharmony_ci      return PIPE_POLYGON_MODE_FILL;
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci   /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
356bf215546Sopenharmony_ci   if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
357bf215546Sopenharmony_ci         ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
358bf215546Sopenharmony_ci        (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
359bf215546Sopenharmony_ci         ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
360bf215546Sopenharmony_ci       (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
361bf215546Sopenharmony_ci        needs_edge_flag_fix(ctx->initial_api_prim)))
362bf215546Sopenharmony_ci      return PIPE_POLYGON_MODE_LINE;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
365bf215546Sopenharmony_ci      return PIPE_POLYGON_MODE_POINT;
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci   return PIPE_POLYGON_MODE_FILL;
368bf215546Sopenharmony_ci}
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_cistatic bool
371bf215546Sopenharmony_cihas_stream_out_for_streams(struct d3d12_context *ctx)
372bf215546Sopenharmony_ci{
373bf215546Sopenharmony_ci   unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
374bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
375bf215546Sopenharmony_ci      unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
376bf215546Sopenharmony_ci      if (((1 << stream) & mask) &&
377bf215546Sopenharmony_ci         ctx->so_buffer_views[stream].SizeInBytes)
378bf215546Sopenharmony_ci         return true;
379bf215546Sopenharmony_ci   }
380bf215546Sopenharmony_ci   return false;
381bf215546Sopenharmony_ci}
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_cistatic bool
384bf215546Sopenharmony_cineeds_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
385bf215546Sopenharmony_ci{
386bf215546Sopenharmony_ci   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
387bf215546Sopenharmony_ci   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci   if (gs != NULL && !gs->is_variant) {
390bf215546Sopenharmony_ci      /* There is an user GS; Check if it outputs points with PSIZE */
391bf215546Sopenharmony_ci      return (gs->initial->info.gs.output_primitive == GL_POINTS &&
392bf215546Sopenharmony_ci              (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
393bf215546Sopenharmony_ci                 ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
394bf215546Sopenharmony_ci              (gs->initial->info.gs.active_stream_mask == 1 ||
395bf215546Sopenharmony_ci                 !has_stream_out_for_streams(ctx)));
396bf215546Sopenharmony_ci   } else {
397bf215546Sopenharmony_ci      /* No user GS; check if we are drawing wide points */
398bf215546Sopenharmony_ci      return ((dinfo->mode == PIPE_PRIM_POINTS ||
399bf215546Sopenharmony_ci               fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
400bf215546Sopenharmony_ci              (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
401bf215546Sopenharmony_ci               ctx->gfx_pipeline_state.rast->base.offset_point ||
402bf215546Sopenharmony_ci               (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
403bf215546Sopenharmony_ci                vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
404bf215546Sopenharmony_ci              (vs->initial->info.outputs_written & VARYING_BIT_POS));
405bf215546Sopenharmony_ci   }
406bf215546Sopenharmony_ci}
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_cistatic unsigned
409bf215546Sopenharmony_cicull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
410bf215546Sopenharmony_ci{
411bf215546Sopenharmony_ci   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
412bf215546Sopenharmony_ci        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
413bf215546Sopenharmony_ci       ctx->gfx_pipeline_state.rast == NULL ||
414bf215546Sopenharmony_ci       ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
415bf215546Sopenharmony_ci      return PIPE_FACE_NONE;
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   return ctx->gfx_pipeline_state.rast->base.cull_face;
418bf215546Sopenharmony_ci}
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_cistatic unsigned
421bf215546Sopenharmony_ciget_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
422bf215546Sopenharmony_ci{
423bf215546Sopenharmony_ci   if (dinfo->mode == GL_PATCHES) {
424bf215546Sopenharmony_ci      *alternate = false;
425bf215546Sopenharmony_ci      return 0;
426bf215546Sopenharmony_ci   }
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
429bf215546Sopenharmony_ci   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
430bf215546Sopenharmony_ci   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci   /* Make sure GL prims match Gallium prims */
433bf215546Sopenharmony_ci   STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
434bf215546Sopenharmony_ci   STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
435bf215546Sopenharmony_ci   STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   enum pipe_prim_type mode;
438bf215546Sopenharmony_ci   switch (last_vertex_stage->stage) {
439bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
440bf215546Sopenharmony_ci      mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
441bf215546Sopenharmony_ci      break;
442bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
443bf215546Sopenharmony_ci      mode = (enum pipe_prim_type)dinfo->mode;
444bf215546Sopenharmony_ci      break;
445bf215546Sopenharmony_ci   default:
446bf215546Sopenharmony_ci      unreachable("Tesselation shaders are not supported");
447bf215546Sopenharmony_ci   }
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
450bf215546Sopenharmony_ci                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
451bf215546Sopenharmony_ci   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
452bf215546Sopenharmony_ci                (!gs || gs->is_variant ||
453bf215546Sopenharmony_ci                 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
454bf215546Sopenharmony_ci   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
455bf215546Sopenharmony_ci}
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_cistatic bool
458bf215546Sopenharmony_cihas_flat_varyings(struct d3d12_context *ctx)
459bf215546Sopenharmony_ci{
460bf215546Sopenharmony_ci   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   if (!fs || !fs->current)
463bf215546Sopenharmony_ci      return false;
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(input, fs->current->nir,
466bf215546Sopenharmony_ci                                   nir_var_shader_in) {
467bf215546Sopenharmony_ci      if (input->data.interpolation == INTERP_MODE_FLAT &&
468bf215546Sopenharmony_ci          /* Disregard sysvals */
469bf215546Sopenharmony_ci          (input->data.location >= VARYING_SLOT_VAR0 ||
470bf215546Sopenharmony_ci             input->data.location <= VARYING_SLOT_TEX7))
471bf215546Sopenharmony_ci         return true;
472bf215546Sopenharmony_ci   }
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   return false;
475bf215546Sopenharmony_ci}
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_cistatic bool
478bf215546Sopenharmony_cineeds_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
479bf215546Sopenharmony_ci{
480bf215546Sopenharmony_ci   struct d3d12_context *ctx = sel_ctx->ctx;
481bf215546Sopenharmony_ci   bool flat = has_flat_varyings(ctx);
482bf215546Sopenharmony_ci   bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci   if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
485bf215546Sopenharmony_ci      return false;
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   /* TODO add support for line primitives */
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
490bf215546Sopenharmony_ci      If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
491bf215546Sopenharmony_ci   if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
492bf215546Sopenharmony_ci                                                  sel_ctx->alternate_tri))
493bf215546Sopenharmony_ci      return true;
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
496bf215546Sopenharmony_ci      strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
497bf215546Sopenharmony_ci      only works when there is no flat shading involved. In that scenario, we don't care about
498bf215546Sopenharmony_ci      the provoking vertex. */
499bf215546Sopenharmony_ci   if (xfb && !flat && sel_ctx->alternate_tri) {
500bf215546Sopenharmony_ci      sel_ctx->provoking_vertex = 0;
501bf215546Sopenharmony_ci      return true;
502bf215546Sopenharmony_ci   }
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci   return false;
505bf215546Sopenharmony_ci}
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_cistatic nir_variable *
508bf215546Sopenharmony_cicreate_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
509bf215546Sopenharmony_ci                         unsigned slot, unsigned slot_frac, nir_variable_mode mode, bool patch)
510bf215546Sopenharmony_ci{
511bf215546Sopenharmony_ci   nir_variable *var;
512bf215546Sopenharmony_ci   char tmp[100];
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   snprintf(tmp, ARRAY_SIZE(tmp),
515bf215546Sopenharmony_ci            mode == nir_var_shader_in ? "in_%d" : "out_%d",
516bf215546Sopenharmony_ci            info->slots[slot].vars[slot_frac].driver_location);
517bf215546Sopenharmony_ci   var = nir_variable_create(nir, mode, info->slots[slot].types[slot_frac], tmp);
518bf215546Sopenharmony_ci   var->data.location = slot;
519bf215546Sopenharmony_ci   var->data.location_frac = slot_frac;
520bf215546Sopenharmony_ci   var->data.driver_location = info->slots[slot].vars[slot_frac].driver_location;
521bf215546Sopenharmony_ci   var->data.interpolation = info->slots[slot].vars[slot_frac].interpolation;
522bf215546Sopenharmony_ci   var->data.patch = info->slots[slot].patch;
523bf215546Sopenharmony_ci   var->data.compact = info->slots[slot].vars[slot_frac].compact;
524bf215546Sopenharmony_ci   if (patch)
525bf215546Sopenharmony_ci      var->data.location += VARYING_SLOT_PATCH0;
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci   if (mode == nir_var_shader_out)
528bf215546Sopenharmony_ci      NIR_PASS_V(nir, d3d12_write_0_to_new_varying, var);
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci   return var;
531bf215546Sopenharmony_ci}
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_civoid
534bf215546Sopenharmony_cicreate_varyings_from_info(nir_shader *nir, struct d3d12_varying_info *info,
535bf215546Sopenharmony_ci                          unsigned slot, nir_variable_mode mode, bool patch)
536bf215546Sopenharmony_ci{
537bf215546Sopenharmony_ci   unsigned mask = info->slots[slot].location_frac_mask;
538bf215546Sopenharmony_ci   while (mask)
539bf215546Sopenharmony_ci      create_varying_from_info(nir, info, slot, u_bit_scan(&mask), mode, patch);
540bf215546Sopenharmony_ci}
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_cistatic void
543bf215546Sopenharmony_cifill_varyings(struct d3d12_varying_info *info, nir_shader *s,
544bf215546Sopenharmony_ci              nir_variable_mode modes, uint64_t mask, bool patch)
545bf215546Sopenharmony_ci{
546bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, s, modes) {
547bf215546Sopenharmony_ci      unsigned slot = var->data.location;
548bf215546Sopenharmony_ci      bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
549bf215546Sopenharmony_ci      if (patch ^ is_generic_patch)
550bf215546Sopenharmony_ci         continue;
551bf215546Sopenharmony_ci      if (is_generic_patch)
552bf215546Sopenharmony_ci         slot -= VARYING_SLOT_PATCH0;
553bf215546Sopenharmony_ci      uint64_t slot_bit = BITFIELD64_BIT(slot);
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci      if (!(mask & slot_bit))
556bf215546Sopenharmony_ci         continue;
557bf215546Sopenharmony_ci
558bf215546Sopenharmony_ci      const struct glsl_type *type = var->type;
559bf215546Sopenharmony_ci      if ((s->info.stage == MESA_SHADER_GEOMETRY ||
560bf215546Sopenharmony_ci           s->info.stage == MESA_SHADER_TESS_CTRL) &&
561bf215546Sopenharmony_ci          (modes & nir_var_shader_in) &&
562bf215546Sopenharmony_ci          glsl_type_is_array(type))
563bf215546Sopenharmony_ci         type = glsl_get_array_element(type);
564bf215546Sopenharmony_ci      info->slots[slot].types[var->data.location_frac] = type;
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci      info->slots[slot].patch = var->data.patch;
567bf215546Sopenharmony_ci      auto& var_slot = info->slots[slot].vars[var->data.location_frac];
568bf215546Sopenharmony_ci      var_slot.driver_location = var->data.driver_location;
569bf215546Sopenharmony_ci      var_slot.interpolation = var->data.interpolation;
570bf215546Sopenharmony_ci      var_slot.compact = var->data.compact;
571bf215546Sopenharmony_ci      info->mask |= slot_bit;
572bf215546Sopenharmony_ci      info->slots[slot].location_frac_mask |= (1 << var->data.location_frac);
573bf215546Sopenharmony_ci   }
574bf215546Sopenharmony_ci}
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_cistatic void
577bf215546Sopenharmony_cifill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
578bf215546Sopenharmony_ci{
579bf215546Sopenharmony_ci   if (!fs || !fs->current)
580bf215546Sopenharmony_ci      return;
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(input, fs->current->nir,
583bf215546Sopenharmony_ci                                   nir_var_shader_in) {
584bf215546Sopenharmony_ci      if (input->data.interpolation == INTERP_MODE_FLAT)
585bf215546Sopenharmony_ci         key->flat_varyings |= BITFIELD64_BIT(input->data.location);
586bf215546Sopenharmony_ci   }
587bf215546Sopenharmony_ci}
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_cistatic void
590bf215546Sopenharmony_civalidate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
591bf215546Sopenharmony_ci{
592bf215546Sopenharmony_ci   struct d3d12_context *ctx = sel_ctx->ctx;
593bf215546Sopenharmony_ci   d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
594bf215546Sopenharmony_ci   d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
595bf215546Sopenharmony_ci   struct d3d12_gs_variant_key key = {0};
596bf215546Sopenharmony_ci   bool variant_needed = false;
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   /* Nothing to do if there is a user geometry shader bound */
601bf215546Sopenharmony_ci   if (gs != NULL && !gs->is_variant)
602bf215546Sopenharmony_ci      return;
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   /* Fill the geometry shader variant key */
605bf215546Sopenharmony_ci   if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
606bf215546Sopenharmony_ci      key.fill_mode = sel_ctx->fill_mode_lowered;
607bf215546Sopenharmony_ci      key.cull_mode = sel_ctx->cull_mode_lowered;
608bf215546Sopenharmony_ci      key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
609bf215546Sopenharmony_ci      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
610bf215546Sopenharmony_ci         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
611bf215546Sopenharmony_ci      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
612bf215546Sopenharmony_ci      fill_flat_varyings(&key, fs);
613bf215546Sopenharmony_ci      if (key.flat_varyings != 0)
614bf215546Sopenharmony_ci         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
615bf215546Sopenharmony_ci      variant_needed = true;
616bf215546Sopenharmony_ci   } else if (sel_ctx->needs_point_sprite_lowering) {
617bf215546Sopenharmony_ci      key.passthrough = true;
618bf215546Sopenharmony_ci      variant_needed = true;
619bf215546Sopenharmony_ci   } else if (sel_ctx->needs_vertex_reordering) {
620bf215546Sopenharmony_ci      /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
621bf215546Sopenharmony_ci      key.provoking_vertex = sel_ctx->provoking_vertex;
622bf215546Sopenharmony_ci      key.alternate_tri = sel_ctx->alternate_tri;
623bf215546Sopenharmony_ci      variant_needed = true;
624bf215546Sopenharmony_ci   }
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci   if (variant_needed) {
627bf215546Sopenharmony_ci      fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
628bf215546Sopenharmony_ci                    vs->initial->info.outputs_written, false);
629bf215546Sopenharmony_ci   }
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ci   /* Check if the currently bound geometry shader variant is correct */
632bf215546Sopenharmony_ci   if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
633bf215546Sopenharmony_ci      return;
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci   /* Find/create the proper variant and bind it */
636bf215546Sopenharmony_ci   gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
637bf215546Sopenharmony_ci   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
638bf215546Sopenharmony_ci}
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_cistatic void
641bf215546Sopenharmony_civalidate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
642bf215546Sopenharmony_ci{
643bf215546Sopenharmony_ci   struct d3d12_context *ctx = sel_ctx->ctx;
644bf215546Sopenharmony_ci   d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
645bf215546Sopenharmony_ci   d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
646bf215546Sopenharmony_ci   d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
647bf215546Sopenharmony_ci   struct d3d12_tcs_variant_key key = {0};
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci   /* Nothing to do if there is a user tess ctrl shader bound */
650bf215546Sopenharmony_ci   if (tcs != NULL && !tcs->is_variant)
651bf215546Sopenharmony_ci      return;
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci   bool variant_needed = tes != nullptr;
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_ci   /* Fill the variant key */
656bf215546Sopenharmony_ci   if (variant_needed) {
657bf215546Sopenharmony_ci      fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
658bf215546Sopenharmony_ci                    vs->initial->info.outputs_written, false);
659bf215546Sopenharmony_ci      key.vertices_out = ctx->patch_vertices;
660bf215546Sopenharmony_ci   }
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci   /* Check if the currently bound tessellation control shader variant is correct */
663bf215546Sopenharmony_ci   if (tcs && memcmp(&tcs->tcs_key, &key, sizeof(key)) == 0)
664bf215546Sopenharmony_ci      return;
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci   /* Find/create the proper variant and bind it */
667bf215546Sopenharmony_ci   tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
668bf215546Sopenharmony_ci   ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
669bf215546Sopenharmony_ci}
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_cistatic bool
672bf215546Sopenharmony_cid3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
673bf215546Sopenharmony_ci{
674bf215546Sopenharmony_ci   if (expect->mask != have->mask)
675bf215546Sopenharmony_ci      return false;
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci   if (!expect->mask)
678bf215546Sopenharmony_ci      return true;
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci   /* 6 is a rough (wild) guess for a bulk memcmp cross-over point.  When there
681bf215546Sopenharmony_ci    * are a small number of slots present, individual memcmp is much faster. */
682bf215546Sopenharmony_ci   if (util_bitcount64(expect->mask) < 6) {
683bf215546Sopenharmony_ci      uint64_t mask = expect->mask;
684bf215546Sopenharmony_ci      while (mask) {
685bf215546Sopenharmony_ci         int slot = u_bit_scan64(&mask);
686bf215546Sopenharmony_ci         if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
687bf215546Sopenharmony_ci            return false;
688bf215546Sopenharmony_ci      }
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci      return true;
691bf215546Sopenharmony_ci   }
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_ci   return !memcmp(expect, have, sizeof(struct d3d12_varying_info));
694bf215546Sopenharmony_ci}
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_cistatic bool
697bf215546Sopenharmony_cid3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
698bf215546Sopenharmony_ci{
699bf215546Sopenharmony_ci   assert(expect->stage == have->stage);
700bf215546Sopenharmony_ci   assert(expect);
701bf215546Sopenharmony_ci   assert(have);
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   if (expect->hash != have->hash)
704bf215546Sopenharmony_ci      return false;
705bf215546Sopenharmony_ci
706bf215546Sopenharmony_ci   /* Because we only add varyings we check that a shader has at least the expected in-
707bf215546Sopenharmony_ci    * and outputs. */
708bf215546Sopenharmony_ci
709bf215546Sopenharmony_ci   if (!d3d12_compare_varying_info(&expect->required_varying_inputs,
710bf215546Sopenharmony_ci                                   &have->required_varying_inputs) ||
711bf215546Sopenharmony_ci       expect->next_varying_inputs != have->next_varying_inputs)
712bf215546Sopenharmony_ci      return false;
713bf215546Sopenharmony_ci
714bf215546Sopenharmony_ci   if (!d3d12_compare_varying_info(&expect->required_varying_outputs,
715bf215546Sopenharmony_ci                                   &have->required_varying_outputs) ||
716bf215546Sopenharmony_ci       expect->prev_varying_outputs != have->prev_varying_outputs)
717bf215546Sopenharmony_ci      return false;
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_ci   if (expect->stage == PIPE_SHADER_GEOMETRY) {
720bf215546Sopenharmony_ci      if (expect->gs.writes_psize) {
721bf215546Sopenharmony_ci         if (!have->gs.writes_psize ||
722bf215546Sopenharmony_ci             expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
723bf215546Sopenharmony_ci             expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
724bf215546Sopenharmony_ci             expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
725bf215546Sopenharmony_ci             expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
726bf215546Sopenharmony_ci            return false;
727bf215546Sopenharmony_ci      } else if (have->gs.writes_psize) {
728bf215546Sopenharmony_ci         return false;
729bf215546Sopenharmony_ci      }
730bf215546Sopenharmony_ci      if (expect->gs.primitive_id != have->gs.primitive_id ||
731bf215546Sopenharmony_ci          expect->gs.triangle_strip != have->gs.triangle_strip)
732bf215546Sopenharmony_ci         return false;
733bf215546Sopenharmony_ci   } else if (expect->stage == PIPE_SHADER_FRAGMENT) {
734bf215546Sopenharmony_ci      if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering ||
735bf215546Sopenharmony_ci          expect->fs.manual_depth_range != have->fs.manual_depth_range ||
736bf215546Sopenharmony_ci          expect->fs.polygon_stipple != have->fs.polygon_stipple ||
737bf215546Sopenharmony_ci          expect->fs.cast_to_uint != have->fs.cast_to_uint ||
738bf215546Sopenharmony_ci          expect->fs.cast_to_int != have->fs.cast_to_int ||
739bf215546Sopenharmony_ci          expect->fs.remap_front_facing != have->fs.remap_front_facing ||
740bf215546Sopenharmony_ci          expect->fs.missing_dual_src_outputs != have->fs.missing_dual_src_outputs ||
741bf215546Sopenharmony_ci          expect->fs.multisample_disabled != have->fs.multisample_disabled)
742bf215546Sopenharmony_ci         return false;
743bf215546Sopenharmony_ci   } else if (expect->stage == PIPE_SHADER_COMPUTE) {
744bf215546Sopenharmony_ci      if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
745bf215546Sopenharmony_ci                 sizeof(have->cs.workgroup_size)))
746bf215546Sopenharmony_ci         return false;
747bf215546Sopenharmony_ci   } else if (expect->stage == PIPE_SHADER_TESS_CTRL) {
748bf215546Sopenharmony_ci      if (expect->hs.primitive_mode != have->hs.primitive_mode ||
749bf215546Sopenharmony_ci          expect->hs.ccw != have->hs.ccw ||
750bf215546Sopenharmony_ci          expect->hs.point_mode != have->hs.point_mode ||
751bf215546Sopenharmony_ci          expect->hs.spacing != have->hs.spacing ||
752bf215546Sopenharmony_ci          expect->hs.patch_vertices_in != have->hs.patch_vertices_in ||
753bf215546Sopenharmony_ci          memcmp(&expect->hs.required_patch_outputs, &have->hs.required_patch_outputs,
754bf215546Sopenharmony_ci                 sizeof(struct d3d12_varying_info)) ||
755bf215546Sopenharmony_ci          expect->hs.next_patch_inputs != have->hs.next_patch_inputs)
756bf215546Sopenharmony_ci         return false;
757bf215546Sopenharmony_ci   } else if (expect->stage == PIPE_SHADER_TESS_EVAL) {
758bf215546Sopenharmony_ci      if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
759bf215546Sopenharmony_ci          memcmp(&expect->ds.required_patch_inputs, &have->ds.required_patch_inputs,
760bf215546Sopenharmony_ci                 sizeof(struct d3d12_varying_info)) ||
761bf215546Sopenharmony_ci          expect->ds.prev_patch_outputs != have ->ds.prev_patch_outputs)
762bf215546Sopenharmony_ci         return false;
763bf215546Sopenharmony_ci   }
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci   if (expect->input_clip_size != have->input_clip_size)
766bf215546Sopenharmony_ci      return false;
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci   if (expect->tex_saturate_s != have->tex_saturate_s ||
769bf215546Sopenharmony_ci       expect->tex_saturate_r != have->tex_saturate_r ||
770bf215546Sopenharmony_ci       expect->tex_saturate_t != have->tex_saturate_t)
771bf215546Sopenharmony_ci      return false;
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci   if (expect->samples_int_textures != have->samples_int_textures)
774bf215546Sopenharmony_ci      return false;
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci   if (expect->n_texture_states != have->n_texture_states)
777bf215546Sopenharmony_ci      return false;
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_ci   if (expect->n_images != have->n_images)
780bf215546Sopenharmony_ci      return false;
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci   if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
783bf215546Sopenharmony_ci              expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
784bf215546Sopenharmony_ci      return false;
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci   if (memcmp(expect->swizzle_state, have->swizzle_state,
787bf215546Sopenharmony_ci              expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
788bf215546Sopenharmony_ci      return false;
789bf215546Sopenharmony_ci
790bf215546Sopenharmony_ci   if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
791bf215546Sopenharmony_ci              expect->n_texture_states * sizeof(enum compare_func)))
792bf215546Sopenharmony_ci      return false;
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   if (memcmp(expect->image_format_conversion, have->image_format_conversion,
795bf215546Sopenharmony_ci      expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
796bf215546Sopenharmony_ci      return false;
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   if (expect->invert_depth != have->invert_depth ||
799bf215546Sopenharmony_ci       expect->halfz != have->halfz)
800bf215546Sopenharmony_ci      return false;
801bf215546Sopenharmony_ci
802bf215546Sopenharmony_ci   if (expect->stage == PIPE_SHADER_VERTEX) {
803bf215546Sopenharmony_ci      if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
804bf215546Sopenharmony_ci         return false;
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci      if (expect->vs.needs_format_emulation) {
807bf215546Sopenharmony_ci         if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
808bf215546Sopenharmony_ci                    PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
809bf215546Sopenharmony_ci            return false;
810bf215546Sopenharmony_ci      }
811bf215546Sopenharmony_ci   }
812bf215546Sopenharmony_ci
813bf215546Sopenharmony_ci   if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
814bf215546Sopenharmony_ci      return false;
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci   return true;
817bf215546Sopenharmony_ci}
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_cistatic uint32_t
820bf215546Sopenharmony_cid3d12_shader_key_hash(const d3d12_shader_key *key)
821bf215546Sopenharmony_ci{
822bf215546Sopenharmony_ci   uint32_t hash;
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ci   hash = (uint32_t)key->stage;
825bf215546Sopenharmony_ci   hash += key->required_varying_inputs.mask;
826bf215546Sopenharmony_ci   hash += key->required_varying_outputs.mask;
827bf215546Sopenharmony_ci   hash += key->next_varying_inputs;
828bf215546Sopenharmony_ci   hash += key->prev_varying_outputs;
829bf215546Sopenharmony_ci   switch (key->stage) {
830bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
831bf215546Sopenharmony_ci      /* (Probably) not worth the bit extraction for needs_format_emulation and
832bf215546Sopenharmony_ci       * the rest of the the format_conversion data is large.  Don't bother
833bf215546Sopenharmony_ci       * hashing for now until this is shown to be worthwhile. */
834bf215546Sopenharmony_ci       break;
835bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
836bf215546Sopenharmony_ci      hash = _mesa_hash_data_with_seed(&key->gs, sizeof(key->gs), hash);
837bf215546Sopenharmony_ci      break;
838bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT:
839bf215546Sopenharmony_ci      hash = _mesa_hash_data_with_seed(&key->fs, sizeof(key->fs), hash);
840bf215546Sopenharmony_ci      break;
841bf215546Sopenharmony_ci   case PIPE_SHADER_COMPUTE:
842bf215546Sopenharmony_ci      hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
843bf215546Sopenharmony_ci      break;
844bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_CTRL:
845bf215546Sopenharmony_ci      hash += key->hs.next_patch_inputs;
846bf215546Sopenharmony_ci      break;
847bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_EVAL:
848bf215546Sopenharmony_ci      hash += key->ds.tcs_vertices_out;
849bf215546Sopenharmony_ci      hash += key->ds.prev_patch_outputs;
850bf215546Sopenharmony_ci      break;
851bf215546Sopenharmony_ci   default:
852bf215546Sopenharmony_ci      /* No type specific information to hash for other stages. */
853bf215546Sopenharmony_ci      break;
854bf215546Sopenharmony_ci   }
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   hash += key->n_texture_states;
857bf215546Sopenharmony_ci   hash += key->n_images;
858bf215546Sopenharmony_ci   return hash;
859bf215546Sopenharmony_ci}
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_cistatic void
862bf215546Sopenharmony_cid3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
863bf215546Sopenharmony_ci                      d3d12_shader_key *key, d3d12_shader_selector *sel,
864bf215546Sopenharmony_ci                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
865bf215546Sopenharmony_ci{
866bf215546Sopenharmony_ci   pipe_shader_type stage = sel->stage;
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ci   uint64_t system_generated_in_values =
869bf215546Sopenharmony_ci         VARYING_BIT_PNTC |
870bf215546Sopenharmony_ci         VARYING_BIT_PRIMITIVE_ID;
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci   uint64_t system_out_values =
873bf215546Sopenharmony_ci         VARYING_BIT_CLIP_DIST0 |
874bf215546Sopenharmony_ci         VARYING_BIT_CLIP_DIST1;
875bf215546Sopenharmony_ci
876bf215546Sopenharmony_ci   memset(key, 0, sizeof(d3d12_shader_key));
877bf215546Sopenharmony_ci   key->stage = stage;
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci   if (prev) {
880bf215546Sopenharmony_ci      /* We require as inputs what the previous stage has written,
881bf215546Sopenharmony_ci       * except certain system values */
882bf215546Sopenharmony_ci      if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
883bf215546Sopenharmony_ci         system_out_values |= VARYING_BIT_POS;
884bf215546Sopenharmony_ci      if (stage == PIPE_SHADER_FRAGMENT)
885bf215546Sopenharmony_ci         system_out_values |= VARYING_BIT_PSIZ | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER;
886bf215546Sopenharmony_ci      uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
887bf215546Sopenharmony_ci      fill_varyings(&key->required_varying_inputs, prev->current->nir,
888bf215546Sopenharmony_ci                    nir_var_shader_out, mask, false);
889bf215546Sopenharmony_ci      key->prev_varying_outputs = prev->current->nir->info.outputs_written;
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci      if (stage == PIPE_SHADER_TESS_EVAL) {
892bf215546Sopenharmony_ci         uint32_t patch_mask = prev->current->nir->info.patch_outputs_written;
893bf215546Sopenharmony_ci         fill_varyings(&key->ds.required_patch_inputs, prev->current->nir,
894bf215546Sopenharmony_ci                       nir_var_shader_out, patch_mask, true);
895bf215546Sopenharmony_ci         key->ds.prev_patch_outputs = patch_mask;
896bf215546Sopenharmony_ci      }
897bf215546Sopenharmony_ci
898bf215546Sopenharmony_ci      /* Set the provoking vertex based on the previous shader output. Only set the
899bf215546Sopenharmony_ci       * key value if the driver actually supports changing the provoking vertex though */
900bf215546Sopenharmony_ci      if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
901bf215546Sopenharmony_ci          !sel_ctx->needs_vertex_reordering &&
902bf215546Sopenharmony_ci          d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
903bf215546Sopenharmony_ci         key->fs.provoking_vertex = sel_ctx->provoking_vertex;
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_ci      /* Get the input clip distance size. The info's clip_distance_array_size corresponds
906bf215546Sopenharmony_ci       * to the output, and in cases of TES or GS you could have differently-sized inputs
907bf215546Sopenharmony_ci       * and outputs. For FS, there is no output, so it's repurposed to mean input.
908bf215546Sopenharmony_ci       */
909bf215546Sopenharmony_ci      if (stage != PIPE_SHADER_FRAGMENT)
910bf215546Sopenharmony_ci         key->input_clip_size = prev->current->nir->info.clip_distance_array_size;
911bf215546Sopenharmony_ci   }
912bf215546Sopenharmony_ci
913bf215546Sopenharmony_ci   /* We require as outputs what the next stage reads,
914bf215546Sopenharmony_ci    * except certain system values */
915bf215546Sopenharmony_ci   if (next) {
916bf215546Sopenharmony_ci      if (!next->is_variant) {
917bf215546Sopenharmony_ci         if (stage == PIPE_SHADER_VERTEX)
918bf215546Sopenharmony_ci            system_generated_in_values |= VARYING_BIT_POS;
919bf215546Sopenharmony_ci         uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
920bf215546Sopenharmony_ci         fill_varyings(&key->required_varying_outputs, next->current->nir,
921bf215546Sopenharmony_ci                       nir_var_shader_in, mask, false);
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci         if (stage == PIPE_SHADER_TESS_CTRL) {
924bf215546Sopenharmony_ci            uint32_t patch_mask = next->current->nir->info.patch_outputs_read;
925bf215546Sopenharmony_ci            fill_varyings(&key->hs.required_patch_outputs, prev->current->nir,
926bf215546Sopenharmony_ci                          nir_var_shader_in, patch_mask, true);
927bf215546Sopenharmony_ci            key->hs.next_patch_inputs = patch_mask;
928bf215546Sopenharmony_ci         }
929bf215546Sopenharmony_ci      }
930bf215546Sopenharmony_ci      key->next_varying_inputs = next->current->nir->info.inputs_read;
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci   }
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci   if (stage == PIPE_SHADER_GEOMETRY ||
935bf215546Sopenharmony_ci       ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
936bf215546Sopenharmony_ci          (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
937bf215546Sopenharmony_ci      key->last_vertex_processing_stage = 1;
938bf215546Sopenharmony_ci      key->invert_depth = sel_ctx->ctx->reverse_depth_range;
939bf215546Sopenharmony_ci      key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
940bf215546Sopenharmony_ci         sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
941bf215546Sopenharmony_ci      if (sel_ctx->ctx->pstipple.enabled &&
942bf215546Sopenharmony_ci         sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
943bf215546Sopenharmony_ci         key->next_varying_inputs |= VARYING_BIT_POS;
944bf215546Sopenharmony_ci   }
945bf215546Sopenharmony_ci
946bf215546Sopenharmony_ci   if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
947bf215546Sopenharmony_ci      struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
948bf215546Sopenharmony_ci      if (sel_ctx->needs_point_sprite_lowering) {
949bf215546Sopenharmony_ci         key->gs.writes_psize = 1;
950bf215546Sopenharmony_ci         key->gs.point_size_per_vertex = rast->point_size_per_vertex;
951bf215546Sopenharmony_ci         key->gs.sprite_coord_enable = rast->sprite_coord_enable;
952bf215546Sopenharmony_ci         key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
953bf215546Sopenharmony_ci         if (sel_ctx->ctx->flip_y < 0)
954bf215546Sopenharmony_ci            key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
955bf215546Sopenharmony_ci         key->gs.aa_point = rast->point_smooth;
956bf215546Sopenharmony_ci         key->gs.stream_output_factor = 6;
957bf215546Sopenharmony_ci      } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
958bf215546Sopenharmony_ci         key->gs.stream_output_factor = 2;
959bf215546Sopenharmony_ci      } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
960bf215546Sopenharmony_ci         key->gs.triangle_strip = 1;
961bf215546Sopenharmony_ci      }
962bf215546Sopenharmony_ci
963bf215546Sopenharmony_ci      if (sel->is_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
964bf215546Sopenharmony_ci         key->gs.primitive_id = 1;
965bf215546Sopenharmony_ci   } else if (stage == PIPE_SHADER_FRAGMENT) {
966bf215546Sopenharmony_ci      key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
967bf215546Sopenharmony_ci      key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
968bf215546Sopenharmony_ci      key->fs.manual_depth_range = sel_ctx->manual_depth_range;
969bf215546Sopenharmony_ci      key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
970bf215546Sopenharmony_ci         sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
971bf215546Sopenharmony_ci      key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
972bf215546Sopenharmony_ci         !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
973bf215546Sopenharmony_ci      if (sel_ctx->ctx->gfx_pipeline_state.blend &&
974bf215546Sopenharmony_ci          sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
975bf215546Sopenharmony_ci          !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
976bf215546Sopenharmony_ci         key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
977bf215546Sopenharmony_ci         key->fs.cast_to_int = !key->fs.cast_to_uint;
978bf215546Sopenharmony_ci      }
979bf215546Sopenharmony_ci   } else if (stage == PIPE_SHADER_TESS_CTRL) {
980bf215546Sopenharmony_ci      if (next && next->current->nir->info.stage == MESA_SHADER_TESS_EVAL) {
981bf215546Sopenharmony_ci         key->hs.primitive_mode = next->current->nir->info.tess._primitive_mode;
982bf215546Sopenharmony_ci         key->hs.ccw = next->current->nir->info.tess.ccw;
983bf215546Sopenharmony_ci         key->hs.point_mode = next->current->nir->info.tess.point_mode;
984bf215546Sopenharmony_ci         key->hs.spacing = next->current->nir->info.tess.spacing;
985bf215546Sopenharmony_ci      } else {
986bf215546Sopenharmony_ci         key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
987bf215546Sopenharmony_ci         key->hs.ccw = true;
988bf215546Sopenharmony_ci         key->hs.point_mode = false;
989bf215546Sopenharmony_ci         key->hs.spacing = TESS_SPACING_EQUAL;
990bf215546Sopenharmony_ci      }
991bf215546Sopenharmony_ci      key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
992bf215546Sopenharmony_ci   } else if (stage == PIPE_SHADER_TESS_EVAL) {
993bf215546Sopenharmony_ci      if (prev && prev->current->nir->info.stage == MESA_SHADER_TESS_CTRL)
994bf215546Sopenharmony_ci         key->ds.tcs_vertices_out = prev->current->nir->info.tess.tcs_vertices_out;
995bf215546Sopenharmony_ci      else
996bf215546Sopenharmony_ci         key->ds.tcs_vertices_out = 32;
997bf215546Sopenharmony_ci   }
998bf215546Sopenharmony_ci
999bf215546Sopenharmony_ci   if (sel->samples_int_textures) {
1000bf215546Sopenharmony_ci      key->samples_int_textures = sel->samples_int_textures;
1001bf215546Sopenharmony_ci      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1002bf215546Sopenharmony_ci      /* Copy only states with integer textures */
1003bf215546Sopenharmony_ci      for(int i = 0; i < key->n_texture_states; ++i) {
1004bf215546Sopenharmony_ci         auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
1005bf215546Sopenharmony_ci         if (wrap_state.is_int_sampler) {
1006bf215546Sopenharmony_ci            memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
1007bf215546Sopenharmony_ci            key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
1008bf215546Sopenharmony_ci         }
1009bf215546Sopenharmony_ci      }
1010bf215546Sopenharmony_ci   }
1011bf215546Sopenharmony_ci
1012bf215546Sopenharmony_ci   for (unsigned i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) {
1013bf215546Sopenharmony_ci      if (!sel_ctx->ctx->samplers[stage][i] ||
1014bf215546Sopenharmony_ci          sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
1015bf215546Sopenharmony_ci         continue;
1016bf215546Sopenharmony_ci
1017bf215546Sopenharmony_ci      if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
1018bf215546Sopenharmony_ci         key->tex_saturate_r |= 1 << i;
1019bf215546Sopenharmony_ci      if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
1020bf215546Sopenharmony_ci         key->tex_saturate_s |= 1 << i;
1021bf215546Sopenharmony_ci      if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
1022bf215546Sopenharmony_ci         key->tex_saturate_t |= 1 << i;
1023bf215546Sopenharmony_ci   }
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci   if (sel->compare_with_lod_bias_grad) {
1026bf215546Sopenharmony_ci      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1027bf215546Sopenharmony_ci      memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
1028bf215546Sopenharmony_ci             key->n_texture_states * sizeof(enum compare_func));
1029bf215546Sopenharmony_ci      memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
1030bf215546Sopenharmony_ci             key->n_texture_states * sizeof(dxil_texture_swizzle_state));
1031bf215546Sopenharmony_ci   }
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci   if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
1034bf215546Sopenharmony_ci      key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
1035bf215546Sopenharmony_ci      if (key->vs.needs_format_emulation) {
1036bf215546Sopenharmony_ci         memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
1037bf215546Sopenharmony_ci                sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
1038bf215546Sopenharmony_ci      }
1039bf215546Sopenharmony_ci   }
1040bf215546Sopenharmony_ci
1041bf215546Sopenharmony_ci   if (stage == PIPE_SHADER_FRAGMENT &&
1042bf215546Sopenharmony_ci       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
1043bf215546Sopenharmony_ci       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
1044bf215546Sopenharmony_ci       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
1045bf215546Sopenharmony_ci      key->fs.remap_front_facing = 1;
1046bf215546Sopenharmony_ci   }
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_ci   if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
1049bf215546Sopenharmony_ci      memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
1050bf215546Sopenharmony_ci   }
1051bf215546Sopenharmony_ci
1052bf215546Sopenharmony_ci   key->n_images = sel_ctx->ctx->num_image_views[stage];
1053bf215546Sopenharmony_ci   for (int i = 0; i < key->n_images; ++i) {
1054bf215546Sopenharmony_ci      key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
1055bf215546Sopenharmony_ci      if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
1056bf215546Sopenharmony_ci         key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
1057bf215546Sopenharmony_ci   }
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_ci   key->hash = d3d12_shader_key_hash(key);
1060bf215546Sopenharmony_ci}
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_cistatic void
1063bf215546Sopenharmony_ciselect_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
1064bf215546Sopenharmony_ci                     d3d12_shader_selector *prev, d3d12_shader_selector *next)
1065bf215546Sopenharmony_ci{
1066bf215546Sopenharmony_ci   struct d3d12_context *ctx = sel_ctx->ctx;
1067bf215546Sopenharmony_ci   d3d12_shader_key key;
1068bf215546Sopenharmony_ci   nir_shader *new_nir_variant;
1069bf215546Sopenharmony_ci   unsigned pstipple_binding = UINT32_MAX;
1070bf215546Sopenharmony_ci
1071bf215546Sopenharmony_ci   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci   /* Check for an existing variant */
1074bf215546Sopenharmony_ci   for (d3d12_shader *variant = sel->first; variant;
1075bf215546Sopenharmony_ci        variant = variant->next_variant) {
1076bf215546Sopenharmony_ci
1077bf215546Sopenharmony_ci      if (d3d12_compare_shader_keys(&key, &variant->key)) {
1078bf215546Sopenharmony_ci         sel->current = variant;
1079bf215546Sopenharmony_ci         return;
1080bf215546Sopenharmony_ci      }
1081bf215546Sopenharmony_ci   }
1082bf215546Sopenharmony_ci
1083bf215546Sopenharmony_ci   /* Clone the NIR shader */
1084bf215546Sopenharmony_ci   new_nir_variant = nir_shader_clone(sel, sel->initial);
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci   /* Apply any needed lowering passes */
1087bf215546Sopenharmony_ci   if (key.gs.writes_psize) {
1088bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
1089bf215546Sopenharmony_ci                 !key.gs.sprite_origin_upper_left,
1090bf215546Sopenharmony_ci                 key.gs.point_size_per_vertex,
1091bf215546Sopenharmony_ci                 key.gs.sprite_coord_enable,
1092bf215546Sopenharmony_ci                 key.next_varying_inputs);
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1095bf215546Sopenharmony_ci      nir_shader_gather_info(new_nir_variant, impl);
1096bf215546Sopenharmony_ci   }
1097bf215546Sopenharmony_ci
1098bf215546Sopenharmony_ci   if (key.gs.primitive_id) {
1099bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
1100bf215546Sopenharmony_ci
1101bf215546Sopenharmony_ci      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1102bf215546Sopenharmony_ci      nir_shader_gather_info(new_nir_variant, impl);
1103bf215546Sopenharmony_ci   }
1104bf215546Sopenharmony_ci
1105bf215546Sopenharmony_ci   if (key.gs.triangle_strip)
1106bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
1107bf215546Sopenharmony_ci
1108bf215546Sopenharmony_ci   if (key.fs.polygon_stipple) {
1109bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
1110bf215546Sopenharmony_ci                 &pstipple_binding, 0, false);
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1113bf215546Sopenharmony_ci      nir_shader_gather_info(new_nir_variant, impl);
1114bf215546Sopenharmony_ci   }
1115bf215546Sopenharmony_ci
1116bf215546Sopenharmony_ci   if (key.fs.remap_front_facing) {
1117bf215546Sopenharmony_ci      d3d12_forward_front_face(new_nir_variant);
1118bf215546Sopenharmony_ci
1119bf215546Sopenharmony_ci      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1120bf215546Sopenharmony_ci      nir_shader_gather_info(new_nir_variant, impl);
1121bf215546Sopenharmony_ci   }
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci   if (key.fs.missing_dual_src_outputs) {
1124bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
1125bf215546Sopenharmony_ci                 key.fs.missing_dual_src_outputs);
1126bf215546Sopenharmony_ci   } else if (key.fs.frag_result_color_lowering) {
1127bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
1128bf215546Sopenharmony_ci                 key.fs.frag_result_color_lowering);
1129bf215546Sopenharmony_ci   }
1130bf215546Sopenharmony_ci
1131bf215546Sopenharmony_ci   if (key.fs.manual_depth_range)
1132bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
1133bf215546Sopenharmony_ci
1134bf215546Sopenharmony_ci   if (sel->compare_with_lod_bias_grad) {
1135bf215546Sopenharmony_ci      STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
1136bf215546Sopenharmony_ci                    sizeof(nir_lower_tex_shadow_swizzle));
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states,
1139bf215546Sopenharmony_ci                 key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state);
1140bf215546Sopenharmony_ci   }
1141bf215546Sopenharmony_ci
1142bf215546Sopenharmony_ci   if (key.fs.cast_to_uint)
1143bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
1144bf215546Sopenharmony_ci   if (key.fs.cast_to_int)
1145bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
1146bf215546Sopenharmony_ci
1147bf215546Sopenharmony_ci   if (key.n_images)
1148bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, key.image_format_conversion);
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_ci   if (sel->workgroup_size_variable) {
1151bf215546Sopenharmony_ci      new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0];
1152bf215546Sopenharmony_ci      new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1];
1153bf215546Sopenharmony_ci      new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2];
1154bf215546Sopenharmony_ci   }
1155bf215546Sopenharmony_ci
1156bf215546Sopenharmony_ci   if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
1157bf215546Sopenharmony_ci      new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
1158bf215546Sopenharmony_ci      new_nir_variant->info.tess.ccw = key.hs.ccw;
1159bf215546Sopenharmony_ci      new_nir_variant->info.tess.point_mode = key.hs.point_mode;
1160bf215546Sopenharmony_ci      new_nir_variant->info.tess.spacing = key.hs.spacing;
1161bf215546Sopenharmony_ci
1162bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
1163bf215546Sopenharmony_ci   } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
1164bf215546Sopenharmony_ci      new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out;
1165bf215546Sopenharmony_ci   }
1166bf215546Sopenharmony_ci
1167bf215546Sopenharmony_ci   {
1168bf215546Sopenharmony_ci      struct nir_lower_tex_options tex_options = { };
1169bf215546Sopenharmony_ci      tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
1170bf215546Sopenharmony_ci      tex_options.lower_rect = true;
1171bf215546Sopenharmony_ci      tex_options.lower_rect_offset = true;
1172bf215546Sopenharmony_ci      tex_options.saturate_s = key.tex_saturate_s;
1173bf215546Sopenharmony_ci      tex_options.saturate_r = key.tex_saturate_r;
1174bf215546Sopenharmony_ci      tex_options.saturate_t = key.tex_saturate_t;
1175bf215546Sopenharmony_ci      tex_options.lower_invalid_implicit_lod = true;
1176bf215546Sopenharmony_ci      tex_options.lower_tg4_offsets = true;
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_ci      NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
1179bf215546Sopenharmony_ci   }
1180bf215546Sopenharmony_ci
1181bf215546Sopenharmony_ci   /* Add the needed in and outputs, and re-sort */
1182bf215546Sopenharmony_ci   if (prev) {
1183bf215546Sopenharmony_ci      uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;
1184bf215546Sopenharmony_ci      new_nir_variant->info.inputs_read |= mask;
1185bf215546Sopenharmony_ci      while (mask) {
1186bf215546Sopenharmony_ci         int slot = u_bit_scan64(&mask);
1187bf215546Sopenharmony_ci         create_varyings_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in, false);
1188bf215546Sopenharmony_ci      }
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_ci      if (sel->stage == PIPE_SHADER_TESS_EVAL) {
1191bf215546Sopenharmony_ci         uint32_t patch_mask = (uint32_t)key.ds.required_patch_inputs.mask & ~new_nir_variant->info.patch_inputs_read;
1192bf215546Sopenharmony_ci         new_nir_variant->info.patch_inputs_read |= patch_mask;
1193bf215546Sopenharmony_ci         while (patch_mask) {
1194bf215546Sopenharmony_ci            int slot = u_bit_scan(&patch_mask);
1195bf215546Sopenharmony_ci            create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_in, true);
1196bf215546Sopenharmony_ci         }
1197bf215546Sopenharmony_ci      }
1198bf215546Sopenharmony_ci      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
1199bf215546Sopenharmony_ci                                      key.prev_varying_outputs);
1200bf215546Sopenharmony_ci   }
1201bf215546Sopenharmony_ci
1202bf215546Sopenharmony_ci
1203bf215546Sopenharmony_ci   if (next) {
1204bf215546Sopenharmony_ci      uint64_t mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;
1205bf215546Sopenharmony_ci      new_nir_variant->info.outputs_written |= mask;
1206bf215546Sopenharmony_ci      while (mask) {
1207bf215546Sopenharmony_ci         int slot = u_bit_scan64(&mask);
1208bf215546Sopenharmony_ci         create_varyings_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out, false);
1209bf215546Sopenharmony_ci      }
1210bf215546Sopenharmony_ci
1211bf215546Sopenharmony_ci      if (sel->stage == PIPE_SHADER_TESS_CTRL) {
1212bf215546Sopenharmony_ci         uint32_t patch_mask = (uint32_t)key.hs.required_patch_outputs.mask & ~new_nir_variant->info.patch_outputs_written;
1213bf215546Sopenharmony_ci         new_nir_variant->info.patch_outputs_written |= patch_mask;
1214bf215546Sopenharmony_ci         while (patch_mask) {
1215bf215546Sopenharmony_ci            int slot = u_bit_scan(&patch_mask);
1216bf215546Sopenharmony_ci            create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_out, true);
1217bf215546Sopenharmony_ci         }
1218bf215546Sopenharmony_ci      }
1219bf215546Sopenharmony_ci      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
1220bf215546Sopenharmony_ci                                     key.next_varying_inputs);
1221bf215546Sopenharmony_ci   }
1222bf215546Sopenharmony_ci
1223bf215546Sopenharmony_ci   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
1224bf215546Sopenharmony_ci   assert(new_variant);
1225bf215546Sopenharmony_ci
1226bf215546Sopenharmony_ci   /* keep track of polygon stipple texture binding */
1227bf215546Sopenharmony_ci   new_variant->pstipple_binding = pstipple_binding;
1228bf215546Sopenharmony_ci
1229bf215546Sopenharmony_ci   /* prepend the new shader in the selector chain and pick it */
1230bf215546Sopenharmony_ci   new_variant->next_variant = sel->first;
1231bf215546Sopenharmony_ci   sel->current = sel->first = new_variant;
1232bf215546Sopenharmony_ci}
1233bf215546Sopenharmony_ci
1234bf215546Sopenharmony_cistatic d3d12_shader_selector *
1235bf215546Sopenharmony_ciget_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
1236bf215546Sopenharmony_ci{
1237bf215546Sopenharmony_ci   switch (current) {
1238bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
1239bf215546Sopenharmony_ci      return NULL;
1240bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT:
1241bf215546Sopenharmony_ci      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1242bf215546Sopenharmony_ci         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1243bf215546Sopenharmony_ci      FALLTHROUGH;
1244bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
1245bf215546Sopenharmony_ci      if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1246bf215546Sopenharmony_ci         return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1247bf215546Sopenharmony_ci      FALLTHROUGH;
1248bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_EVAL:
1249bf215546Sopenharmony_ci      if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1250bf215546Sopenharmony_ci         return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1251bf215546Sopenharmony_ci      FALLTHROUGH;
1252bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_CTRL:
1253bf215546Sopenharmony_ci      return ctx->gfx_stages[PIPE_SHADER_VERTEX];
1254bf215546Sopenharmony_ci   default:
1255bf215546Sopenharmony_ci      unreachable("shader type not supported");
1256bf215546Sopenharmony_ci   }
1257bf215546Sopenharmony_ci}
1258bf215546Sopenharmony_ci
1259bf215546Sopenharmony_cistatic d3d12_shader_selector *
1260bf215546Sopenharmony_ciget_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
1261bf215546Sopenharmony_ci{
1262bf215546Sopenharmony_ci   switch (current) {
1263bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
1264bf215546Sopenharmony_ci      if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1265bf215546Sopenharmony_ci         return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1266bf215546Sopenharmony_ci      FALLTHROUGH;
1267bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_CTRL:
1268bf215546Sopenharmony_ci      if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1269bf215546Sopenharmony_ci         return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1270bf215546Sopenharmony_ci      FALLTHROUGH;
1271bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_EVAL:
1272bf215546Sopenharmony_ci      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1273bf215546Sopenharmony_ci         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1274bf215546Sopenharmony_ci      FALLTHROUGH;
1275bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
1276bf215546Sopenharmony_ci      return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
1277bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT:
1278bf215546Sopenharmony_ci      return NULL;
1279bf215546Sopenharmony_ci   default:
1280bf215546Sopenharmony_ci      unreachable("shader type not supported");
1281bf215546Sopenharmony_ci   }
1282bf215546Sopenharmony_ci}
1283bf215546Sopenharmony_ci
/* Bit flags reported by scan_texture_use(). */
enum tex_scan_flags {
   /* A tex/txb/txl/txd instruction has an int or uint destination type */
   TEX_SAMPLE_INTEGER_TEXTURE = 0x1,
   /* A txb/txl/txd instruction is a shadow lookup */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 0x2,
   /* Every flag above; lets the scan stop as soon as nothing new can be found */
   TEX_SCAN_ALL_FLAGS         = TEX_SAMPLE_INTEGER_TEXTURE | TEX_CMP_WITH_LOD_BIAS_GRAD
};
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_cistatic unsigned
1291bf215546Sopenharmony_ciscan_texture_use(nir_shader *nir)
1292bf215546Sopenharmony_ci{
1293bf215546Sopenharmony_ci   unsigned result = 0;
1294bf215546Sopenharmony_ci   nir_foreach_function(func, nir) {
1295bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
1296bf215546Sopenharmony_ci         nir_foreach_instr(instr, block) {
1297bf215546Sopenharmony_ci            if (instr->type == nir_instr_type_tex) {
1298bf215546Sopenharmony_ci               auto tex = nir_instr_as_tex(instr);
1299bf215546Sopenharmony_ci               switch (tex->op) {
1300bf215546Sopenharmony_ci               case nir_texop_txb:
1301bf215546Sopenharmony_ci               case nir_texop_txl:
1302bf215546Sopenharmony_ci               case nir_texop_txd:
1303bf215546Sopenharmony_ci                  if (tex->is_shadow)
1304bf215546Sopenharmony_ci                     result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
1305bf215546Sopenharmony_ci                  FALLTHROUGH;
1306bf215546Sopenharmony_ci               case nir_texop_tex:
1307bf215546Sopenharmony_ci                  if (tex->dest_type & (nir_type_int | nir_type_uint))
1308bf215546Sopenharmony_ci                     result |= TEX_SAMPLE_INTEGER_TEXTURE;
1309bf215546Sopenharmony_ci               default:
1310bf215546Sopenharmony_ci                  ;
1311bf215546Sopenharmony_ci               }
1312bf215546Sopenharmony_ci            }
1313bf215546Sopenharmony_ci            if (TEX_SCAN_ALL_FLAGS == result)
1314bf215546Sopenharmony_ci               return result;
1315bf215546Sopenharmony_ci         }
1316bf215546Sopenharmony_ci      }
1317bf215546Sopenharmony_ci   }
1318bf215546Sopenharmony_ci   return result;
1319bf215546Sopenharmony_ci}
1320bf215546Sopenharmony_ci
1321bf215546Sopenharmony_cistatic uint64_t
1322bf215546Sopenharmony_ciupdate_so_info(struct pipe_stream_output_info *so_info,
1323bf215546Sopenharmony_ci               uint64_t outputs_written)
1324bf215546Sopenharmony_ci{
1325bf215546Sopenharmony_ci   uint64_t so_outputs = 0;
1326bf215546Sopenharmony_ci   uint8_t reverse_map[64] = {0};
1327bf215546Sopenharmony_ci   unsigned slot = 0;
1328bf215546Sopenharmony_ci
1329bf215546Sopenharmony_ci   while (outputs_written)
1330bf215546Sopenharmony_ci      reverse_map[slot++] = u_bit_scan64(&outputs_written);
1331bf215546Sopenharmony_ci
1332bf215546Sopenharmony_ci   for (unsigned i = 0; i < so_info->num_outputs; i++) {
1333bf215546Sopenharmony_ci      struct pipe_stream_output *output = &so_info->output[i];
1334bf215546Sopenharmony_ci
1335bf215546Sopenharmony_ci      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1336bf215546Sopenharmony_ci      output->register_index = reverse_map[output->register_index];
1337bf215546Sopenharmony_ci
1338bf215546Sopenharmony_ci      so_outputs |= 1ull << output->register_index;
1339bf215546Sopenharmony_ci   }
1340bf215546Sopenharmony_ci
1341bf215546Sopenharmony_ci   return so_outputs;
1342bf215546Sopenharmony_ci}
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_cistatic struct d3d12_shader_selector *
1345bf215546Sopenharmony_cid3d12_create_shader_impl(struct d3d12_context *ctx,
1346bf215546Sopenharmony_ci                         struct d3d12_shader_selector *sel,
1347bf215546Sopenharmony_ci                         struct nir_shader *nir,
1348bf215546Sopenharmony_ci                         struct d3d12_shader_selector *prev,
1349bf215546Sopenharmony_ci                         struct d3d12_shader_selector *next)
1350bf215546Sopenharmony_ci{
1351bf215546Sopenharmony_ci   unsigned tex_scan_result = scan_texture_use(nir);
1352bf215546Sopenharmony_ci   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
1353bf215546Sopenharmony_ci   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
1354bf215546Sopenharmony_ci   sel->workgroup_size_variable = nir->info.workgroup_size_variable;
1355bf215546Sopenharmony_ci
1356bf215546Sopenharmony_ci   /* Integer cube maps are not supported in DirectX because sampling is not supported
1357bf215546Sopenharmony_ci    * on integer textures and TextureLoad is not supported for cube maps, so we have to
1358bf215546Sopenharmony_ci    * lower integer cube maps to be handled like 2D textures arrays*/
1359bf215546Sopenharmony_ci   NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);
1360bf215546Sopenharmony_ci
1361bf215546Sopenharmony_ci   /* Keep this initial shader as the blue print for possible variants */
1362bf215546Sopenharmony_ci   sel->initial = nir;
1363bf215546Sopenharmony_ci
1364bf215546Sopenharmony_ci   /*
1365bf215546Sopenharmony_ci    * We must compile some shader here, because if the previous or a next shaders exists later
1366bf215546Sopenharmony_ci    * when the shaders are bound, then the key evaluation in the shader selector will access
1367bf215546Sopenharmony_ci    * the current variant of these  prev and next shader, and we can only assign
1368bf215546Sopenharmony_ci    * a current variant when it has been successfully compiled.
1369bf215546Sopenharmony_ci    *
1370bf215546Sopenharmony_ci    * For shaders that require lowering because certain instructions are not available
1371bf215546Sopenharmony_ci    * and their emulation is state depended (like sampling an integer texture that must be
1372bf215546Sopenharmony_ci    * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
1373bf215546Sopenharmony_ci    * we must go through the shader selector here to create a compilable variant.
1374bf215546Sopenharmony_ci    * For shaders that are not depended on the state this is just compiling the original
1375bf215546Sopenharmony_ci    * shader.
1376bf215546Sopenharmony_ci    *
1377bf215546Sopenharmony_ci    * TODO: get rid of having to compiling the shader here if it can be forseen that it will
1378bf215546Sopenharmony_ci    * be thrown away (i.e. it depends on states that are likely to change before the shader is
1379bf215546Sopenharmony_ci    * used for the first time)
1380bf215546Sopenharmony_ci    */
1381bf215546Sopenharmony_ci   struct d3d12_selection_context sel_ctx = {0};
1382bf215546Sopenharmony_ci   sel_ctx.ctx = ctx;
1383bf215546Sopenharmony_ci   select_shader_variant(&sel_ctx, sel, prev, next);
1384bf215546Sopenharmony_ci
1385bf215546Sopenharmony_ci   if (!sel->current) {
1386bf215546Sopenharmony_ci      ralloc_free(sel);
1387bf215546Sopenharmony_ci      return NULL;
1388bf215546Sopenharmony_ci   }
1389bf215546Sopenharmony_ci
1390bf215546Sopenharmony_ci   return sel;
1391bf215546Sopenharmony_ci}
1392bf215546Sopenharmony_ci
1393bf215546Sopenharmony_cistruct d3d12_shader_selector *
1394bf215546Sopenharmony_cid3d12_create_shader(struct d3d12_context *ctx,
1395bf215546Sopenharmony_ci                    pipe_shader_type stage,
1396bf215546Sopenharmony_ci                    const struct pipe_shader_state *shader)
1397bf215546Sopenharmony_ci{
1398bf215546Sopenharmony_ci   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1399bf215546Sopenharmony_ci   sel->stage = stage;
1400bf215546Sopenharmony_ci
1401bf215546Sopenharmony_ci   struct nir_shader *nir = NULL;
1402bf215546Sopenharmony_ci
1403bf215546Sopenharmony_ci   if (shader->type == PIPE_SHADER_IR_NIR) {
1404bf215546Sopenharmony_ci      nir = (nir_shader *)shader->ir.nir;
1405bf215546Sopenharmony_ci   } else {
1406bf215546Sopenharmony_ci      assert(shader->type == PIPE_SHADER_IR_TGSI);
1407bf215546Sopenharmony_ci      nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1408bf215546Sopenharmony_ci   }
1409bf215546Sopenharmony_ci
1410bf215546Sopenharmony_ci   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1411bf215546Sopenharmony_ci   memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1412bf215546Sopenharmony_ci   update_so_info(&sel->so_info, nir->info.outputs_written);
1413bf215546Sopenharmony_ci
1414bf215546Sopenharmony_ci   assert(nir != NULL);
1415bf215546Sopenharmony_ci   d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1416bf215546Sopenharmony_ci   d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1417bf215546Sopenharmony_ci
1418bf215546Sopenharmony_ci   NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1419bf215546Sopenharmony_ci   NIR_PASS_V(nir, d3d12_split_multistream_varyings);
1420bf215546Sopenharmony_ci
1421bf215546Sopenharmony_ci   if (nir->info.stage != MESA_SHADER_VERTEX)
1422bf215546Sopenharmony_ci      nir->info.inputs_read =
1423bf215546Sopenharmony_ci            dxil_reassign_driver_locations(nir, nir_var_shader_in,
1424bf215546Sopenharmony_ci                                            prev ? prev->current->nir->info.outputs_written : 0);
1425bf215546Sopenharmony_ci   else
1426bf215546Sopenharmony_ci      nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
1427bf215546Sopenharmony_ci
1428bf215546Sopenharmony_ci   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1429bf215546Sopenharmony_ci      nir->info.outputs_written =
1430bf215546Sopenharmony_ci            dxil_reassign_driver_locations(nir, nir_var_shader_out,
1431bf215546Sopenharmony_ci                                            next ? next->current->nir->info.inputs_read : 0);
1432bf215546Sopenharmony_ci   } else {
1433bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1434bf215546Sopenharmony_ci      NIR_PASS_V(nir, d3d12_lower_sample_pos);
1435bf215546Sopenharmony_ci      dxil_sort_ps_outputs(nir);
1436bf215546Sopenharmony_ci   }
1437bf215546Sopenharmony_ci
1438bf215546Sopenharmony_ci   return d3d12_create_shader_impl(ctx, sel, nir, prev, next);
1439bf215546Sopenharmony_ci}
1440bf215546Sopenharmony_ci
1441bf215546Sopenharmony_cistruct d3d12_shader_selector *
1442bf215546Sopenharmony_cid3d12_create_compute_shader(struct d3d12_context *ctx,
1443bf215546Sopenharmony_ci                            const struct pipe_compute_state *shader)
1444bf215546Sopenharmony_ci{
1445bf215546Sopenharmony_ci   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1446bf215546Sopenharmony_ci   sel->stage = PIPE_SHADER_COMPUTE;
1447bf215546Sopenharmony_ci
1448bf215546Sopenharmony_ci   struct nir_shader *nir = NULL;
1449bf215546Sopenharmony_ci
1450bf215546Sopenharmony_ci   if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1451bf215546Sopenharmony_ci      nir = (nir_shader *)shader->prog;
1452bf215546Sopenharmony_ci   } else {
1453bf215546Sopenharmony_ci      assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1454bf215546Sopenharmony_ci      nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1455bf215546Sopenharmony_ci   }
1456bf215546Sopenharmony_ci
1457bf215546Sopenharmony_ci   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1458bf215546Sopenharmony_ci
1459bf215546Sopenharmony_ci   NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
1460bf215546Sopenharmony_ci
1461bf215546Sopenharmony_ci   return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr);
1462bf215546Sopenharmony_ci}
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_civoid
1465bf215546Sopenharmony_cid3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1466bf215546Sopenharmony_ci{
1467bf215546Sopenharmony_ci   static unsigned order[] = {
1468bf215546Sopenharmony_ci      PIPE_SHADER_VERTEX,
1469bf215546Sopenharmony_ci      PIPE_SHADER_TESS_CTRL,
1470bf215546Sopenharmony_ci      PIPE_SHADER_TESS_EVAL,
1471bf215546Sopenharmony_ci      PIPE_SHADER_GEOMETRY,
1472bf215546Sopenharmony_ci      PIPE_SHADER_FRAGMENT
1473bf215546Sopenharmony_ci   };
1474bf215546Sopenharmony_ci   struct d3d12_selection_context sel_ctx;
1475bf215546Sopenharmony_ci
1476bf215546Sopenharmony_ci   sel_ctx.ctx = ctx;
1477bf215546Sopenharmony_ci   sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1478bf215546Sopenharmony_ci   sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1479bf215546Sopenharmony_ci   sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1480bf215546Sopenharmony_ci   sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
1481bf215546Sopenharmony_ci   sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
1482bf215546Sopenharmony_ci   sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx);
1483bf215546Sopenharmony_ci   sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1484bf215546Sopenharmony_ci   sel_ctx.manual_depth_range = manual_depth_range(ctx);
1485bf215546Sopenharmony_ci
1486bf215546Sopenharmony_ci   validate_geometry_shader_variant(&sel_ctx);
1487bf215546Sopenharmony_ci   validate_tess_ctrl_shader_variant(&sel_ctx);
1488bf215546Sopenharmony_ci
1489bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(order); ++i) {
1490bf215546Sopenharmony_ci      auto sel = ctx->gfx_stages[order[i]];
1491bf215546Sopenharmony_ci      if (!sel)
1492bf215546Sopenharmony_ci         continue;
1493bf215546Sopenharmony_ci
1494bf215546Sopenharmony_ci      d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1495bf215546Sopenharmony_ci      d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci      select_shader_variant(&sel_ctx, sel, prev, next);
1498bf215546Sopenharmony_ci   }
1499bf215546Sopenharmony_ci}
1500bf215546Sopenharmony_ci
1501bf215546Sopenharmony_cistatic const unsigned *
1502bf215546Sopenharmony_ciworkgroup_size_variable(struct d3d12_context *ctx,
1503bf215546Sopenharmony_ci                        const struct pipe_grid_info *info)
1504bf215546Sopenharmony_ci{
1505bf215546Sopenharmony_ci   if (ctx->compute_state->workgroup_size_variable)
1506bf215546Sopenharmony_ci      return info->block;
1507bf215546Sopenharmony_ci   return nullptr;
1508bf215546Sopenharmony_ci}
1509bf215546Sopenharmony_ci
1510bf215546Sopenharmony_civoid
1511bf215546Sopenharmony_cid3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1512bf215546Sopenharmony_ci{
1513bf215546Sopenharmony_ci   struct d3d12_selection_context sel_ctx = {};
1514bf215546Sopenharmony_ci
1515bf215546Sopenharmony_ci   sel_ctx.ctx = ctx;
1516bf215546Sopenharmony_ci   sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1517bf215546Sopenharmony_ci
1518bf215546Sopenharmony_ci   select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1519bf215546Sopenharmony_ci}
1520bf215546Sopenharmony_ci
1521bf215546Sopenharmony_civoid
1522bf215546Sopenharmony_cid3d12_shader_free(struct d3d12_shader_selector *sel)
1523bf215546Sopenharmony_ci{
1524bf215546Sopenharmony_ci   auto shader = sel->first;
1525bf215546Sopenharmony_ci   while (shader) {
1526bf215546Sopenharmony_ci      free(shader->bytecode);
1527bf215546Sopenharmony_ci      shader = shader->next_variant;
1528bf215546Sopenharmony_ci   }
1529bf215546Sopenharmony_ci   ralloc_free(sel->initial);
1530bf215546Sopenharmony_ci   ralloc_free(sel);
1531bf215546Sopenharmony_ci}
1532