1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2018 Collabora Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir_opcodes.h"
25bf215546Sopenharmony_ci#include "zink_context.h"
26bf215546Sopenharmony_ci#include "zink_compiler.h"
27bf215546Sopenharmony_ci#include "zink_program.h"
28bf215546Sopenharmony_ci#include "zink_screen.h"
29bf215546Sopenharmony_ci#include "nir_to_spirv/nir_to_spirv.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "pipe/p_state.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#include "nir.h"
34bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h"
37bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h"
38bf215546Sopenharmony_ci#include "tgsi/tgsi_from_mesa.h"
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci#include "util/u_memory.h"
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci#include "compiler/spirv/nir_spirv.h"
43bf215546Sopenharmony_ci#include "vulkan/util/vk_util.h"
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_cibool
46bf215546Sopenharmony_cizink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_cistatic void
49bf215546Sopenharmony_cicreate_vs_pushconst(nir_shader *nir)
50bf215546Sopenharmony_ci{
51bf215546Sopenharmony_ci   nir_variable *vs_pushconst;
52bf215546Sopenharmony_ci   /* create compatible layout for the ntv push constant loader */
53bf215546Sopenharmony_ci   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
54bf215546Sopenharmony_ci   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
55bf215546Sopenharmony_ci   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
56bf215546Sopenharmony_ci   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
57bf215546Sopenharmony_ci   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
58bf215546Sopenharmony_ci   fields[1].name = ralloc_asprintf(nir, "draw_id");
59bf215546Sopenharmony_ci   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
60bf215546Sopenharmony_ci   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
61bf215546Sopenharmony_ci                                                 glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
62bf215546Sopenharmony_ci   vs_pushconst->data.location = INT_MAX; //doesn't really matter
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistatic void
66bf215546Sopenharmony_cicreate_cs_pushconst(nir_shader *nir)
67bf215546Sopenharmony_ci{
68bf215546Sopenharmony_ci   nir_variable *cs_pushconst;
69bf215546Sopenharmony_ci   /* create compatible layout for the ntv push constant loader */
70bf215546Sopenharmony_ci   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
71bf215546Sopenharmony_ci   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
72bf215546Sopenharmony_ci   fields[0].name = ralloc_asprintf(nir, "work_dim");
73bf215546Sopenharmony_ci   fields[0].offset = 0;
74bf215546Sopenharmony_ci   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
75bf215546Sopenharmony_ci                                                 glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
76bf215546Sopenharmony_ci   cs_pushconst->data.location = INT_MAX; //doesn't really matter
77bf215546Sopenharmony_ci}
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_cistatic bool
80bf215546Sopenharmony_cireads_work_dim(nir_shader *shader)
81bf215546Sopenharmony_ci{
82bf215546Sopenharmony_ci   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
83bf215546Sopenharmony_ci}
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_cistatic bool
86bf215546Sopenharmony_cilower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
87bf215546Sopenharmony_ci{
88bf215546Sopenharmony_ci   if (in->type != nir_instr_type_intrinsic)
89bf215546Sopenharmony_ci      return false;
90bf215546Sopenharmony_ci   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
91bf215546Sopenharmony_ci   if (instr->intrinsic != nir_intrinsic_load_work_dim)
92bf215546Sopenharmony_ci      return false;
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   if (instr->intrinsic == nir_intrinsic_load_work_dim) {
95bf215546Sopenharmony_ci      b->cursor = nir_after_instr(&instr->instr);
96bf215546Sopenharmony_ci      nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
97bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
98bf215546Sopenharmony_ci      nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
99bf215546Sopenharmony_ci      load->num_components = 1;
100bf215546Sopenharmony_ci      nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
101bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &load->instr);
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   return true;
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_cistatic bool
110bf215546Sopenharmony_cilower_work_dim(nir_shader *shader)
111bf215546Sopenharmony_ci{
112bf215546Sopenharmony_ci   if (shader->info.stage != MESA_SHADER_KERNEL)
113bf215546Sopenharmony_ci      return false;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   if (!reads_work_dim(shader))
116bf215546Sopenharmony_ci      return false;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
119bf215546Sopenharmony_ci}
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_cistatic bool
122bf215546Sopenharmony_cilower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
123bf215546Sopenharmony_ci{
124bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
125bf215546Sopenharmony_ci      return false;
126bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
127bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_deref)
128bf215546Sopenharmony_ci      return false;
129bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
130bf215546Sopenharmony_ci   if (var->data.mode != nir_var_shader_in)
131bf215546Sopenharmony_ci      return false;
132bf215546Sopenharmony_ci   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
133bf215546Sopenharmony_ci      return false;
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci   /* create second variable for the split */
136bf215546Sopenharmony_ci   nir_variable *var2 = nir_variable_clone(var, b->shader);
137bf215546Sopenharmony_ci   /* split new variable into second slot */
138bf215546Sopenharmony_ci   var2->data.driver_location++;
139bf215546Sopenharmony_ci   nir_shader_add_variable(b->shader, var2);
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   unsigned total_num_components = glsl_get_vector_elements(var->type);
142bf215546Sopenharmony_ci   /* new variable is the second half of the dvec */
143bf215546Sopenharmony_ci   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
144bf215546Sopenharmony_ci   /* clamp original variable to a dvec2 */
145bf215546Sopenharmony_ci   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci   /* this is the first load instruction for the first half of the dvec3/4 components */
150bf215546Sopenharmony_ci   nir_ssa_def *load = nir_load_var(b, var);
151bf215546Sopenharmony_ci   /* this is the second load instruction for the second half of the dvec3/4 components */
152bf215546Sopenharmony_ci   nir_ssa_def *load2 = nir_load_var(b, var2);
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   nir_ssa_def *def[4];
155bf215546Sopenharmony_ci   /* create a new dvec3/4 comprised of all the loaded components from both variables */
156bf215546Sopenharmony_ci   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
157bf215546Sopenharmony_ci   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
158bf215546Sopenharmony_ci   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
159bf215546Sopenharmony_ci   if (total_num_components == 4)
160bf215546Sopenharmony_ci      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
161bf215546Sopenharmony_ci   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
162bf215546Sopenharmony_ci   /* use the assembled dvec3/4 for all other uses of the load */
163bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
164bf215546Sopenharmony_ci                                  new_vec->parent_instr);
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   /* remove the original instr and its deref chain */
167bf215546Sopenharmony_ci   nir_instr *parent = intr->src[0].ssa->parent_instr;
168bf215546Sopenharmony_ci   nir_instr_remove(instr);
169bf215546Sopenharmony_ci   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   return true;
172bf215546Sopenharmony_ci}
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci/* mesa/gallium always provides UINT versions of 64bit formats:
175bf215546Sopenharmony_ci * - rewrite loads as 32bit vec loads
176bf215546Sopenharmony_ci * - cast back to 64bit
177bf215546Sopenharmony_ci */
178bf215546Sopenharmony_cistatic bool
179bf215546Sopenharmony_cilower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
180bf215546Sopenharmony_ci{
181bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
182bf215546Sopenharmony_ci      return false;
183bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
184bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_deref)
185bf215546Sopenharmony_ci      return false;
186bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
187bf215546Sopenharmony_ci   if (var->data.mode != nir_var_shader_in)
188bf215546Sopenharmony_ci      return false;
189bf215546Sopenharmony_ci   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
190bf215546Sopenharmony_ci      return false;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   unsigned num_components = glsl_get_vector_elements(var->type);
193bf215546Sopenharmony_ci   enum glsl_base_type base_type;
194bf215546Sopenharmony_ci   switch (glsl_get_base_type(var->type)) {
195bf215546Sopenharmony_ci   case GLSL_TYPE_UINT64:
196bf215546Sopenharmony_ci      base_type = GLSL_TYPE_UINT;
197bf215546Sopenharmony_ci      break;
198bf215546Sopenharmony_ci   case GLSL_TYPE_INT64:
199bf215546Sopenharmony_ci      base_type = GLSL_TYPE_INT;
200bf215546Sopenharmony_ci      break;
201bf215546Sopenharmony_ci   case GLSL_TYPE_DOUBLE:
202bf215546Sopenharmony_ci      base_type = GLSL_TYPE_FLOAT;
203bf215546Sopenharmony_ci      break;
204bf215546Sopenharmony_ci   default:
205bf215546Sopenharmony_ci      unreachable("unknown 64-bit vertex attribute format!");
206bf215546Sopenharmony_ci   }
207bf215546Sopenharmony_ci   var->type = glsl_vector_type(base_type, num_components * 2);
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci   nir_ssa_def *load = nir_load_var(b, var);
212bf215546Sopenharmony_ci   nir_ssa_def *casted[2];
213bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_components; i++)
214bf215546Sopenharmony_ci     casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
215bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   /* remove the original instr and its deref chain */
218bf215546Sopenharmony_ci   nir_instr *parent = intr->src[0].ssa->parent_instr;
219bf215546Sopenharmony_ci   nir_instr_remove(instr);
220bf215546Sopenharmony_ci   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   return true;
223bf215546Sopenharmony_ci}
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci/* "64-bit three- and four-component vectors consume two consecutive locations."
226bf215546Sopenharmony_ci *  - 14.1.4. Location Assignment
227bf215546Sopenharmony_ci *
228bf215546Sopenharmony_ci * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
229bf215546Sopenharmony_ci * are assigned to consecutive locations, loaded separately, and then assembled back into a
230bf215546Sopenharmony_ci * composite value that's used in place of the original loaded ssa src
231bf215546Sopenharmony_ci */
232bf215546Sopenharmony_cistatic bool
233bf215546Sopenharmony_cilower_64bit_vertex_attribs(nir_shader *shader)
234bf215546Sopenharmony_ci{
235bf215546Sopenharmony_ci   if (shader->info.stage != MESA_SHADER_VERTEX)
236bf215546Sopenharmony_ci      return false;
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
239bf215546Sopenharmony_ci   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
240bf215546Sopenharmony_ci   return progress;
241bf215546Sopenharmony_ci}
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_cistatic bool
244bf215546Sopenharmony_cilower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
245bf215546Sopenharmony_ci{
246bf215546Sopenharmony_ci   if (in->type != nir_instr_type_intrinsic)
247bf215546Sopenharmony_ci      return false;
248bf215546Sopenharmony_ci   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
249bf215546Sopenharmony_ci   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
250bf215546Sopenharmony_ci      return false;
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&instr->instr);
253bf215546Sopenharmony_ci   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
254bf215546Sopenharmony_ci   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
255bf215546Sopenharmony_ci   nir_intrinsic_set_range(load, 4);
256bf215546Sopenharmony_ci   load->num_components = 1;
257bf215546Sopenharmony_ci   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
258bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &load->instr);
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
261bf215546Sopenharmony_ci                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
262bf215546Sopenharmony_ci                                          &instr->dest.ssa,
263bf215546Sopenharmony_ci                                          nir_imm_int(b, 0),
264bf215546Sopenharmony_ci                                          NULL);
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
267bf215546Sopenharmony_ci                                  composite->parent_instr);
268bf215546Sopenharmony_ci   return true;
269bf215546Sopenharmony_ci}
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_cistatic bool
272bf215546Sopenharmony_cilower_basevertex(nir_shader *shader)
273bf215546Sopenharmony_ci{
274bf215546Sopenharmony_ci   if (shader->info.stage != MESA_SHADER_VERTEX)
275bf215546Sopenharmony_ci      return false;
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
278bf215546Sopenharmony_ci      return false;
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
281bf215546Sopenharmony_ci}
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_cistatic bool
285bf215546Sopenharmony_cilower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
286bf215546Sopenharmony_ci{
287bf215546Sopenharmony_ci   if (in->type != nir_instr_type_intrinsic)
288bf215546Sopenharmony_ci      return false;
289bf215546Sopenharmony_ci   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
290bf215546Sopenharmony_ci   if (instr->intrinsic != nir_intrinsic_load_draw_id)
291bf215546Sopenharmony_ci      return false;
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&instr->instr);
294bf215546Sopenharmony_ci   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
295bf215546Sopenharmony_ci   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
296bf215546Sopenharmony_ci   nir_intrinsic_set_range(load, 4);
297bf215546Sopenharmony_ci   load->num_components = 1;
298bf215546Sopenharmony_ci   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
299bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &load->instr);
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci   return true;
304bf215546Sopenharmony_ci}
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_cistatic bool
307bf215546Sopenharmony_cilower_drawid(nir_shader *shader)
308bf215546Sopenharmony_ci{
309bf215546Sopenharmony_ci   if (shader->info.stage != MESA_SHADER_VERTEX)
310bf215546Sopenharmony_ci      return false;
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
313bf215546Sopenharmony_ci      return false;
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
316bf215546Sopenharmony_ci}
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_cistatic bool
319bf215546Sopenharmony_cilower_dual_blend(nir_shader *shader)
320bf215546Sopenharmony_ci{
321bf215546Sopenharmony_ci   bool progress = false;
322bf215546Sopenharmony_ci   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
323bf215546Sopenharmony_ci   if (var) {
324bf215546Sopenharmony_ci      var->data.location = FRAG_RESULT_DATA0;
325bf215546Sopenharmony_ci      var->data.index = 1;
326bf215546Sopenharmony_ci      progress = true;
327bf215546Sopenharmony_ci   }
328bf215546Sopenharmony_ci   nir_shader_preserve_all_metadata(shader);
329bf215546Sopenharmony_ci   return progress;
330bf215546Sopenharmony_ci}
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_civoid
333bf215546Sopenharmony_cizink_screen_init_compiler(struct zink_screen *screen)
334bf215546Sopenharmony_ci{
335bf215546Sopenharmony_ci   static const struct nir_shader_compiler_options
336bf215546Sopenharmony_ci   default_options = {
337bf215546Sopenharmony_ci      .lower_ffma16 = true,
338bf215546Sopenharmony_ci      .lower_ffma32 = true,
339bf215546Sopenharmony_ci      .lower_ffma64 = true,
340bf215546Sopenharmony_ci      .lower_scmp = true,
341bf215546Sopenharmony_ci      .lower_fdph = true,
342bf215546Sopenharmony_ci      .lower_flrp32 = true,
343bf215546Sopenharmony_ci      .lower_fpow = true,
344bf215546Sopenharmony_ci      .lower_fsat = true,
345bf215546Sopenharmony_ci      .lower_extract_byte = true,
346bf215546Sopenharmony_ci      .lower_extract_word = true,
347bf215546Sopenharmony_ci      .lower_insert_byte = true,
348bf215546Sopenharmony_ci      .lower_insert_word = true,
349bf215546Sopenharmony_ci      .lower_mul_high = true,
350bf215546Sopenharmony_ci      .lower_rotate = true,
351bf215546Sopenharmony_ci      .lower_uadd_carry = true,
352bf215546Sopenharmony_ci      .lower_uadd_sat = true,
353bf215546Sopenharmony_ci      .lower_usub_sat = true,
354bf215546Sopenharmony_ci      .lower_vector_cmp = true,
355bf215546Sopenharmony_ci      .lower_int64_options = 0,
356bf215546Sopenharmony_ci      .lower_doubles_options = 0,
357bf215546Sopenharmony_ci      .lower_uniforms_to_ubo = true,
358bf215546Sopenharmony_ci      .has_fsub = true,
359bf215546Sopenharmony_ci      .has_isub = true,
360bf215546Sopenharmony_ci      .has_txs = true,
361bf215546Sopenharmony_ci      .lower_mul_2x32_64 = true,
362bf215546Sopenharmony_ci      .support_16bit_alu = true, /* not quite what it sounds like */
363bf215546Sopenharmony_ci   };
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci   screen->nir_options = default_options;
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci   if (!screen->info.feats.features.shaderInt64)
368bf215546Sopenharmony_ci      screen->nir_options.lower_int64_options = ~0;
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci   if (!screen->info.feats.features.shaderFloat64) {
371bf215546Sopenharmony_ci      screen->nir_options.lower_doubles_options = ~0;
372bf215546Sopenharmony_ci      screen->nir_options.lower_flrp64 = true;
373bf215546Sopenharmony_ci      screen->nir_options.lower_ffma64 = true;
374bf215546Sopenharmony_ci   }
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   /*
377bf215546Sopenharmony_ci       The OpFRem and OpFMod instructions use cheap approximations of remainder,
378bf215546Sopenharmony_ci       and the error can be large due to the discontinuity in trunc() and floor().
379bf215546Sopenharmony_ci       This can produce mathematically unexpected results in some cases, such as
380bf215546Sopenharmony_ci       FMod(x,x) computing x rather than 0, and can also cause the result to have
381bf215546Sopenharmony_ci       a different sign than the infinitely precise result.
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci       -Table 84. Precision of core SPIR-V Instructions
384bf215546Sopenharmony_ci       * for drivers that are known to have imprecise fmod for doubles, lower dmod
385bf215546Sopenharmony_ci    */
386bf215546Sopenharmony_ci   if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
387bf215546Sopenharmony_ci       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
388bf215546Sopenharmony_ci       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
389bf215546Sopenharmony_ci      screen->nir_options.lower_doubles_options = nir_lower_dmod;
390bf215546Sopenharmony_ci}
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ciconst void *
393bf215546Sopenharmony_cizink_get_compiler_options(struct pipe_screen *pscreen,
394bf215546Sopenharmony_ci                          enum pipe_shader_ir ir,
395bf215546Sopenharmony_ci                          enum pipe_shader_type shader)
396bf215546Sopenharmony_ci{
397bf215546Sopenharmony_ci   assert(ir == PIPE_SHADER_IR_NIR);
398bf215546Sopenharmony_ci   return &zink_screen(pscreen)->nir_options;
399bf215546Sopenharmony_ci}
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_cistruct nir_shader *
402bf215546Sopenharmony_cizink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
403bf215546Sopenharmony_ci{
404bf215546Sopenharmony_ci   if (zink_debug & ZINK_DEBUG_TGSI) {
405bf215546Sopenharmony_ci      fprintf(stderr, "TGSI shader:\n---8<---\n");
406bf215546Sopenharmony_ci      tgsi_dump_to_file(tokens, 0, stderr);
407bf215546Sopenharmony_ci      fprintf(stderr, "---8<---\n\n");
408bf215546Sopenharmony_ci   }
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci   return tgsi_to_nir(tokens, screen, false);
411bf215546Sopenharmony_ci}
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_cistatic bool
415bf215546Sopenharmony_cidest_is_64bit(nir_dest *dest, void *state)
416bf215546Sopenharmony_ci{
417bf215546Sopenharmony_ci   bool *lower = (bool *)state;
418bf215546Sopenharmony_ci   if (dest && (nir_dest_bit_size(*dest) == 64)) {
419bf215546Sopenharmony_ci      *lower = true;
420bf215546Sopenharmony_ci      return false;
421bf215546Sopenharmony_ci   }
422bf215546Sopenharmony_ci   return true;
423bf215546Sopenharmony_ci}
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_cistatic bool
426bf215546Sopenharmony_cisrc_is_64bit(nir_src *src, void *state)
427bf215546Sopenharmony_ci{
428bf215546Sopenharmony_ci   bool *lower = (bool *)state;
429bf215546Sopenharmony_ci   if (src && (nir_src_bit_size(*src) == 64)) {
430bf215546Sopenharmony_ci      *lower = true;
431bf215546Sopenharmony_ci      return false;
432bf215546Sopenharmony_ci   }
433bf215546Sopenharmony_ci   return true;
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_cistatic bool
437bf215546Sopenharmony_cifilter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
438bf215546Sopenharmony_ci{
439bf215546Sopenharmony_ci   bool lower = false;
440bf215546Sopenharmony_ci   /* lower_alu_to_scalar required nir_instr to be const, but nir_foreach_*
441bf215546Sopenharmony_ci    * doesn't have const variants, so do the ugly const_cast here. */
442bf215546Sopenharmony_ci   nir_instr *instr = (nir_instr *)const_instr;
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   nir_foreach_dest(instr, dest_is_64bit, &lower);
445bf215546Sopenharmony_ci   if (lower)
446bf215546Sopenharmony_ci      return true;
447bf215546Sopenharmony_ci   nir_foreach_src(instr, src_is_64bit, &lower);
448bf215546Sopenharmony_ci   return lower;
449bf215546Sopenharmony_ci}
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_cistatic bool
452bf215546Sopenharmony_cifilter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
453bf215546Sopenharmony_ci{
454bf215546Sopenharmony_ci   nir_instr *instr = (nir_instr *)const_instr;
455bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(instr);
456bf215546Sopenharmony_ci   switch (alu->op) {
457bf215546Sopenharmony_ci   case nir_op_pack_64_2x32_split:
458bf215546Sopenharmony_ci   case nir_op_pack_32_2x16_split:
459bf215546Sopenharmony_ci   case nir_op_unpack_32_2x16_split_x:
460bf215546Sopenharmony_ci   case nir_op_unpack_32_2x16_split_y:
461bf215546Sopenharmony_ci   case nir_op_unpack_64_2x32_split_x:
462bf215546Sopenharmony_ci   case nir_op_unpack_64_2x32_split_y:
463bf215546Sopenharmony_ci      return true;
464bf215546Sopenharmony_ci   default:
465bf215546Sopenharmony_ci      break;
466bf215546Sopenharmony_ci   }
467bf215546Sopenharmony_ci   return false;
468bf215546Sopenharmony_ci}
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_cistruct bo_vars {
472bf215546Sopenharmony_ci   nir_variable *uniforms[5];
473bf215546Sopenharmony_ci   nir_variable *ubo[5];
474bf215546Sopenharmony_ci   nir_variable *ssbo[5];
475bf215546Sopenharmony_ci   uint32_t first_ubo;
476bf215546Sopenharmony_ci   uint32_t first_ssbo;
477bf215546Sopenharmony_ci};
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_cistatic struct bo_vars
480bf215546Sopenharmony_ciget_bo_vars(struct zink_shader *zs, nir_shader *shader)
481bf215546Sopenharmony_ci{
482bf215546Sopenharmony_ci   struct bo_vars bo;
483bf215546Sopenharmony_ci   memset(&bo, 0, sizeof(bo));
484bf215546Sopenharmony_ci   if (zs->ubos_used)
485bf215546Sopenharmony_ci      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
486bf215546Sopenharmony_ci   assert(bo.first_ssbo < PIPE_MAX_CONSTANT_BUFFERS);
487bf215546Sopenharmony_ci   if (zs->ssbos_used)
488bf215546Sopenharmony_ci      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
489bf215546Sopenharmony_ci   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
490bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
491bf215546Sopenharmony_ci      unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
492bf215546Sopenharmony_ci      if (var->data.mode == nir_var_mem_ssbo) {
493bf215546Sopenharmony_ci         assert(!bo.ssbo[idx]);
494bf215546Sopenharmony_ci         bo.ssbo[idx] = var;
495bf215546Sopenharmony_ci      } else {
496bf215546Sopenharmony_ci         if (var->data.driver_location) {
497bf215546Sopenharmony_ci            assert(!bo.ubo[idx]);
498bf215546Sopenharmony_ci            bo.ubo[idx] = var;
499bf215546Sopenharmony_ci         } else {
500bf215546Sopenharmony_ci            assert(!bo.uniforms[idx]);
501bf215546Sopenharmony_ci            bo.uniforms[idx] = var;
502bf215546Sopenharmony_ci         }
503bf215546Sopenharmony_ci      }
504bf215546Sopenharmony_ci   }
505bf215546Sopenharmony_ci   return bo;
506bf215546Sopenharmony_ci}
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_cistatic bool
509bf215546Sopenharmony_cibound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
510bf215546Sopenharmony_ci{
511bf215546Sopenharmony_ci   struct bo_vars *bo = data;
512bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
513bf215546Sopenharmony_ci      return false;
514bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
515bf215546Sopenharmony_ci   nir_variable *var = NULL;
516bf215546Sopenharmony_ci   nir_ssa_def *offset = NULL;
517bf215546Sopenharmony_ci   bool is_load = true;
518bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci   switch (intr->intrinsic) {
521bf215546Sopenharmony_ci   case nir_intrinsic_store_ssbo:
522bf215546Sopenharmony_ci      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
523bf215546Sopenharmony_ci      offset = intr->src[2].ssa;
524bf215546Sopenharmony_ci      is_load = false;
525bf215546Sopenharmony_ci      break;
526bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo:
527bf215546Sopenharmony_ci      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
528bf215546Sopenharmony_ci      offset = intr->src[1].ssa;
529bf215546Sopenharmony_ci      break;
530bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo:
531bf215546Sopenharmony_ci      if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
532bf215546Sopenharmony_ci         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
533bf215546Sopenharmony_ci      else
534bf215546Sopenharmony_ci         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
535bf215546Sopenharmony_ci      offset = intr->src[1].ssa;
536bf215546Sopenharmony_ci      break;
537bf215546Sopenharmony_ci   default:
538bf215546Sopenharmony_ci      return false;
539bf215546Sopenharmony_ci   }
540bf215546Sopenharmony_ci   nir_src offset_src = nir_src_for_ssa(offset);
541bf215546Sopenharmony_ci   if (!nir_src_is_const(offset_src))
542bf215546Sopenharmony_ci      return false;
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci   unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
545bf215546Sopenharmony_ci   const struct glsl_type *strct_type = glsl_get_array_element(var->type);
546bf215546Sopenharmony_ci   unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
547bf215546Sopenharmony_ci   bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
548bf215546Sopenharmony_ci   if (has_unsized || offset_bytes + intr->num_components - 1 < size)
549bf215546Sopenharmony_ci      return false;
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci   unsigned rewrites = 0;
552bf215546Sopenharmony_ci   nir_ssa_def *result[2];
553bf215546Sopenharmony_ci   for (unsigned i = 0; i < intr->num_components; i++) {
554bf215546Sopenharmony_ci      if (offset_bytes + i >= size) {
555bf215546Sopenharmony_ci         rewrites++;
556bf215546Sopenharmony_ci         if (is_load)
557bf215546Sopenharmony_ci            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
558bf215546Sopenharmony_ci      }
559bf215546Sopenharmony_ci   }
560bf215546Sopenharmony_ci   assert(rewrites == intr->num_components);
561bf215546Sopenharmony_ci   if (is_load) {
562bf215546Sopenharmony_ci      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
563bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
564bf215546Sopenharmony_ci   }
565bf215546Sopenharmony_ci   nir_instr_remove(instr);
566bf215546Sopenharmony_ci   return true;
567bf215546Sopenharmony_ci}
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_cistatic bool
570bf215546Sopenharmony_cibound_bo_access(nir_shader *shader, struct zink_shader *zs)
571bf215546Sopenharmony_ci{
572bf215546Sopenharmony_ci   struct bo_vars bo = get_bo_vars(zs, shader);
573bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
574bf215546Sopenharmony_ci}
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_cistatic void
577bf215546Sopenharmony_cioptimize_nir(struct nir_shader *s, struct zink_shader *zs)
578bf215546Sopenharmony_ci{
579bf215546Sopenharmony_ci   bool progress;
580bf215546Sopenharmony_ci   do {
581bf215546Sopenharmony_ci      progress = false;
582bf215546Sopenharmony_ci      if (s->options->lower_int64_options)
583bf215546Sopenharmony_ci         NIR_PASS_V(s, nir_lower_int64);
584bf215546Sopenharmony_ci      NIR_PASS_V(s, nir_lower_vars_to_ssa);
585bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
586bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
587bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_copy_prop);
588bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_remove_phis);
589bf215546Sopenharmony_ci      if (s->options->lower_int64_options) {
590bf215546Sopenharmony_ci         NIR_PASS(progress, s, nir_lower_64bit_phis);
591bf215546Sopenharmony_ci         NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
592bf215546Sopenharmony_ci      }
593bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_dce);
594bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_dead_cf);
595bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
596bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_cse);
597bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
598bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_algebraic);
599bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_constant_folding);
600bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_undef);
601bf215546Sopenharmony_ci      NIR_PASS(progress, s, zink_nir_lower_b2b);
602bf215546Sopenharmony_ci      if (zs)
603bf215546Sopenharmony_ci         NIR_PASS(progress, s, bound_bo_access, zs);
604bf215546Sopenharmony_ci   } while (progress);
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_ci   do {
607bf215546Sopenharmony_ci      progress = false;
608bf215546Sopenharmony_ci      NIR_PASS(progress, s, nir_opt_algebraic_late);
609bf215546Sopenharmony_ci      if (progress) {
610bf215546Sopenharmony_ci         NIR_PASS_V(s, nir_copy_prop);
611bf215546Sopenharmony_ci         NIR_PASS_V(s, nir_opt_dce);
612bf215546Sopenharmony_ci         NIR_PASS_V(s, nir_opt_cse);
613bf215546Sopenharmony_ci      }
614bf215546Sopenharmony_ci   } while (progress);
615bf215546Sopenharmony_ci}
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci/* - copy the lowered fbfetch variable
618bf215546Sopenharmony_ci * - set the new one up as an input attachment for descriptor 0.6
619bf215546Sopenharmony_ci * - load it as an image
620bf215546Sopenharmony_ci * - overwrite the previous load
621bf215546Sopenharmony_ci */
622bf215546Sopenharmony_cistatic bool
623bf215546Sopenharmony_cilower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
624bf215546Sopenharmony_ci{
625bf215546Sopenharmony_ci   bool ms = data != NULL;
626bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
627bf215546Sopenharmony_ci      return false;
628bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
629bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_deref)
630bf215546Sopenharmony_ci      return false;
631bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
632bf215546Sopenharmony_ci   if (!var->data.fb_fetch_output)
633bf215546Sopenharmony_ci      return false;
634bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
635bf215546Sopenharmony_ci   nir_variable *fbfetch = nir_variable_clone(var, b->shader);
636bf215546Sopenharmony_ci   /* If Dim is SubpassData, ... Image Format must be Unknown
637bf215546Sopenharmony_ci    * - SPIRV OpTypeImage specification
638bf215546Sopenharmony_ci    */
639bf215546Sopenharmony_ci   fbfetch->data.image.format = 0;
640bf215546Sopenharmony_ci   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
641bf215546Sopenharmony_ci   fbfetch->data.mode = nir_var_uniform;
642bf215546Sopenharmony_ci   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
643bf215546Sopenharmony_ci   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
644bf215546Sopenharmony_ci   fbfetch->data.sample = ms;
645bf215546Sopenharmony_ci   enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
646bf215546Sopenharmony_ci   fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
647bf215546Sopenharmony_ci   nir_shader_add_variable(b->shader, fbfetch);
648bf215546Sopenharmony_ci   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
649bf215546Sopenharmony_ci   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
650bf215546Sopenharmony_ci   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
651bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
652bf215546Sopenharmony_ci   return true;
653bf215546Sopenharmony_ci}
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_cistatic bool
656bf215546Sopenharmony_cilower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
657bf215546Sopenharmony_ci{
658bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, shader) {
659bf215546Sopenharmony_ci      if (var->data.fb_fetch_output) {
660bf215546Sopenharmony_ci         *fbfetch = var;
661bf215546Sopenharmony_ci         break;
662bf215546Sopenharmony_ci      }
663bf215546Sopenharmony_ci   }
664bf215546Sopenharmony_ci   assert(*fbfetch);
665bf215546Sopenharmony_ci   if (!*fbfetch)
666bf215546Sopenharmony_ci      return false;
667bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
668bf215546Sopenharmony_ci}
669bf215546Sopenharmony_ci
670bf215546Sopenharmony_ci/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
671bf215546Sopenharmony_cistatic bool
672bf215546Sopenharmony_cicheck_psiz(struct nir_shader *s)
673bf215546Sopenharmony_ci{
674bf215546Sopenharmony_ci   bool have_psiz = false;
675bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, s) {
676bf215546Sopenharmony_ci      if (var->data.location == VARYING_SLOT_PSIZ) {
677bf215546Sopenharmony_ci         /* genuine PSIZ outputs will have this set */
678bf215546Sopenharmony_ci         have_psiz |= !!var->data.explicit_location;
679bf215546Sopenharmony_ci      }
680bf215546Sopenharmony_ci   }
681bf215546Sopenharmony_ci   return have_psiz;
682bf215546Sopenharmony_ci}
683bf215546Sopenharmony_ci
684bf215546Sopenharmony_cistatic nir_variable *
685bf215546Sopenharmony_cifind_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
686bf215546Sopenharmony_ci{
687bf215546Sopenharmony_ci   unsigned found = 0;
688bf215546Sopenharmony_ci   if (!location_frac && location != VARYING_SLOT_PSIZ) {
689bf215546Sopenharmony_ci      nir_foreach_shader_out_variable(var, nir) {
690bf215546Sopenharmony_ci         if (var->data.location == location)
691bf215546Sopenharmony_ci            found++;
692bf215546Sopenharmony_ci      }
693bf215546Sopenharmony_ci   }
694bf215546Sopenharmony_ci   if (found) {
695bf215546Sopenharmony_ci      /* multiple variables found for this location: find the biggest one */
696bf215546Sopenharmony_ci      nir_variable *out = NULL;
697bf215546Sopenharmony_ci      unsigned slots = 0;
698bf215546Sopenharmony_ci      nir_foreach_shader_out_variable(var, nir) {
699bf215546Sopenharmony_ci         if (var->data.location == location) {
700bf215546Sopenharmony_ci            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
701bf215546Sopenharmony_ci            if (count_slots > slots) {
702bf215546Sopenharmony_ci               slots = count_slots;
703bf215546Sopenharmony_ci               out = var;
704bf215546Sopenharmony_ci            }
705bf215546Sopenharmony_ci         }
706bf215546Sopenharmony_ci      }
707bf215546Sopenharmony_ci      return out;
708bf215546Sopenharmony_ci   } else {
709bf215546Sopenharmony_ci      /* only one variable found or this is location_frac */
710bf215546Sopenharmony_ci      nir_foreach_shader_out_variable(var, nir) {
711bf215546Sopenharmony_ci         if (var->data.location == location &&
712bf215546Sopenharmony_ci             (var->data.location_frac == location_frac ||
713bf215546Sopenharmony_ci              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
714bf215546Sopenharmony_ci            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
715bf215546Sopenharmony_ci               return var;
716bf215546Sopenharmony_ci         }
717bf215546Sopenharmony_ci      }
718bf215546Sopenharmony_ci   }
719bf215546Sopenharmony_ci   return NULL;
720bf215546Sopenharmony_ci}
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_cistatic bool
723bf215546Sopenharmony_ciis_inlined(const bool *inlined, const struct pipe_stream_output *output)
724bf215546Sopenharmony_ci{
725bf215546Sopenharmony_ci   for (unsigned i = 0; i < output->num_components; i++)
726bf215546Sopenharmony_ci      if (!inlined[output->start_component + i])
727bf215546Sopenharmony_ci         return false;
728bf215546Sopenharmony_ci   return true;
729bf215546Sopenharmony_ci}
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_cistatic void
732bf215546Sopenharmony_ciupdate_psiz_location(nir_shader *nir, nir_variable *psiz)
733bf215546Sopenharmony_ci{
734bf215546Sopenharmony_ci   uint32_t last_output = util_last_bit64(nir->info.outputs_written);
735bf215546Sopenharmony_ci   if (last_output < VARYING_SLOT_VAR0)
736bf215546Sopenharmony_ci      last_output = VARYING_SLOT_VAR0;
737bf215546Sopenharmony_ci   else
738bf215546Sopenharmony_ci      last_output++;
739bf215546Sopenharmony_ci   /* this should get fixed up by slot remapping */
740bf215546Sopenharmony_ci   psiz->data.location = last_output;
741bf215546Sopenharmony_ci}
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_cistatic const struct glsl_type *
744bf215546Sopenharmony_ciclamp_slot_type(const struct glsl_type *type, unsigned slot)
745bf215546Sopenharmony_ci{
746bf215546Sopenharmony_ci   /* could be dvec/dmat/mat: each member is the same */
747bf215546Sopenharmony_ci   const struct glsl_type *plain = glsl_without_array_or_matrix(type);
748bf215546Sopenharmony_ci   /* determine size of each member type */
749bf215546Sopenharmony_ci   unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
750bf215546Sopenharmony_ci   /* normalize slot idx to current type's size */
751bf215546Sopenharmony_ci   slot %= slot_count;
752bf215546Sopenharmony_ci   unsigned slot_components = glsl_get_components(plain);
753bf215546Sopenharmony_ci   if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
754bf215546Sopenharmony_ci      slot_components *= 2;
755bf215546Sopenharmony_ci   /* create a vec4 mask of the selected slot's components out of all the components */
756bf215546Sopenharmony_ci   uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
757bf215546Sopenharmony_ci   /* return a vecN of the selected components */
758bf215546Sopenharmony_ci   slot_components = util_bitcount(mask);
759bf215546Sopenharmony_ci   return glsl_vec_type(slot_components);
760bf215546Sopenharmony_ci}
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_cistatic const struct glsl_type *
763bf215546Sopenharmony_ciunroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
764bf215546Sopenharmony_ci{
765bf215546Sopenharmony_ci   const struct glsl_type *type = slot_type;
766bf215546Sopenharmony_ci   unsigned slot_count = 0;
767bf215546Sopenharmony_ci   unsigned cur_slot = 0;
768bf215546Sopenharmony_ci   /* iterate over all the members in the struct, stopping once the slot idx is reached */
769bf215546Sopenharmony_ci   for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
770bf215546Sopenharmony_ci      /* use array type for slot counting but return array member type for unroll */
771bf215546Sopenharmony_ci      const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
772bf215546Sopenharmony_ci      type = glsl_without_array(arraytype);
773bf215546Sopenharmony_ci      slot_count = glsl_count_vec4_slots(arraytype, false, false);
774bf215546Sopenharmony_ci   }
775bf215546Sopenharmony_ci   *slot_idx -= (cur_slot - slot_count);
776bf215546Sopenharmony_ci   if (!glsl_type_is_struct_or_ifc(type))
777bf215546Sopenharmony_ci      /* this is a fully unrolled struct: find the number of vec components to output */
778bf215546Sopenharmony_ci      type = clamp_slot_type(type, *slot_idx);
779bf215546Sopenharmony_ci   return type;
780bf215546Sopenharmony_ci}
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_cistatic unsigned
783bf215546Sopenharmony_ciget_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
784bf215546Sopenharmony_ci{
785bf215546Sopenharmony_ci   assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
786bf215546Sopenharmony_ci   const struct glsl_type *orig_type = var->type;
787bf215546Sopenharmony_ci   const struct glsl_type *type = glsl_without_array(var->type);
788bf215546Sopenharmony_ci   unsigned slot_idx = slot - so_slot;
789bf215546Sopenharmony_ci   if (type != orig_type)
790bf215546Sopenharmony_ci      slot_idx %= glsl_count_vec4_slots(type, false, false);
791bf215546Sopenharmony_ci   /* need to find the vec4 that's being exported by this slot */
792bf215546Sopenharmony_ci   while (glsl_type_is_struct_or_ifc(type))
793bf215546Sopenharmony_ci      type = unroll_struct_type(type, &slot_idx);
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci   /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
796bf215546Sopenharmony_ci   unsigned num_components = glsl_get_components(glsl_without_array(type));
797bf215546Sopenharmony_ci   const struct glsl_type *arraytype = orig_type;
798bf215546Sopenharmony_ci   while (glsl_type_is_array(arraytype) && !glsl_type_is_struct_or_ifc(glsl_without_array(arraytype))) {
799bf215546Sopenharmony_ci      num_components *= glsl_array_size(arraytype);
800bf215546Sopenharmony_ci      arraytype = glsl_get_array_element(arraytype);
801bf215546Sopenharmony_ci   }
802bf215546Sopenharmony_ci   assert(num_components);
803bf215546Sopenharmony_ci   /* gallium handles xfb in terms of 32bit units */
804bf215546Sopenharmony_ci   if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
805bf215546Sopenharmony_ci      num_components *= 2;
806bf215546Sopenharmony_ci   return num_components;
807bf215546Sopenharmony_ci}
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_cistatic const struct pipe_stream_output *
810bf215546Sopenharmony_cifind_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
811bf215546Sopenharmony_ci{
812bf215546Sopenharmony_ci   for (unsigned i = 0; i < so_info->num_outputs; i++) {
813bf215546Sopenharmony_ci      const struct pipe_stream_output *packed_output = &so_info->output[i];
814bf215546Sopenharmony_ci      if (reverse_map[packed_output->register_index] == slot)
815bf215546Sopenharmony_ci         return packed_output;
816bf215546Sopenharmony_ci   }
817bf215546Sopenharmony_ci   return NULL;
818bf215546Sopenharmony_ci}
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_cistatic void
821bf215546Sopenharmony_ciupdate_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
822bf215546Sopenharmony_ci               uint64_t outputs_written, bool have_psiz)
823bf215546Sopenharmony_ci{
824bf215546Sopenharmony_ci   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
825bf215546Sopenharmony_ci   unsigned slot = 0;
826bf215546Sopenharmony_ci   /* semi-copied from iris */
827bf215546Sopenharmony_ci   while (outputs_written) {
828bf215546Sopenharmony_ci      int bit = u_bit_scan64(&outputs_written);
829bf215546Sopenharmony_ci      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
830bf215546Sopenharmony_ci      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
831bf215546Sopenharmony_ci         continue;
832bf215546Sopenharmony_ci      reverse_map[slot++] = bit;
833bf215546Sopenharmony_ci   }
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci   bool have_fake_psiz = false;
836bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, zs->nir) {
837bf215546Sopenharmony_ci      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
838bf215546Sopenharmony_ci         have_fake_psiz = true;
839bf215546Sopenharmony_ci   }
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci   bool inlined[VARYING_SLOT_MAX][4] = {0};
842bf215546Sopenharmony_ci   uint64_t packed = 0;
843bf215546Sopenharmony_ci   uint8_t packed_components[VARYING_SLOT_MAX] = {0};
844bf215546Sopenharmony_ci   uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
845bf215546Sopenharmony_ci   uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
846bf215546Sopenharmony_ci   uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
847bf215546Sopenharmony_ci   nir_variable *psiz = NULL;
848bf215546Sopenharmony_ci   for (unsigned i = 0; i < so_info->num_outputs; i++) {
849bf215546Sopenharmony_ci      const struct pipe_stream_output *output = &so_info->output[i];
850bf215546Sopenharmony_ci      unsigned slot = reverse_map[output->register_index];
851bf215546Sopenharmony_ci      /* always set stride to be used during draw */
852bf215546Sopenharmony_ci      zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
853bf215546Sopenharmony_ci      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
854bf215546Sopenharmony_ci         nir_variable *var = NULL;
855bf215546Sopenharmony_ci         unsigned so_slot;
856bf215546Sopenharmony_ci         while (!var)
857bf215546Sopenharmony_ci            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
858bf215546Sopenharmony_ci         if (var->data.location == VARYING_SLOT_PSIZ)
859bf215546Sopenharmony_ci            psiz = var;
860bf215546Sopenharmony_ci         so_slot = slot + 1;
861bf215546Sopenharmony_ci         slot = reverse_map[output->register_index];
862bf215546Sopenharmony_ci         if (var->data.explicit_xfb_buffer) {
863bf215546Sopenharmony_ci            /* handle dvec3 where gallium splits streamout over 2 registers */
864bf215546Sopenharmony_ci            for (unsigned j = 0; j < output->num_components; j++)
865bf215546Sopenharmony_ci               inlined[slot][output->start_component + j] = true;
866bf215546Sopenharmony_ci         }
867bf215546Sopenharmony_ci         if (is_inlined(inlined[slot], output))
868bf215546Sopenharmony_ci            continue;
869bf215546Sopenharmony_ci         bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
870bf215546Sopenharmony_ci         unsigned num_components = get_slot_components(var, slot, so_slot);
871bf215546Sopenharmony_ci         /* if this is the entire variable, try to blast it out during the initial declaration
872bf215546Sopenharmony_ci          * structs must be handled later to ensure accurate analysis
873bf215546Sopenharmony_ci          */
874bf215546Sopenharmony_ci         if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
875bf215546Sopenharmony_ci            var->data.explicit_xfb_buffer = 1;
876bf215546Sopenharmony_ci            var->data.xfb.buffer = output->output_buffer;
877bf215546Sopenharmony_ci            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
878bf215546Sopenharmony_ci            var->data.offset = output->dst_offset * 4;
879bf215546Sopenharmony_ci            var->data.stream = output->stream;
880bf215546Sopenharmony_ci            for (unsigned j = 0; j < output->num_components; j++)
881bf215546Sopenharmony_ci               inlined[slot][output->start_component + j] = true;
882bf215546Sopenharmony_ci         } else {
883bf215546Sopenharmony_ci            /* otherwise store some metadata for later */
884bf215546Sopenharmony_ci            packed |= BITFIELD64_BIT(slot);
885bf215546Sopenharmony_ci            packed_components[slot] += output->num_components;
886bf215546Sopenharmony_ci            packed_streams[slot] |= BITFIELD_BIT(output->stream);
887bf215546Sopenharmony_ci            packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
888bf215546Sopenharmony_ci            for (unsigned j = 0; j < output->num_components; j++)
889bf215546Sopenharmony_ci               packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
890bf215546Sopenharmony_ci         }
891bf215546Sopenharmony_ci      }
892bf215546Sopenharmony_ci   }
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_ci   /* if this was flagged as a packed output before, and if all the components are
895bf215546Sopenharmony_ci    * being output with the same stream on the same buffer with increasing offsets, this entire variable
896bf215546Sopenharmony_ci    * can be consolidated into a single output to conserve locations
897bf215546Sopenharmony_ci    */
898bf215546Sopenharmony_ci   for (unsigned i = 0; i < so_info->num_outputs; i++) {
899bf215546Sopenharmony_ci      const struct pipe_stream_output *output = &so_info->output[i];
900bf215546Sopenharmony_ci      unsigned slot = reverse_map[output->register_index];
901bf215546Sopenharmony_ci      if (is_inlined(inlined[slot], output))
902bf215546Sopenharmony_ci         continue;
903bf215546Sopenharmony_ci      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
904bf215546Sopenharmony_ci         nir_variable *var = NULL;
905bf215546Sopenharmony_ci         while (!var)
906bf215546Sopenharmony_ci            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
907bf215546Sopenharmony_ci         /* this is a lowered 64bit variable that can't be exported due to packing */
908bf215546Sopenharmony_ci         if (var->data.is_xfb)
909bf215546Sopenharmony_ci            goto out;
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci         unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
912bf215546Sopenharmony_ci         /* for each variable, iterate over all the variable's slots and inline the outputs */
913bf215546Sopenharmony_ci         for (unsigned j = 0; j < num_slots; j++) {
914bf215546Sopenharmony_ci            slot = var->data.location + j;
915bf215546Sopenharmony_ci            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
916bf215546Sopenharmony_ci            if (!packed_output)
917bf215546Sopenharmony_ci               goto out;
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_ci            /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
920bf215546Sopenharmony_ci            if (!(packed & BITFIELD64_BIT(slot)) ||
921bf215546Sopenharmony_ci                util_bitcount(packed_streams[slot]) != 1 ||
922bf215546Sopenharmony_ci                util_bitcount(packed_buffers[slot]) != 1)
923bf215546Sopenharmony_ci               goto out;
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci            /* if all the components the variable exports to this slot aren't captured, skip consolidation */
926bf215546Sopenharmony_ci            unsigned num_components = get_slot_components(var, slot, var->data.location);
927bf215546Sopenharmony_ci            if (glsl_type_is_array(var->type) && !glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
928bf215546Sopenharmony_ci               num_components /= glsl_array_size(var->type);
929bf215546Sopenharmony_ci            if (num_components != packed_components[slot])
930bf215546Sopenharmony_ci               goto out;
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci            /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
933bf215546Sopenharmony_ci            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
934bf215546Sopenharmony_ci            for (unsigned k = 1; k < num_components; k++) {
935bf215546Sopenharmony_ci               /* if the offsets are not incrementing as expected, skip consolidation */
936bf215546Sopenharmony_ci               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
937bf215546Sopenharmony_ci                  goto out;
938bf215546Sopenharmony_ci               prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
939bf215546Sopenharmony_ci            }
940bf215546Sopenharmony_ci         }
941bf215546Sopenharmony_ci         /* this output can be consolidated: blast out all the data inlined */
942bf215546Sopenharmony_ci         var->data.explicit_xfb_buffer = 1;
943bf215546Sopenharmony_ci         var->data.xfb.buffer = output->output_buffer;
944bf215546Sopenharmony_ci         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
945bf215546Sopenharmony_ci         var->data.offset = output->dst_offset * 4;
946bf215546Sopenharmony_ci         var->data.stream = output->stream;
947bf215546Sopenharmony_ci         /* GLSL specifies that interface blocks are split per-buffer in XFB */
948bf215546Sopenharmony_ci         if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
949bf215546Sopenharmony_ci            zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
950bf215546Sopenharmony_ci         /* mark all slot components inlined to skip subsequent loop iterations */
951bf215546Sopenharmony_ci         for (unsigned j = 0; j < num_slots; j++) {
952bf215546Sopenharmony_ci            slot = var->data.location + j;
953bf215546Sopenharmony_ci            for (unsigned k = 0; k < packed_components[slot]; k++)
954bf215546Sopenharmony_ci               inlined[slot][k] = true;
955bf215546Sopenharmony_ci            packed &= ~BITFIELD64_BIT(slot);
956bf215546Sopenharmony_ci         }
957bf215546Sopenharmony_ci         continue;
958bf215546Sopenharmony_ci      }
959bf215546Sopenharmony_ciout:
960bf215546Sopenharmony_ci      /* these are packed/explicit varyings which can't be exported with normal output */
961bf215546Sopenharmony_ci      zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
962bf215546Sopenharmony_ci      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
963bf215546Sopenharmony_ci      zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
964bf215546Sopenharmony_ci   }
965bf215546Sopenharmony_ci   zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
966bf215546Sopenharmony_ci   /* ensure this doesn't get output in the shader by unsetting location */
967bf215546Sopenharmony_ci   if (have_fake_psiz && psiz)
968bf215546Sopenharmony_ci      update_psiz_location(zs->nir, psiz);
969bf215546Sopenharmony_ci}
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_cistruct decompose_state {
972bf215546Sopenharmony_ci  nir_variable **split;
973bf215546Sopenharmony_ci  bool needs_w;
974bf215546Sopenharmony_ci};
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_cistatic bool
977bf215546Sopenharmony_cilower_attrib(nir_builder *b, nir_instr *instr, void *data)
978bf215546Sopenharmony_ci{
979bf215546Sopenharmony_ci   struct decompose_state *state = data;
980bf215546Sopenharmony_ci   nir_variable **split = state->split;
981bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
982bf215546Sopenharmony_ci      return false;
983bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
984bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_deref)
985bf215546Sopenharmony_ci      return false;
986bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
987bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(deref);
988bf215546Sopenharmony_ci   if (var != split[0])
989bf215546Sopenharmony_ci      return false;
990bf215546Sopenharmony_ci   unsigned num_components = glsl_get_vector_elements(split[0]->type);
991bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
992bf215546Sopenharmony_ci   nir_ssa_def *loads[4];
993bf215546Sopenharmony_ci   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
994bf215546Sopenharmony_ci      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
995bf215546Sopenharmony_ci   if (state->needs_w) {
996bf215546Sopenharmony_ci      /* oob load w comopnent to get correct value for int/float */
997bf215546Sopenharmony_ci      loads[3] = nir_channel(b, loads[0], 3);
998bf215546Sopenharmony_ci      loads[0] = nir_channel(b, loads[0], 0);
999bf215546Sopenharmony_ci   }
1000bf215546Sopenharmony_ci   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
1001bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
1002bf215546Sopenharmony_ci   nir_instr_remove_v(instr);
1003bf215546Sopenharmony_ci   return true;
1004bf215546Sopenharmony_ci}
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_cistatic bool
1007bf215546Sopenharmony_cidecompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
1008bf215546Sopenharmony_ci{
1009bf215546Sopenharmony_ci   uint32_t bits = 0;
1010bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
1011bf215546Sopenharmony_ci      bits |= BITFIELD_BIT(var->data.driver_location);
1012bf215546Sopenharmony_ci   bits = ~bits;
1013bf215546Sopenharmony_ci   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
1014bf215546Sopenharmony_ci      nir_variable *split[5];
1015bf215546Sopenharmony_ci      struct decompose_state state;
1016bf215546Sopenharmony_ci      state.split = split;
1017bf215546Sopenharmony_ci      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
1018bf215546Sopenharmony_ci      assert(var);
1019bf215546Sopenharmony_ci      split[0] = var;
1020bf215546Sopenharmony_ci      bits |= BITFIELD_BIT(var->data.driver_location);
1021bf215546Sopenharmony_ci      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
1022bf215546Sopenharmony_ci      unsigned num_components = glsl_get_vector_elements(var->type);
1023bf215546Sopenharmony_ci      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
1024bf215546Sopenharmony_ci      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
1025bf215546Sopenharmony_ci         split[i+1] = nir_variable_clone(var, nir);
1026bf215546Sopenharmony_ci         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
1027bf215546Sopenharmony_ci         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
1028bf215546Sopenharmony_ci            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
1029bf215546Sopenharmony_ci         else
1030bf215546Sopenharmony_ci            split[i+1]->type = new_type;
1031bf215546Sopenharmony_ci         split[i+1]->data.driver_location = ffs(bits) - 1;
1032bf215546Sopenharmony_ci         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
1033bf215546Sopenharmony_ci         nir_shader_add_variable(nir, split[i+1]);
1034bf215546Sopenharmony_ci      }
1035bf215546Sopenharmony_ci      var->data.mode = nir_var_shader_temp;
1036bf215546Sopenharmony_ci      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
1037bf215546Sopenharmony_ci   }
1038bf215546Sopenharmony_ci   nir_fixup_deref_modes(nir);
1039bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1040bf215546Sopenharmony_ci   optimize_nir(nir, NULL);
1041bf215546Sopenharmony_ci   return true;
1042bf215546Sopenharmony_ci}
1043bf215546Sopenharmony_ci
1044bf215546Sopenharmony_cistatic bool
1045bf215546Sopenharmony_cirewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1046bf215546Sopenharmony_ci{
1047bf215546Sopenharmony_ci   struct zink_screen *screen = data;
1048bf215546Sopenharmony_ci   const bool has_int64 = screen->info.feats.features.shaderInt64;
1049bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1050bf215546Sopenharmony_ci      return false;
1051bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1052bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
1053bf215546Sopenharmony_ci   switch (intr->intrinsic) {
1054bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fadd:
1055bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_add:
1056bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umin:
1057bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imin:
1058bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umax:
1059bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imax:
1060bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_and:
1061bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_or:
1062bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_xor:
1063bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_exchange:
1064bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_comp_swap: {
1065bf215546Sopenharmony_ci      /* convert offset to uintN_t[idx] */
1066bf215546Sopenharmony_ci      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
1067bf215546Sopenharmony_ci      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
1068bf215546Sopenharmony_ci      return true;
1069bf215546Sopenharmony_ci   }
1070bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo:
1071bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo: {
1072bf215546Sopenharmony_ci      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
1073bf215546Sopenharmony_ci      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
1074bf215546Sopenharmony_ci                        nir_src_is_const(intr->src[0]) &&
1075bf215546Sopenharmony_ci                        nir_src_as_uint(intr->src[0]) == 0 &&
1076bf215546Sopenharmony_ci                        nir_dest_bit_size(intr->dest) == 64 &&
1077bf215546Sopenharmony_ci                        nir_intrinsic_align_offset(intr) % 8 != 0;
1078bf215546Sopenharmony_ci      force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
1079bf215546Sopenharmony_ci      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
1080bf215546Sopenharmony_ci      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
1081bf215546Sopenharmony_ci      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
1082bf215546Sopenharmony_ci      if (force_2x32) {
1083bf215546Sopenharmony_ci         /* this is always scalarized */
1084bf215546Sopenharmony_ci         assert(intr->dest.ssa.num_components == 1);
1085bf215546Sopenharmony_ci         /* rewrite as 2x32 */
1086bf215546Sopenharmony_ci         nir_ssa_def *load[2];
1087bf215546Sopenharmony_ci         for (unsigned i = 0; i < 2; i++) {
1088bf215546Sopenharmony_ci            if (intr->intrinsic == nir_intrinsic_load_ssbo)
1089bf215546Sopenharmony_ci               load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
1090bf215546Sopenharmony_ci            else
1091bf215546Sopenharmony_ci               load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
1092bf215546Sopenharmony_ci            nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
1093bf215546Sopenharmony_ci         }
1094bf215546Sopenharmony_ci         /* cast back to 64bit */
1095bf215546Sopenharmony_ci         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
1096bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
1097bf215546Sopenharmony_ci         nir_instr_remove(instr);
1098bf215546Sopenharmony_ci      }
1099bf215546Sopenharmony_ci      return true;
1100bf215546Sopenharmony_ci   }
1101bf215546Sopenharmony_ci   case nir_intrinsic_load_shared:
1102bf215546Sopenharmony_ci      b->cursor = nir_before_instr(instr);
1103bf215546Sopenharmony_ci      bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
1104bf215546Sopenharmony_ci      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
1105bf215546Sopenharmony_ci      nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
1106bf215546Sopenharmony_ci      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
1107bf215546Sopenharmony_ci      if (force_2x32) {
1108bf215546Sopenharmony_ci         /* this is always scalarized */
1109bf215546Sopenharmony_ci         assert(intr->dest.ssa.num_components == 1);
1110bf215546Sopenharmony_ci         /* rewrite as 2x32 */
1111bf215546Sopenharmony_ci         nir_ssa_def *load[2];
1112bf215546Sopenharmony_ci         for (unsigned i = 0; i < 2; i++)
1113bf215546Sopenharmony_ci            load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
1114bf215546Sopenharmony_ci         /* cast back to 64bit */
1115bf215546Sopenharmony_ci         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
1116bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
1117bf215546Sopenharmony_ci         nir_instr_remove(instr);
1118bf215546Sopenharmony_ci         return true;
1119bf215546Sopenharmony_ci      }
1120bf215546Sopenharmony_ci      break;
1121bf215546Sopenharmony_ci   case nir_intrinsic_store_ssbo: {
1122bf215546Sopenharmony_ci      b->cursor = nir_before_instr(instr);
1123bf215546Sopenharmony_ci      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
1124bf215546Sopenharmony_ci      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
1125bf215546Sopenharmony_ci      nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
1126bf215546Sopenharmony_ci      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
1127bf215546Sopenharmony_ci      if (force_2x32) {
1128bf215546Sopenharmony_ci         /* this is always scalarized */
1129bf215546Sopenharmony_ci         assert(intr->src[0].ssa->num_components == 1);
1130bf215546Sopenharmony_ci         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
1131bf215546Sopenharmony_ci         for (unsigned i = 0; i < 2; i++)
1132bf215546Sopenharmony_ci            nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
1133bf215546Sopenharmony_ci         nir_instr_remove(instr);
1134bf215546Sopenharmony_ci      }
1135bf215546Sopenharmony_ci      return true;
1136bf215546Sopenharmony_ci   }
1137bf215546Sopenharmony_ci   case nir_intrinsic_store_shared: {
1138bf215546Sopenharmony_ci      b->cursor = nir_before_instr(instr);
1139bf215546Sopenharmony_ci      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
1140bf215546Sopenharmony_ci      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
1141bf215546Sopenharmony_ci      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
1142bf215546Sopenharmony_ci      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
1143bf215546Sopenharmony_ci      if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
1144bf215546Sopenharmony_ci         /* this is always scalarized */
1145bf215546Sopenharmony_ci         assert(intr->src[0].ssa->num_components == 1);
1146bf215546Sopenharmony_ci         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
1147bf215546Sopenharmony_ci         for (unsigned i = 0; i < 2; i++)
1148bf215546Sopenharmony_ci            nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
1149bf215546Sopenharmony_ci         nir_instr_remove(instr);
1150bf215546Sopenharmony_ci      }
1151bf215546Sopenharmony_ci      return true;
1152bf215546Sopenharmony_ci   }
1153bf215546Sopenharmony_ci   default:
1154bf215546Sopenharmony_ci      break;
1155bf215546Sopenharmony_ci   }
1156bf215546Sopenharmony_ci   return false;
1157bf215546Sopenharmony_ci}
1158bf215546Sopenharmony_ci
1159bf215546Sopenharmony_cistatic bool
1160bf215546Sopenharmony_cirewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
1161bf215546Sopenharmony_ci{
1162bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
1163bf215546Sopenharmony_ci}
1164bf215546Sopenharmony_ci
1165bf215546Sopenharmony_cistatic nir_variable *
1166bf215546Sopenharmony_ciget_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
1167bf215546Sopenharmony_ci{
1168bf215546Sopenharmony_ci   nir_variable *var, **ptr;
1169bf215546Sopenharmony_ci   unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_ci   if (ssbo)
1172bf215546Sopenharmony_ci      ptr = &bo->ssbo[bit_size >> 4];
1173bf215546Sopenharmony_ci   else {
1174bf215546Sopenharmony_ci      if (!idx) {
1175bf215546Sopenharmony_ci         ptr = &bo->uniforms[bit_size >> 4];
1176bf215546Sopenharmony_ci      } else
1177bf215546Sopenharmony_ci         ptr = &bo->ubo[bit_size >> 4];
1178bf215546Sopenharmony_ci   }
1179bf215546Sopenharmony_ci   var = *ptr;
1180bf215546Sopenharmony_ci   if (!var) {
1181bf215546Sopenharmony_ci      if (ssbo)
1182bf215546Sopenharmony_ci         var = bo->ssbo[32 >> 4];
1183bf215546Sopenharmony_ci      else {
1184bf215546Sopenharmony_ci         if (!idx)
1185bf215546Sopenharmony_ci            var = bo->uniforms[32 >> 4];
1186bf215546Sopenharmony_ci         else
1187bf215546Sopenharmony_ci            var = bo->ubo[32 >> 4];
1188bf215546Sopenharmony_ci      }
1189bf215546Sopenharmony_ci      var = nir_variable_clone(var, shader);
1190bf215546Sopenharmony_ci      *ptr = var;
1191bf215546Sopenharmony_ci      nir_shader_add_variable(shader, var);
1192bf215546Sopenharmony_ci
1193bf215546Sopenharmony_ci      struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
1194bf215546Sopenharmony_ci      fields[0].name = ralloc_strdup(shader, "base");
1195bf215546Sopenharmony_ci      fields[1].name = ralloc_strdup(shader, "unsized");
1196bf215546Sopenharmony_ci      unsigned array_size = glsl_get_length(var->type);
1197bf215546Sopenharmony_ci      const struct glsl_type *bare_type = glsl_without_array(var->type);
1198bf215546Sopenharmony_ci      const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
1199bf215546Sopenharmony_ci      unsigned length = glsl_get_length(array_type);
1200bf215546Sopenharmony_ci      const struct glsl_type *type;
1201bf215546Sopenharmony_ci      const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
1202bf215546Sopenharmony_ci      if (bit_size > 32) {
1203bf215546Sopenharmony_ci         assert(bit_size == 64);
1204bf215546Sopenharmony_ci         type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
1205bf215546Sopenharmony_ci      } else {
1206bf215546Sopenharmony_ci         type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
1207bf215546Sopenharmony_ci      }
1208bf215546Sopenharmony_ci      fields[0].type = type;
1209bf215546Sopenharmony_ci      fields[1].type = unsized;
1210bf215546Sopenharmony_ci      var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
1211bf215546Sopenharmony_ci      var->data.driver_location = idx;
1212bf215546Sopenharmony_ci   }
1213bf215546Sopenharmony_ci   return var;
1214bf215546Sopenharmony_ci}
1215bf215546Sopenharmony_ci
1216bf215546Sopenharmony_cistatic void
1217bf215546Sopenharmony_cirewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
1218bf215546Sopenharmony_ci{
1219bf215546Sopenharmony_ci   nir_intrinsic_op op;
1220bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1221bf215546Sopenharmony_ci   switch (intr->intrinsic) {
1222bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_add:
1223bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_add;
1224bf215546Sopenharmony_ci      break;
1225bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umin:
1226bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_umin;
1227bf215546Sopenharmony_ci      break;
1228bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imin:
1229bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_imin;
1230bf215546Sopenharmony_ci      break;
1231bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umax:
1232bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_umax;
1233bf215546Sopenharmony_ci      break;
1234bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imax:
1235bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_imax;
1236bf215546Sopenharmony_ci      break;
1237bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_and:
1238bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_and;
1239bf215546Sopenharmony_ci      break;
1240bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_or:
1241bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_or;
1242bf215546Sopenharmony_ci      break;
1243bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_xor:
1244bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_xor;
1245bf215546Sopenharmony_ci      break;
1246bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_exchange:
1247bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_exchange;
1248bf215546Sopenharmony_ci      break;
1249bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_comp_swap:
1250bf215546Sopenharmony_ci      op = nir_intrinsic_deref_atomic_comp_swap;
1251bf215546Sopenharmony_ci      break;
1252bf215546Sopenharmony_ci   default:
1253bf215546Sopenharmony_ci      unreachable("unknown intrinsic");
1254bf215546Sopenharmony_ci   }
1255bf215546Sopenharmony_ci   nir_ssa_def *offset = intr->src[1].ssa;
1256bf215546Sopenharmony_ci   nir_src *src = &intr->src[0];
1257bf215546Sopenharmony_ci   nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
1258bf215546Sopenharmony_ci   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
1259bf215546Sopenharmony_ci   nir_ssa_def *idx = src->ssa;
1260bf215546Sopenharmony_ci   if (bo->first_ssbo)
1261bf215546Sopenharmony_ci      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
1262bf215546Sopenharmony_ci   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
1263bf215546Sopenharmony_ci   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
1264bf215546Sopenharmony_ci
1265bf215546Sopenharmony_ci   /* generate new atomic deref ops for every component */
1266bf215546Sopenharmony_ci   nir_ssa_def *result[4];
1267bf215546Sopenharmony_ci   unsigned num_components = nir_dest_num_components(intr->dest);
1268bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_components; i++) {
1269bf215546Sopenharmony_ci      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
1270bf215546Sopenharmony_ci      nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
1271bf215546Sopenharmony_ci      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
1272bf215546Sopenharmony_ci      new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
1273bf215546Sopenharmony_ci      /* deref ops have no offset src, so copy the srcs after it */
1274bf215546Sopenharmony_ci      for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
1275bf215546Sopenharmony_ci         nir_src_copy(&new_instr->src[i - 1], &intr->src[i]);
1276bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &new_instr->instr);
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_ci      result[i] = &new_instr->dest.ssa;
1279bf215546Sopenharmony_ci      offset = nir_iadd_imm(b, offset, 1);
1280bf215546Sopenharmony_ci   }
1281bf215546Sopenharmony_ci
1282bf215546Sopenharmony_ci   nir_ssa_def *load = nir_vec(b, result, num_components);
1283bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1284bf215546Sopenharmony_ci   nir_instr_remove(instr);
1285bf215546Sopenharmony_ci}
1286bf215546Sopenharmony_ci
1287bf215546Sopenharmony_cistatic bool
1288bf215546Sopenharmony_ciremove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1289bf215546Sopenharmony_ci{
1290bf215546Sopenharmony_ci   struct bo_vars *bo = data;
1291bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1292bf215546Sopenharmony_ci      return false;
1293bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1294bf215546Sopenharmony_ci   nir_variable *var = NULL;
1295bf215546Sopenharmony_ci   nir_ssa_def *offset = NULL;
1296bf215546Sopenharmony_ci   bool is_load = true;
1297bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
1298bf215546Sopenharmony_ci   nir_src *src;
1299bf215546Sopenharmony_ci   bool ssbo = true;
1300bf215546Sopenharmony_ci   switch (intr->intrinsic) {
1301bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_add:
1302bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umin:
1303bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imin:
1304bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umax:
1305bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imax:
1306bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_and:
1307bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_or:
1308bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_xor:
1309bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_exchange:
1310bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_comp_swap:
1311bf215546Sopenharmony_ci      rewrite_atomic_ssbo_instr(b, instr, bo);
1312bf215546Sopenharmony_ci      return true;
1313bf215546Sopenharmony_ci   case nir_intrinsic_store_ssbo:
1314bf215546Sopenharmony_ci      src = &intr->src[1];
1315bf215546Sopenharmony_ci      var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
1316bf215546Sopenharmony_ci      offset = intr->src[2].ssa;
1317bf215546Sopenharmony_ci      is_load = false;
1318bf215546Sopenharmony_ci      break;
1319bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo:
1320bf215546Sopenharmony_ci      src = &intr->src[0];
1321bf215546Sopenharmony_ci      var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
1322bf215546Sopenharmony_ci      offset = intr->src[1].ssa;
1323bf215546Sopenharmony_ci      break;
1324bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo:
1325bf215546Sopenharmony_ci      src = &intr->src[0];
1326bf215546Sopenharmony_ci      var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
1327bf215546Sopenharmony_ci      offset = intr->src[1].ssa;
1328bf215546Sopenharmony_ci      ssbo = false;
1329bf215546Sopenharmony_ci      break;
1330bf215546Sopenharmony_ci   default:
1331bf215546Sopenharmony_ci      return false;
1332bf215546Sopenharmony_ci   }
1333bf215546Sopenharmony_ci   assert(var);
1334bf215546Sopenharmony_ci   assert(offset);
1335bf215546Sopenharmony_ci   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
1336bf215546Sopenharmony_ci   nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
1337bf215546Sopenharmony_ci   if (!ssbo && bo->first_ubo && var->data.driver_location)
1338bf215546Sopenharmony_ci      idx = nir_iadd_imm(b, idx, -bo->first_ubo);
1339bf215546Sopenharmony_ci   else if (ssbo && bo->first_ssbo)
1340bf215546Sopenharmony_ci      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
1341bf215546Sopenharmony_ci   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
1342bf215546Sopenharmony_ci   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
1343bf215546Sopenharmony_ci   assert(intr->num_components <= 2);
1344bf215546Sopenharmony_ci   if (is_load) {
1345bf215546Sopenharmony_ci      nir_ssa_def *result[2];
1346bf215546Sopenharmony_ci      for (unsigned i = 0; i < intr->num_components; i++) {
1347bf215546Sopenharmony_ci         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
1348bf215546Sopenharmony_ci         result[i] = nir_load_deref(b, deref_arr);
1349bf215546Sopenharmony_ci         if (intr->intrinsic == nir_intrinsic_load_ssbo)
1350bf215546Sopenharmony_ci            nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
1351bf215546Sopenharmony_ci         offset = nir_iadd_imm(b, offset, 1);
1352bf215546Sopenharmony_ci      }
1353bf215546Sopenharmony_ci      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1354bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1355bf215546Sopenharmony_ci   } else {
1356bf215546Sopenharmony_ci      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
1357bf215546Sopenharmony_ci      nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
1358bf215546Sopenharmony_ci   }
1359bf215546Sopenharmony_ci   nir_instr_remove(instr);
1360bf215546Sopenharmony_ci   return true;
1361bf215546Sopenharmony_ci}
1362bf215546Sopenharmony_ci
1363bf215546Sopenharmony_cistatic bool
1364bf215546Sopenharmony_ciremove_bo_access(nir_shader *shader, struct zink_shader *zs)
1365bf215546Sopenharmony_ci{
1366bf215546Sopenharmony_ci   struct bo_vars bo = get_bo_vars(zs, shader);
1367bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
1368bf215546Sopenharmony_ci}
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_cistatic void
1371bf215546Sopenharmony_ciassign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1372bf215546Sopenharmony_ci{
1373bf215546Sopenharmony_ci   unsigned slot = var->data.location;
1374bf215546Sopenharmony_ci   switch (slot) {
1375bf215546Sopenharmony_ci   case -1:
1376bf215546Sopenharmony_ci   case VARYING_SLOT_POS:
1377bf215546Sopenharmony_ci   case VARYING_SLOT_PNTC:
1378bf215546Sopenharmony_ci   case VARYING_SLOT_PSIZ:
1379bf215546Sopenharmony_ci   case VARYING_SLOT_LAYER:
1380bf215546Sopenharmony_ci   case VARYING_SLOT_PRIMITIVE_ID:
1381bf215546Sopenharmony_ci   case VARYING_SLOT_CLIP_DIST0:
1382bf215546Sopenharmony_ci   case VARYING_SLOT_CULL_DIST0:
1383bf215546Sopenharmony_ci   case VARYING_SLOT_VIEWPORT:
1384bf215546Sopenharmony_ci   case VARYING_SLOT_FACE:
1385bf215546Sopenharmony_ci   case VARYING_SLOT_TESS_LEVEL_OUTER:
1386bf215546Sopenharmony_ci   case VARYING_SLOT_TESS_LEVEL_INNER:
1387bf215546Sopenharmony_ci      /* use a sentinel value to avoid counting later */
1388bf215546Sopenharmony_ci      var->data.driver_location = UINT_MAX;
1389bf215546Sopenharmony_ci      break;
1390bf215546Sopenharmony_ci
1391bf215546Sopenharmony_ci   default:
1392bf215546Sopenharmony_ci      if (var->data.patch) {
1393bf215546Sopenharmony_ci         assert(slot >= VARYING_SLOT_PATCH0);
1394bf215546Sopenharmony_ci         slot -= VARYING_SLOT_PATCH0;
1395bf215546Sopenharmony_ci      }
1396bf215546Sopenharmony_ci      if (slot_map[slot] == 0xff) {
1397bf215546Sopenharmony_ci         assert(*reserved < MAX_VARYING);
1398bf215546Sopenharmony_ci         unsigned num_slots;
1399bf215546Sopenharmony_ci         if (nir_is_arrayed_io(var, stage))
1400bf215546Sopenharmony_ci            num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
1401bf215546Sopenharmony_ci         else
1402bf215546Sopenharmony_ci            num_slots = glsl_count_vec4_slots(var->type, false, false);
1403bf215546Sopenharmony_ci         assert(*reserved + num_slots <= MAX_VARYING);
1404bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_slots; i++)
1405bf215546Sopenharmony_ci            slot_map[slot + i] = (*reserved)++;
1406bf215546Sopenharmony_ci      }
1407bf215546Sopenharmony_ci      slot = slot_map[slot];
1408bf215546Sopenharmony_ci      assert(slot < MAX_VARYING);
1409bf215546Sopenharmony_ci      var->data.driver_location = slot;
1410bf215546Sopenharmony_ci   }
1411bf215546Sopenharmony_ci}
1412bf215546Sopenharmony_ci
1413bf215546Sopenharmony_ciALWAYS_INLINE static bool
1414bf215546Sopenharmony_ciis_texcoord(gl_shader_stage stage, const nir_variable *var)
1415bf215546Sopenharmony_ci{
1416bf215546Sopenharmony_ci   if (stage != MESA_SHADER_FRAGMENT)
1417bf215546Sopenharmony_ci      return false;
1418bf215546Sopenharmony_ci   return var->data.location >= VARYING_SLOT_TEX0 &&
1419bf215546Sopenharmony_ci          var->data.location <= VARYING_SLOT_TEX7;
1420bf215546Sopenharmony_ci}
1421bf215546Sopenharmony_ci
1422bf215546Sopenharmony_cistatic bool
1423bf215546Sopenharmony_ciassign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1424bf215546Sopenharmony_ci{
1425bf215546Sopenharmony_ci   unsigned slot = var->data.location;
1426bf215546Sopenharmony_ci   switch (slot) {
1427bf215546Sopenharmony_ci   case VARYING_SLOT_POS:
1428bf215546Sopenharmony_ci   case VARYING_SLOT_PNTC:
1429bf215546Sopenharmony_ci   case VARYING_SLOT_PSIZ:
1430bf215546Sopenharmony_ci   case VARYING_SLOT_LAYER:
1431bf215546Sopenharmony_ci   case VARYING_SLOT_PRIMITIVE_ID:
1432bf215546Sopenharmony_ci   case VARYING_SLOT_CLIP_DIST0:
1433bf215546Sopenharmony_ci   case VARYING_SLOT_CULL_DIST0:
1434bf215546Sopenharmony_ci   case VARYING_SLOT_VIEWPORT:
1435bf215546Sopenharmony_ci   case VARYING_SLOT_FACE:
1436bf215546Sopenharmony_ci   case VARYING_SLOT_TESS_LEVEL_OUTER:
1437bf215546Sopenharmony_ci   case VARYING_SLOT_TESS_LEVEL_INNER:
1438bf215546Sopenharmony_ci      /* use a sentinel value to avoid counting later */
1439bf215546Sopenharmony_ci      var->data.driver_location = UINT_MAX;
1440bf215546Sopenharmony_ci      break;
1441bf215546Sopenharmony_ci   default:
1442bf215546Sopenharmony_ci      if (var->data.patch) {
1443bf215546Sopenharmony_ci         assert(slot >= VARYING_SLOT_PATCH0);
1444bf215546Sopenharmony_ci         slot -= VARYING_SLOT_PATCH0;
1445bf215546Sopenharmony_ci      }
1446bf215546Sopenharmony_ci      if (slot_map[slot] == (unsigned char)-1) {
1447bf215546Sopenharmony_ci         if (stage != MESA_SHADER_TESS_CTRL && !is_texcoord(stage, var))
1448bf215546Sopenharmony_ci            /* dead io */
1449bf215546Sopenharmony_ci            return false;
1450bf215546Sopenharmony_ci         /* - texcoords can't be eliminated in fs due to GL_COORD_REPLACE
1451bf215546Sopenharmony_ci          * - patch variables may be read in the workgroup
1452bf215546Sopenharmony_ci          */
1453bf215546Sopenharmony_ci         slot_map[slot] = (*reserved)++;
1454bf215546Sopenharmony_ci      }
1455bf215546Sopenharmony_ci      var->data.driver_location = slot_map[slot];
1456bf215546Sopenharmony_ci   }
1457bf215546Sopenharmony_ci   return true;
1458bf215546Sopenharmony_ci}
1459bf215546Sopenharmony_ci
1460bf215546Sopenharmony_ci
1461bf215546Sopenharmony_cistatic bool
1462bf215546Sopenharmony_cirewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
1463bf215546Sopenharmony_ci{
1464bf215546Sopenharmony_ci   nir_variable *var = data;
1465bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1466bf215546Sopenharmony_ci      return false;
1467bf215546Sopenharmony_ci
1468bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1469bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_deref)
1470bf215546Sopenharmony_ci      return false;
1471bf215546Sopenharmony_ci   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
1472bf215546Sopenharmony_ci   if (deref_var != var)
1473bf215546Sopenharmony_ci      return false;
1474bf215546Sopenharmony_ci   nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
1475bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
1476bf215546Sopenharmony_ci   return true;
1477bf215546Sopenharmony_ci}
1478bf215546Sopenharmony_ci
1479bf215546Sopenharmony_civoid
1480bf215546Sopenharmony_cizink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
1481bf215546Sopenharmony_ci{
1482bf215546Sopenharmony_ci   unsigned reserved = 0;
1483bf215546Sopenharmony_ci   unsigned char slot_map[VARYING_SLOT_MAX];
1484bf215546Sopenharmony_ci   memset(slot_map, -1, sizeof(slot_map));
1485bf215546Sopenharmony_ci   bool do_fixup = false;
1486bf215546Sopenharmony_ci   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
1487bf215546Sopenharmony_ci   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
1488bf215546Sopenharmony_ci      /* remove injected pointsize from all but the last vertex stage */
1489bf215546Sopenharmony_ci      nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
1490bf215546Sopenharmony_ci      if (var && !var->data.explicit_location) {
1491bf215546Sopenharmony_ci         var->data.mode = nir_var_shader_temp;
1492bf215546Sopenharmony_ci         nir_fixup_deref_modes(producer);
1493bf215546Sopenharmony_ci         NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1494bf215546Sopenharmony_ci         optimize_nir(producer, NULL);
1495bf215546Sopenharmony_ci      }
1496bf215546Sopenharmony_ci   }
1497bf215546Sopenharmony_ci   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
1498bf215546Sopenharmony_ci      /* never assign from tcs -> tes, always invert */
1499bf215546Sopenharmony_ci      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
1500bf215546Sopenharmony_ci         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
1501bf215546Sopenharmony_ci      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
1502bf215546Sopenharmony_ci         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
1503bf215546Sopenharmony_ci            /* this is an output, nothing more needs to be done for it to be dropped */
1504bf215546Sopenharmony_ci            do_fixup = true;
1505bf215546Sopenharmony_ci      }
1506bf215546Sopenharmony_ci   } else {
1507bf215546Sopenharmony_ci      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
1508bf215546Sopenharmony_ci         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
1509bf215546Sopenharmony_ci      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
1510bf215546Sopenharmony_ci         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
1511bf215546Sopenharmony_ci            do_fixup = true;
1512bf215546Sopenharmony_ci            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
1513bf215546Sopenharmony_ci            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
1514bf215546Sopenharmony_ci         }
1515bf215546Sopenharmony_ci      }
1516bf215546Sopenharmony_ci   }
1517bf215546Sopenharmony_ci   if (!do_fixup)
1518bf215546Sopenharmony_ci      return;
1519bf215546Sopenharmony_ci   nir_fixup_deref_modes(nir);
1520bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1521bf215546Sopenharmony_ci   optimize_nir(nir, NULL);
1522bf215546Sopenharmony_ci}
1523bf215546Sopenharmony_ci
1524bf215546Sopenharmony_ci/* all types that hit this function contain something that is 64bit */
1525bf215546Sopenharmony_cistatic const struct glsl_type *
1526bf215546Sopenharmony_cirewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var)
1527bf215546Sopenharmony_ci{
1528bf215546Sopenharmony_ci   if (glsl_type_is_array(type)) {
1529bf215546Sopenharmony_ci      const struct glsl_type *child = glsl_get_array_element(type);
1530bf215546Sopenharmony_ci      unsigned elements = glsl_array_size(type);
1531bf215546Sopenharmony_ci      unsigned stride = glsl_get_explicit_stride(type);
1532bf215546Sopenharmony_ci      return glsl_array_type(rewrite_64bit_type(nir, child, var), elements, stride);
1533bf215546Sopenharmony_ci   }
1534bf215546Sopenharmony_ci   /* rewrite structs recursively */
1535bf215546Sopenharmony_ci   if (glsl_type_is_struct_or_ifc(type)) {
1536bf215546Sopenharmony_ci      unsigned nmembers = glsl_get_length(type);
1537bf215546Sopenharmony_ci      struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
1538bf215546Sopenharmony_ci      unsigned xfb_offset = 0;
1539bf215546Sopenharmony_ci      for (unsigned i = 0; i < nmembers; i++) {
1540bf215546Sopenharmony_ci         const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
1541bf215546Sopenharmony_ci         fields[i] = *f;
1542bf215546Sopenharmony_ci         xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
1543bf215546Sopenharmony_ci         if (i < nmembers - 1 && xfb_offset % 8 &&
1544bf215546Sopenharmony_ci             glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1))) {
1545bf215546Sopenharmony_ci            var->data.is_xfb = true;
1546bf215546Sopenharmony_ci         }
1547bf215546Sopenharmony_ci         fields[i].type = rewrite_64bit_type(nir, f->type, var);
1548bf215546Sopenharmony_ci      }
1549bf215546Sopenharmony_ci      return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
1550bf215546Sopenharmony_ci   }
1551bf215546Sopenharmony_ci   if (!glsl_type_is_64bit(type))
1552bf215546Sopenharmony_ci      return type;
1553bf215546Sopenharmony_ci   enum glsl_base_type base_type;
1554bf215546Sopenharmony_ci   switch (glsl_get_base_type(type)) {
1555bf215546Sopenharmony_ci   case GLSL_TYPE_UINT64:
1556bf215546Sopenharmony_ci      base_type = GLSL_TYPE_UINT;
1557bf215546Sopenharmony_ci      break;
1558bf215546Sopenharmony_ci   case GLSL_TYPE_INT64:
1559bf215546Sopenharmony_ci      base_type = GLSL_TYPE_INT;
1560bf215546Sopenharmony_ci      break;
1561bf215546Sopenharmony_ci   case GLSL_TYPE_DOUBLE:
1562bf215546Sopenharmony_ci      base_type = GLSL_TYPE_FLOAT;
1563bf215546Sopenharmony_ci      break;
1564bf215546Sopenharmony_ci   default:
1565bf215546Sopenharmony_ci      unreachable("unknown 64-bit vertex attribute format!");
1566bf215546Sopenharmony_ci   }
1567bf215546Sopenharmony_ci   if (glsl_type_is_scalar(type))
1568bf215546Sopenharmony_ci      return glsl_vector_type(base_type, 2);
1569bf215546Sopenharmony_ci   unsigned num_components;
1570bf215546Sopenharmony_ci   if (glsl_type_is_matrix(type)) {
1571bf215546Sopenharmony_ci      /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
1572bf215546Sopenharmony_ci      unsigned vec_components = glsl_get_vector_elements(type);
1573bf215546Sopenharmony_ci      if (vec_components == 3)
1574bf215546Sopenharmony_ci         vec_components = 4;
1575bf215546Sopenharmony_ci      num_components = vec_components * 2 * glsl_get_matrix_columns(type);
1576bf215546Sopenharmony_ci   } else {
1577bf215546Sopenharmony_ci      num_components = glsl_get_vector_elements(type) * 2;
1578bf215546Sopenharmony_ci      if (num_components <= 4)
1579bf215546Sopenharmony_ci         return glsl_vector_type(base_type, num_components);
1580bf215546Sopenharmony_ci   }
1581bf215546Sopenharmony_ci   /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
1582bf215546Sopenharmony_ci   struct glsl_struct_field fields[8] = {0};
1583bf215546Sopenharmony_ci   unsigned remaining = num_components;
1584bf215546Sopenharmony_ci   unsigned nfields = 0;
1585bf215546Sopenharmony_ci   for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
1586bf215546Sopenharmony_ci      assert(i < ARRAY_SIZE(fields));
1587bf215546Sopenharmony_ci      fields[i].name = "";
1588bf215546Sopenharmony_ci      fields[i].offset = i * 16;
1589bf215546Sopenharmony_ci      fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
1590bf215546Sopenharmony_ci   }
1591bf215546Sopenharmony_ci   char buf[64];
1592bf215546Sopenharmony_ci   snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
1593bf215546Sopenharmony_ci   return glsl_struct_type(fields, nfields, buf, true);
1594bf215546Sopenharmony_ci}
1595bf215546Sopenharmony_ci
1596bf215546Sopenharmony_cistatic const struct glsl_type *
1597bf215546Sopenharmony_cideref_is_matrix(nir_deref_instr *deref)
1598bf215546Sopenharmony_ci{
1599bf215546Sopenharmony_ci   if (glsl_type_is_matrix(deref->type))
1600bf215546Sopenharmony_ci      return deref->type;
1601bf215546Sopenharmony_ci   nir_deref_instr *parent = nir_deref_instr_parent(deref);
1602bf215546Sopenharmony_ci   if (parent)
1603bf215546Sopenharmony_ci      return deref_is_matrix(parent);
1604bf215546Sopenharmony_ci   return NULL;
1605bf215546Sopenharmony_ci}
1606bf215546Sopenharmony_ci
1607bf215546Sopenharmony_ci/* rewrite all input/output variables using 32bit types and load/stores */
1608bf215546Sopenharmony_cistatic bool
1609bf215546Sopenharmony_cilower_64bit_vars(nir_shader *shader)
1610bf215546Sopenharmony_ci{
1611bf215546Sopenharmony_ci   bool progress = false;
1612bf215546Sopenharmony_ci   struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
1613bf215546Sopenharmony_ci   struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
1614bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out) {
1615bf215546Sopenharmony_ci      if (!glsl_type_contains_64bit(var->type))
1616bf215546Sopenharmony_ci         continue;
1617bf215546Sopenharmony_ci      var->type = rewrite_64bit_type(shader, var->type, var);
1618bf215546Sopenharmony_ci      /* once type is rewritten, rewrite all loads and stores */
1619bf215546Sopenharmony_ci      nir_foreach_function(function, shader) {
1620bf215546Sopenharmony_ci         bool func_progress = false;
1621bf215546Sopenharmony_ci         if (!function->impl)
1622bf215546Sopenharmony_ci            continue;
1623bf215546Sopenharmony_ci         nir_builder b;
1624bf215546Sopenharmony_ci         nir_builder_init(&b, function->impl);
1625bf215546Sopenharmony_ci         nir_foreach_block(block, function->impl) {
1626bf215546Sopenharmony_ci            nir_foreach_instr_safe(instr, block) {
1627bf215546Sopenharmony_ci               switch (instr->type) {
1628bf215546Sopenharmony_ci               case nir_instr_type_deref: {
1629bf215546Sopenharmony_ci                  nir_deref_instr *deref = nir_instr_as_deref(instr);
1630bf215546Sopenharmony_ci                  if (!(deref->modes & (nir_var_shader_in | nir_var_shader_out)))
1631bf215546Sopenharmony_ci                     continue;
1632bf215546Sopenharmony_ci                  if (nir_deref_instr_get_variable(deref) != var)
1633bf215546Sopenharmony_ci                     continue;
1634bf215546Sopenharmony_ci
1635bf215546Sopenharmony_ci                  /* matrix types are special: store the original deref type for later use */
1636bf215546Sopenharmony_ci                  const struct glsl_type *matrix = deref_is_matrix(deref);
1637bf215546Sopenharmony_ci                  nir_deref_instr *parent = nir_deref_instr_parent(deref);
1638bf215546Sopenharmony_ci                  if (!matrix) {
1639bf215546Sopenharmony_ci                     /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
1640bf215546Sopenharmony_ci                     hash_table_foreach(derefs, he) {
1641bf215546Sopenharmony_ci                        /* propagate parent matrix type to row deref */
1642bf215546Sopenharmony_ci                        if (he->key == parent)
1643bf215546Sopenharmony_ci                           matrix = he->data;
1644bf215546Sopenharmony_ci                     }
1645bf215546Sopenharmony_ci                  }
1646bf215546Sopenharmony_ci                  if (matrix)
1647bf215546Sopenharmony_ci                     _mesa_hash_table_insert(derefs, deref, (void*)matrix);
1648bf215546Sopenharmony_ci                  if (deref->deref_type == nir_deref_type_var)
1649bf215546Sopenharmony_ci                     deref->type = var->type;
1650bf215546Sopenharmony_ci                  else
1651bf215546Sopenharmony_ci                     deref->type = rewrite_64bit_type(shader, deref->type, var);
1652bf215546Sopenharmony_ci               }
1653bf215546Sopenharmony_ci               break;
1654bf215546Sopenharmony_ci               case nir_instr_type_intrinsic: {
1655bf215546Sopenharmony_ci                  nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1656bf215546Sopenharmony_ci                  if (intr->intrinsic != nir_intrinsic_store_deref &&
1657bf215546Sopenharmony_ci                      intr->intrinsic != nir_intrinsic_load_deref)
1658bf215546Sopenharmony_ci                     break;
1659bf215546Sopenharmony_ci                  if (nir_intrinsic_get_var(intr, 0) != var)
1660bf215546Sopenharmony_ci                     break;
1661bf215546Sopenharmony_ci                  if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
1662bf215546Sopenharmony_ci                      (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
1663bf215546Sopenharmony_ci                     break;
1664bf215546Sopenharmony_ci                  b.cursor = nir_before_instr(instr);
1665bf215546Sopenharmony_ci                  nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1666bf215546Sopenharmony_ci                  unsigned num_components = intr->num_components * 2;
1667bf215546Sopenharmony_ci                  nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
1668bf215546Sopenharmony_ci                  /* this is the stored matrix type from the deref */
1669bf215546Sopenharmony_ci                  struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
1670bf215546Sopenharmony_ci                  const struct glsl_type *matrix = he ? he->data : NULL;
1671bf215546Sopenharmony_ci                  func_progress = true;
1672bf215546Sopenharmony_ci                  if (intr->intrinsic == nir_intrinsic_store_deref) {
1673bf215546Sopenharmony_ci                     /* first, unpack the src data to 32bit vec2 components */
1674bf215546Sopenharmony_ci                     for (unsigned i = 0; i < intr->num_components; i++) {
1675bf215546Sopenharmony_ci                        nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
1676bf215546Sopenharmony_ci                        comp[i * 2] = nir_channel(&b, ssa, 0);
1677bf215546Sopenharmony_ci                        comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
1678bf215546Sopenharmony_ci                     }
1679bf215546Sopenharmony_ci                     unsigned wrmask = nir_intrinsic_write_mask(intr);
1680bf215546Sopenharmony_ci                     unsigned mask = 0;
1681bf215546Sopenharmony_ci                     /* expand writemask for doubled components */
1682bf215546Sopenharmony_ci                     for (unsigned i = 0; i < intr->num_components; i++) {
1683bf215546Sopenharmony_ci                        if (wrmask & BITFIELD_BIT(i))
1684bf215546Sopenharmony_ci                           mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
1685bf215546Sopenharmony_ci                     }
1686bf215546Sopenharmony_ci                     if (matrix) {
1687bf215546Sopenharmony_ci                        /* matrix types always come from array (row) derefs */
1688bf215546Sopenharmony_ci                        assert(deref->deref_type == nir_deref_type_array);
1689bf215546Sopenharmony_ci                        nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
1690bf215546Sopenharmony_ci                        /* let optimization clean up consts later */
1691bf215546Sopenharmony_ci                        nir_ssa_def *index = deref->arr.index.ssa;
1692bf215546Sopenharmony_ci                        /* this might be an indirect array index:
1693bf215546Sopenharmony_ci                         * - iterate over matrix columns
1694bf215546Sopenharmony_ci                         * - add if blocks for each column
1695bf215546Sopenharmony_ci                         * - perform the store in the block
1696bf215546Sopenharmony_ci                         */
1697bf215546Sopenharmony_ci                        for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
1698bf215546Sopenharmony_ci                           nir_push_if(&b, nir_ieq_imm(&b, index, idx));
1699bf215546Sopenharmony_ci                           unsigned vec_components = glsl_get_vector_elements(matrix);
1700bf215546Sopenharmony_ci                           /* always clamp dvec3 to 4 components */
1701bf215546Sopenharmony_ci                           if (vec_components == 3)
1702bf215546Sopenharmony_ci                              vec_components = 4;
1703bf215546Sopenharmony_ci                           unsigned start_component = idx * vec_components * 2;
1704bf215546Sopenharmony_ci                           /* struct member */
1705bf215546Sopenharmony_ci                           unsigned member = start_component / 4;
1706bf215546Sopenharmony_ci                           /* number of components remaining */
1707bf215546Sopenharmony_ci                           unsigned remaining = num_components;
1708bf215546Sopenharmony_ci                           for (unsigned i = 0; i < num_components; member++) {
1709bf215546Sopenharmony_ci                              if (!(mask & BITFIELD_BIT(i)))
1710bf215546Sopenharmony_ci                                 continue;
1711bf215546Sopenharmony_ci                              assert(member < glsl_get_length(var_deref->type));
1712bf215546Sopenharmony_ci                              /* deref the rewritten struct to the appropriate vec4/vec2 */
1713bf215546Sopenharmony_ci                              nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
1714bf215546Sopenharmony_ci                              unsigned incr = MIN2(remaining, 4);
1715bf215546Sopenharmony_ci                              /* assemble the write component vec */
1716bf215546Sopenharmony_ci                              nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
1717bf215546Sopenharmony_ci                              /* use the number of components being written as the writemask */
1718bf215546Sopenharmony_ci                              if (glsl_get_vector_elements(strct->type) > val->num_components)
1719bf215546Sopenharmony_ci                                 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
1720bf215546Sopenharmony_ci                              nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
1721bf215546Sopenharmony_ci                              remaining -= incr;
1722bf215546Sopenharmony_ci                              i += incr;
1723bf215546Sopenharmony_ci                           }
1724bf215546Sopenharmony_ci                           nir_pop_if(&b, NULL);
1725bf215546Sopenharmony_ci                        }
1726bf215546Sopenharmony_ci                        _mesa_set_add(deletes, &deref->instr);
1727bf215546Sopenharmony_ci                     } else if (num_components <= 4) {
1728bf215546Sopenharmony_ci                        /* simple store case: just write out the components */
1729bf215546Sopenharmony_ci                        nir_ssa_def *dest = nir_vec(&b, comp, num_components);
1730bf215546Sopenharmony_ci                        nir_store_deref(&b, deref, dest, mask);
1731bf215546Sopenharmony_ci                     } else {
1732bf215546Sopenharmony_ci                        /* writing > 4 components: access the struct and write to the appropriate vec4 members */
1733bf215546Sopenharmony_ci                        for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
1734bf215546Sopenharmony_ci                           if (!(mask & BITFIELD_MASK(4)))
1735bf215546Sopenharmony_ci                              continue;
1736bf215546Sopenharmony_ci                           nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
1737bf215546Sopenharmony_ci                           nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
1738bf215546Sopenharmony_ci                           if (glsl_get_vector_elements(strct->type) > dest->num_components)
1739bf215546Sopenharmony_ci                              dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
1740bf215546Sopenharmony_ci                           nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
1741bf215546Sopenharmony_ci                           mask >>= 4;
1742bf215546Sopenharmony_ci                        }
1743bf215546Sopenharmony_ci                     }
1744bf215546Sopenharmony_ci                  } else {
1745bf215546Sopenharmony_ci                     nir_ssa_def *dest = NULL;
1746bf215546Sopenharmony_ci                     if (matrix) {
1747bf215546Sopenharmony_ci                        /* matrix types always come from array (row) derefs */
1748bf215546Sopenharmony_ci                        assert(deref->deref_type == nir_deref_type_array);
1749bf215546Sopenharmony_ci                        nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
1750bf215546Sopenharmony_ci                        /* let optimization clean up consts later */
1751bf215546Sopenharmony_ci                        nir_ssa_def *index = deref->arr.index.ssa;
1752bf215546Sopenharmony_ci                        /* this might be an indirect array index:
1753bf215546Sopenharmony_ci                         * - iterate over matrix columns
1754bf215546Sopenharmony_ci                         * - add if blocks for each column
1755bf215546Sopenharmony_ci                         * - phi the loads using the array index
1756bf215546Sopenharmony_ci                         */
1757bf215546Sopenharmony_ci                        unsigned cols = glsl_get_matrix_columns(matrix);
1758bf215546Sopenharmony_ci                        nir_ssa_def *dests[4];
1759bf215546Sopenharmony_ci                        for (unsigned idx = 0; idx < cols; idx++) {
1760bf215546Sopenharmony_ci                           /* don't add an if for the final row: this will be handled in the else */
1761bf215546Sopenharmony_ci                           if (idx < cols - 1)
1762bf215546Sopenharmony_ci                              nir_push_if(&b, nir_ieq_imm(&b, index, idx));
1763bf215546Sopenharmony_ci                           unsigned vec_components = glsl_get_vector_elements(matrix);
1764bf215546Sopenharmony_ci                           /* always clamp dvec3 to 4 components */
1765bf215546Sopenharmony_ci                           if (vec_components == 3)
1766bf215546Sopenharmony_ci                              vec_components = 4;
1767bf215546Sopenharmony_ci                           unsigned start_component = idx * vec_components * 2;
1768bf215546Sopenharmony_ci                           /* struct member */
1769bf215546Sopenharmony_ci                           unsigned member = start_component / 4;
1770bf215546Sopenharmony_ci                           /* number of components remaining */
1771bf215546Sopenharmony_ci                           unsigned remaining = num_components;
1772bf215546Sopenharmony_ci                           /* component index */
1773bf215546Sopenharmony_ci                           unsigned comp_idx = 0;
1774bf215546Sopenharmony_ci                           for (unsigned i = 0; i < num_components; member++) {
1775bf215546Sopenharmony_ci                              assert(member < glsl_get_length(var_deref->type));
1776bf215546Sopenharmony_ci                              nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
1777bf215546Sopenharmony_ci                              nir_ssa_def *load = nir_load_deref(&b, strct);
1778bf215546Sopenharmony_ci                              unsigned incr = MIN2(remaining, 4);
1779bf215546Sopenharmony_ci                              /* repack the loads to 64bit */
1780bf215546Sopenharmony_ci                              for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
1781bf215546Sopenharmony_ci                                 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
1782bf215546Sopenharmony_ci                              remaining -= incr;
1783bf215546Sopenharmony_ci                              i += incr;
1784bf215546Sopenharmony_ci                           }
1785bf215546Sopenharmony_ci                           dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
1786bf215546Sopenharmony_ci                           if (idx < cols - 1)
1787bf215546Sopenharmony_ci                              nir_push_else(&b, NULL);
1788bf215546Sopenharmony_ci                        }
1789bf215546Sopenharmony_ci                        /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
1790bf215546Sopenharmony_ci                        for (unsigned idx = cols - 1; idx >= 1; idx--) {
1791bf215546Sopenharmony_ci                           nir_pop_if(&b, NULL);
1792bf215546Sopenharmony_ci                           dest = nir_if_phi(&b, dests[idx - 1], dest);
1793bf215546Sopenharmony_ci                        }
1794bf215546Sopenharmony_ci                        _mesa_set_add(deletes, &deref->instr);
1795bf215546Sopenharmony_ci                     } else if (num_components <= 4) {
1796bf215546Sopenharmony_ci                        /* simple load case */
1797bf215546Sopenharmony_ci                        nir_ssa_def *load = nir_load_deref(&b, deref);
1798bf215546Sopenharmony_ci                        /* pack 32bit loads into 64bit: this will automagically get optimized out later */
1799bf215546Sopenharmony_ci                        for (unsigned i = 0; i < intr->num_components; i++) {
1800bf215546Sopenharmony_ci                           comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
1801bf215546Sopenharmony_ci                        }
1802bf215546Sopenharmony_ci                        dest = nir_vec(&b, comp, intr->num_components);
1803bf215546Sopenharmony_ci                     } else {
1804bf215546Sopenharmony_ci                        /* writing > 4 components: access the struct and load the appropriate vec4 members */
1805bf215546Sopenharmony_ci                        for (unsigned i = 0; i < 2; i++, num_components -= 4) {
1806bf215546Sopenharmony_ci                           nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
1807bf215546Sopenharmony_ci                           nir_ssa_def *load = nir_load_deref(&b, strct);
1808bf215546Sopenharmony_ci                           comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
1809bf215546Sopenharmony_ci                           if (num_components > 2)
1810bf215546Sopenharmony_ci                              comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
1811bf215546Sopenharmony_ci                        }
1812bf215546Sopenharmony_ci                        dest = nir_vec(&b, comp, intr->num_components);
1813bf215546Sopenharmony_ci                     }
1814bf215546Sopenharmony_ci                     nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
1815bf215546Sopenharmony_ci                  }
1816bf215546Sopenharmony_ci                  _mesa_set_add(deletes, instr);
1817bf215546Sopenharmony_ci                  break;
1818bf215546Sopenharmony_ci               }
1819bf215546Sopenharmony_ci               break;
1820bf215546Sopenharmony_ci               default: break;
1821bf215546Sopenharmony_ci               }
1822bf215546Sopenharmony_ci            }
1823bf215546Sopenharmony_ci         }
1824bf215546Sopenharmony_ci         if (func_progress)
1825bf215546Sopenharmony_ci            nir_metadata_preserve(function->impl, nir_metadata_none);
1826bf215546Sopenharmony_ci         /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
1827bf215546Sopenharmony_ci         set_foreach_remove(deletes, he)
1828bf215546Sopenharmony_ci            nir_instr_remove((void*)he->key);
1829bf215546Sopenharmony_ci      }
1830bf215546Sopenharmony_ci      progress = true;
1831bf215546Sopenharmony_ci   }
1832bf215546Sopenharmony_ci   ralloc_free(deletes);
1833bf215546Sopenharmony_ci   ralloc_free(derefs);
1834bf215546Sopenharmony_ci   if (progress) {
1835bf215546Sopenharmony_ci      nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
1836bf215546Sopenharmony_ci      nir_lower_phis_to_scalar(shader, false);
1837bf215546Sopenharmony_ci      optimize_nir(shader, NULL);
1838bf215546Sopenharmony_ci   }
1839bf215546Sopenharmony_ci   return progress;
1840bf215546Sopenharmony_ci}
1841bf215546Sopenharmony_ci
1842bf215546Sopenharmony_cistatic bool
1843bf215546Sopenharmony_cisplit_blocks(nir_shader *nir)
1844bf215546Sopenharmony_ci{
1845bf215546Sopenharmony_ci   bool progress = false;
1846bf215546Sopenharmony_ci   bool changed = true;
1847bf215546Sopenharmony_ci   do {
1848bf215546Sopenharmony_ci      progress = false;
1849bf215546Sopenharmony_ci      nir_foreach_shader_out_variable(var, nir) {
1850bf215546Sopenharmony_ci         const struct glsl_type *base_type = glsl_without_array(var->type);
1851bf215546Sopenharmony_ci         nir_variable *members[32]; //can't have more than this without breaking NIR
1852bf215546Sopenharmony_ci         if (!glsl_type_is_struct(base_type))
1853bf215546Sopenharmony_ci            continue;
1854bf215546Sopenharmony_ci         /* TODO: arrays? */
1855bf215546Sopenharmony_ci         if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
1856bf215546Sopenharmony_ci            continue;
1857bf215546Sopenharmony_ci         if (glsl_count_attribute_slots(var->type, false) == 1)
1858bf215546Sopenharmony_ci            continue;
1859bf215546Sopenharmony_ci         unsigned offset = 0;
1860bf215546Sopenharmony_ci         for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
1861bf215546Sopenharmony_ci            members[i] = nir_variable_clone(var, nir);
1862bf215546Sopenharmony_ci            members[i]->type = glsl_get_struct_field(var->type, i);
1863bf215546Sopenharmony_ci            members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
1864bf215546Sopenharmony_ci            members[i]->data.location += offset;
1865bf215546Sopenharmony_ci            offset += glsl_count_attribute_slots(members[i]->type, false);
1866bf215546Sopenharmony_ci            nir_shader_add_variable(nir, members[i]);
1867bf215546Sopenharmony_ci         }
1868bf215546Sopenharmony_ci         nir_foreach_function(function, nir) {
1869bf215546Sopenharmony_ci            bool func_progress = false;
1870bf215546Sopenharmony_ci            if (!function->impl)
1871bf215546Sopenharmony_ci               continue;
1872bf215546Sopenharmony_ci            nir_builder b;
1873bf215546Sopenharmony_ci            nir_builder_init(&b, function->impl);
1874bf215546Sopenharmony_ci            nir_foreach_block(block, function->impl) {
1875bf215546Sopenharmony_ci               nir_foreach_instr_safe(instr, block) {
1876bf215546Sopenharmony_ci                  switch (instr->type) {
1877bf215546Sopenharmony_ci                  case nir_instr_type_deref: {
1878bf215546Sopenharmony_ci                  nir_deref_instr *deref = nir_instr_as_deref(instr);
1879bf215546Sopenharmony_ci                  if (!(deref->modes & nir_var_shader_out))
1880bf215546Sopenharmony_ci                     continue;
1881bf215546Sopenharmony_ci                  if (nir_deref_instr_get_variable(deref) != var)
1882bf215546Sopenharmony_ci                     continue;
1883bf215546Sopenharmony_ci                  if (deref->deref_type != nir_deref_type_struct)
1884bf215546Sopenharmony_ci                     continue;
1885bf215546Sopenharmony_ci                  nir_deref_instr *parent = nir_deref_instr_parent(deref);
1886bf215546Sopenharmony_ci                  if (parent->deref_type != nir_deref_type_var)
1887bf215546Sopenharmony_ci                     continue;
1888bf215546Sopenharmony_ci                  deref->modes = nir_var_shader_temp;
1889bf215546Sopenharmony_ci                  parent->modes = nir_var_shader_temp;
1890bf215546Sopenharmony_ci                  b.cursor = nir_before_instr(instr);
1891bf215546Sopenharmony_ci                  nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
1892bf215546Sopenharmony_ci                  nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
1893bf215546Sopenharmony_ci                  nir_instr_remove(&deref->instr);
1894bf215546Sopenharmony_ci                  func_progress = true;
1895bf215546Sopenharmony_ci                  break;
1896bf215546Sopenharmony_ci                  }
1897bf215546Sopenharmony_ci                  default: break;
1898bf215546Sopenharmony_ci                  }
1899bf215546Sopenharmony_ci               }
1900bf215546Sopenharmony_ci            }
1901bf215546Sopenharmony_ci            if (func_progress)
1902bf215546Sopenharmony_ci               nir_metadata_preserve(function->impl, nir_metadata_none);
1903bf215546Sopenharmony_ci         }
1904bf215546Sopenharmony_ci         var->data.mode = nir_var_shader_temp;
1905bf215546Sopenharmony_ci         changed = true;
1906bf215546Sopenharmony_ci         progress = true;
1907bf215546Sopenharmony_ci      }
1908bf215546Sopenharmony_ci   } while (progress);
1909bf215546Sopenharmony_ci   return changed;
1910bf215546Sopenharmony_ci}
1911bf215546Sopenharmony_ci
1912bf215546Sopenharmony_cistatic void
1913bf215546Sopenharmony_cizink_shader_dump(void *words, size_t size, const char *file)
1914bf215546Sopenharmony_ci{
1915bf215546Sopenharmony_ci   FILE *fp = fopen(file, "wb");
1916bf215546Sopenharmony_ci   if (fp) {
1917bf215546Sopenharmony_ci      fwrite(words, 1, size, fp);
1918bf215546Sopenharmony_ci      fclose(fp);
1919bf215546Sopenharmony_ci      fprintf(stderr, "wrote '%s'...\n", file);
1920bf215546Sopenharmony_ci   }
1921bf215546Sopenharmony_ci}
1922bf215546Sopenharmony_ci
1923bf215546Sopenharmony_ciVkShaderModule
1924bf215546Sopenharmony_cizink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
1925bf215546Sopenharmony_ci{
1926bf215546Sopenharmony_ci   VkShaderModule mod;
1927bf215546Sopenharmony_ci   VkShaderModuleCreateInfo smci = {0};
1928bf215546Sopenharmony_ci
1929bf215546Sopenharmony_ci   if (!spirv)
1930bf215546Sopenharmony_ci      spirv = zs->spirv;
1931bf215546Sopenharmony_ci
1932bf215546Sopenharmony_ci   if (zink_debug & ZINK_DEBUG_SPIRV) {
1933bf215546Sopenharmony_ci      char buf[256];
1934bf215546Sopenharmony_ci      static int i;
1935bf215546Sopenharmony_ci      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
1936bf215546Sopenharmony_ci      zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
1937bf215546Sopenharmony_ci   }
1938bf215546Sopenharmony_ci
1939bf215546Sopenharmony_ci   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
1940bf215546Sopenharmony_ci   smci.codeSize = spirv->num_words * sizeof(uint32_t);
1941bf215546Sopenharmony_ci   smci.pCode = spirv->words;
1942bf215546Sopenharmony_ci
1943bf215546Sopenharmony_ci#ifndef NDEBUG
1944bf215546Sopenharmony_ci   if (zink_debug & ZINK_DEBUG_VALIDATION) {
1945bf215546Sopenharmony_ci      static const struct spirv_to_nir_options spirv_options = {
1946bf215546Sopenharmony_ci         .environment = NIR_SPIRV_VULKAN,
1947bf215546Sopenharmony_ci         .caps = {
1948bf215546Sopenharmony_ci            .float64 = true,
1949bf215546Sopenharmony_ci            .int16 = true,
1950bf215546Sopenharmony_ci            .int64 = true,
1951bf215546Sopenharmony_ci            .tessellation = true,
1952bf215546Sopenharmony_ci            .float_controls = true,
1953bf215546Sopenharmony_ci            .image_ms_array = true,
1954bf215546Sopenharmony_ci            .image_read_without_format = true,
1955bf215546Sopenharmony_ci            .image_write_without_format = true,
1956bf215546Sopenharmony_ci            .storage_image_ms = true,
1957bf215546Sopenharmony_ci            .geometry_streams = true,
1958bf215546Sopenharmony_ci            .storage_8bit = true,
1959bf215546Sopenharmony_ci            .storage_16bit = true,
1960bf215546Sopenharmony_ci            .variable_pointers = true,
1961bf215546Sopenharmony_ci            .stencil_export = true,
1962bf215546Sopenharmony_ci            .post_depth_coverage = true,
1963bf215546Sopenharmony_ci            .transform_feedback = true,
1964bf215546Sopenharmony_ci            .device_group = true,
1965bf215546Sopenharmony_ci            .draw_parameters = true,
1966bf215546Sopenharmony_ci            .shader_viewport_index_layer = true,
1967bf215546Sopenharmony_ci            .multiview = true,
1968bf215546Sopenharmony_ci            .physical_storage_buffer_address = true,
1969bf215546Sopenharmony_ci            .int64_atomics = true,
1970bf215546Sopenharmony_ci            .subgroup_arithmetic = true,
1971bf215546Sopenharmony_ci            .subgroup_basic = true,
1972bf215546Sopenharmony_ci            .subgroup_ballot = true,
1973bf215546Sopenharmony_ci            .subgroup_quad = true,
1974bf215546Sopenharmony_ci            .subgroup_shuffle = true,
1975bf215546Sopenharmony_ci            .subgroup_vote = true,
1976bf215546Sopenharmony_ci            .vk_memory_model = true,
1977bf215546Sopenharmony_ci            .vk_memory_model_device_scope = true,
1978bf215546Sopenharmony_ci            .int8 = true,
1979bf215546Sopenharmony_ci            .float16 = true,
1980bf215546Sopenharmony_ci            .demote_to_helper_invocation = true,
1981bf215546Sopenharmony_ci            .sparse_residency = true,
1982bf215546Sopenharmony_ci            .min_lod = true,
1983bf215546Sopenharmony_ci         },
1984bf215546Sopenharmony_ci         .ubo_addr_format = nir_address_format_32bit_index_offset,
1985bf215546Sopenharmony_ci         .ssbo_addr_format = nir_address_format_32bit_index_offset,
1986bf215546Sopenharmony_ci         .phys_ssbo_addr_format = nir_address_format_64bit_global,
1987bf215546Sopenharmony_ci         .push_const_addr_format = nir_address_format_logical,
1988bf215546Sopenharmony_ci         .shared_addr_format = nir_address_format_32bit_offset,
1989bf215546Sopenharmony_ci      };
1990bf215546Sopenharmony_ci      uint32_t num_spec_entries = 0;
1991bf215546Sopenharmony_ci      struct nir_spirv_specialization *spec_entries = NULL;
1992bf215546Sopenharmony_ci      VkSpecializationInfo sinfo = {0};
1993bf215546Sopenharmony_ci      VkSpecializationMapEntry me[3];
1994bf215546Sopenharmony_ci      uint32_t size[3] = {1,1,1};
1995bf215546Sopenharmony_ci      if (!zs->nir->info.workgroup_size[0]) {
1996bf215546Sopenharmony_ci         sinfo.mapEntryCount = 3;
1997bf215546Sopenharmony_ci         sinfo.pMapEntries = &me[0];
1998bf215546Sopenharmony_ci         sinfo.dataSize = sizeof(uint32_t) * 3;
1999bf215546Sopenharmony_ci         sinfo.pData = size;
2000bf215546Sopenharmony_ci         uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
2001bf215546Sopenharmony_ci         for (int i = 0; i < 3; i++) {
2002bf215546Sopenharmony_ci            me[i].size = sizeof(uint32_t);
2003bf215546Sopenharmony_ci            me[i].constantID = ids[i];
2004bf215546Sopenharmony_ci            me[i].offset = i * sizeof(uint32_t);
2005bf215546Sopenharmony_ci         }
2006bf215546Sopenharmony_ci         spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
2007bf215546Sopenharmony_ci      }
2008bf215546Sopenharmony_ci      nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
2009bf215546Sopenharmony_ci                         spec_entries, num_spec_entries,
2010bf215546Sopenharmony_ci                         zs->nir->info.stage, "main", &spirv_options, &screen->nir_options);
2011bf215546Sopenharmony_ci      assert(nir);
2012bf215546Sopenharmony_ci      ralloc_free(nir);
2013bf215546Sopenharmony_ci      free(spec_entries);
2014bf215546Sopenharmony_ci   }
2015bf215546Sopenharmony_ci#endif
2016bf215546Sopenharmony_ci
2017bf215546Sopenharmony_ci   VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
2018bf215546Sopenharmony_ci   bool success = zink_screen_handle_vkresult(screen, ret);
2019bf215546Sopenharmony_ci   assert(success);
2020bf215546Sopenharmony_ci   return success ? mod : VK_NULL_HANDLE;
2021bf215546Sopenharmony_ci}
2022bf215546Sopenharmony_ci
2023bf215546Sopenharmony_cistatic bool
2024bf215546Sopenharmony_cifind_var_deref(nir_shader *nir, nir_variable *var)
2025bf215546Sopenharmony_ci{
2026bf215546Sopenharmony_ci   nir_foreach_function(function, nir) {
2027bf215546Sopenharmony_ci      if (!function->impl)
2028bf215546Sopenharmony_ci         continue;
2029bf215546Sopenharmony_ci
2030bf215546Sopenharmony_ci      nir_foreach_block(block, function->impl) {
2031bf215546Sopenharmony_ci         nir_foreach_instr(instr, block) {
2032bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_deref)
2033bf215546Sopenharmony_ci               continue;
2034bf215546Sopenharmony_ci            nir_deref_instr *deref = nir_instr_as_deref(instr);
2035bf215546Sopenharmony_ci            if (deref->deref_type == nir_deref_type_var && deref->var == var)
2036bf215546Sopenharmony_ci               return true;
2037bf215546Sopenharmony_ci         }
2038bf215546Sopenharmony_ci      }
2039bf215546Sopenharmony_ci   }
2040bf215546Sopenharmony_ci   return false;
2041bf215546Sopenharmony_ci}
2042bf215546Sopenharmony_ci
2043bf215546Sopenharmony_cistatic void
2044bf215546Sopenharmony_ciprune_io(nir_shader *nir)
2045bf215546Sopenharmony_ci{
2046bf215546Sopenharmony_ci   nir_foreach_shader_in_variable_safe(var, nir) {
2047bf215546Sopenharmony_ci      if (!find_var_deref(nir, var))
2048bf215546Sopenharmony_ci         var->data.mode = nir_var_shader_temp;
2049bf215546Sopenharmony_ci   }
2050bf215546Sopenharmony_ci   nir_foreach_shader_out_variable_safe(var, nir) {
2051bf215546Sopenharmony_ci      if (!find_var_deref(nir, var))
2052bf215546Sopenharmony_ci         var->data.mode = nir_var_shader_temp;
2053bf215546Sopenharmony_ci   }
2054bf215546Sopenharmony_ci}
2055bf215546Sopenharmony_ci
2056bf215546Sopenharmony_ciVkShaderModule
2057bf215546Sopenharmony_cizink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
2058bf215546Sopenharmony_ci{
2059bf215546Sopenharmony_ci   VkShaderModule mod = VK_NULL_HANDLE;
2060bf215546Sopenharmony_ci   struct zink_shader_info *sinfo = &zs->sinfo;
2061bf215546Sopenharmony_ci   nir_shader *nir = nir_shader_clone(NULL, base_nir);
2062bf215546Sopenharmony_ci   bool need_optimize = false;
2063bf215546Sopenharmony_ci   bool inlined_uniforms = false;
2064bf215546Sopenharmony_ci
2065bf215546Sopenharmony_ci   if (key) {
2066bf215546Sopenharmony_ci      if (key->inline_uniforms) {
2067bf215546Sopenharmony_ci         NIR_PASS_V(nir, nir_inline_uniforms,
2068bf215546Sopenharmony_ci                    nir->info.num_inlinable_uniforms,
2069bf215546Sopenharmony_ci                    key->base.inlined_uniform_values,
2070bf215546Sopenharmony_ci                    nir->info.inlinable_uniform_dw_offsets);
2071bf215546Sopenharmony_ci
2072bf215546Sopenharmony_ci         inlined_uniforms = true;
2073bf215546Sopenharmony_ci      }
2074bf215546Sopenharmony_ci
2075bf215546Sopenharmony_ci      /* TODO: use a separate mem ctx here for ralloc */
2076bf215546Sopenharmony_ci      switch (zs->nir->info.stage) {
2077bf215546Sopenharmony_ci      case MESA_SHADER_VERTEX: {
2078bf215546Sopenharmony_ci         uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
2079bf215546Sopenharmony_ci         const struct zink_vs_key *vs_key = zink_vs_key(key);
2080bf215546Sopenharmony_ci         switch (vs_key->size) {
2081bf215546Sopenharmony_ci         case 4:
2082bf215546Sopenharmony_ci            decomposed_attrs = vs_key->u32.decomposed_attrs;
2083bf215546Sopenharmony_ci            decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
2084bf215546Sopenharmony_ci            break;
2085bf215546Sopenharmony_ci         case 2:
2086bf215546Sopenharmony_ci            decomposed_attrs = vs_key->u16.decomposed_attrs;
2087bf215546Sopenharmony_ci            decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
2088bf215546Sopenharmony_ci            break;
2089bf215546Sopenharmony_ci         case 1:
2090bf215546Sopenharmony_ci            decomposed_attrs = vs_key->u8.decomposed_attrs;
2091bf215546Sopenharmony_ci            decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
2092bf215546Sopenharmony_ci            break;
2093bf215546Sopenharmony_ci         default: break;
2094bf215546Sopenharmony_ci         }
2095bf215546Sopenharmony_ci         if (decomposed_attrs || decomposed_attrs_without_w)
2096bf215546Sopenharmony_ci            NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
2097bf215546Sopenharmony_ci         FALLTHROUGH;
2098bf215546Sopenharmony_ci      }
2099bf215546Sopenharmony_ci      case MESA_SHADER_TESS_EVAL:
2100bf215546Sopenharmony_ci      case MESA_SHADER_GEOMETRY:
2101bf215546Sopenharmony_ci         if (zink_vs_key_base(key)->last_vertex_stage) {
2102bf215546Sopenharmony_ci            if (zs->sinfo.have_xfb)
2103bf215546Sopenharmony_ci               sinfo->last_vertex = true;
2104bf215546Sopenharmony_ci
2105bf215546Sopenharmony_ci            if (!zink_vs_key_base(key)->clip_halfz && screen->driver_workarounds.depth_clip_control_missing) {
2106bf215546Sopenharmony_ci               NIR_PASS_V(nir, nir_lower_clip_halfz);
2107bf215546Sopenharmony_ci            }
2108bf215546Sopenharmony_ci            if (zink_vs_key_base(key)->push_drawid) {
2109bf215546Sopenharmony_ci               NIR_PASS_V(nir, lower_drawid);
2110bf215546Sopenharmony_ci            }
2111bf215546Sopenharmony_ci         }
2112bf215546Sopenharmony_ci         break;
2113bf215546Sopenharmony_ci      case MESA_SHADER_FRAGMENT:
2114bf215546Sopenharmony_ci         if (!zink_fs_key(key)->samples &&
2115bf215546Sopenharmony_ci            nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
2116bf215546Sopenharmony_ci            /* VK will always use gl_SampleMask[] values even if sample count is 0,
2117bf215546Sopenharmony_ci            * so we need to skip this write here to mimic GL's behavior of ignoring it
2118bf215546Sopenharmony_ci            */
2119bf215546Sopenharmony_ci            nir_foreach_shader_out_variable(var, nir) {
2120bf215546Sopenharmony_ci               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
2121bf215546Sopenharmony_ci                  var->data.mode = nir_var_shader_temp;
2122bf215546Sopenharmony_ci            }
2123bf215546Sopenharmony_ci            nir_fixup_deref_modes(nir);
2124bf215546Sopenharmony_ci            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2125bf215546Sopenharmony_ci            need_optimize = true;
2126bf215546Sopenharmony_ci         }
2127bf215546Sopenharmony_ci         if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
2128bf215546Sopenharmony_ci            NIR_PASS_V(nir, lower_dual_blend);
2129bf215546Sopenharmony_ci         }
2130bf215546Sopenharmony_ci         if (zink_fs_key(key)->coord_replace_bits) {
2131bf215546Sopenharmony_ci            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
2132bf215546Sopenharmony_ci                     false, zink_fs_key(key)->coord_replace_yinvert);
2133bf215546Sopenharmony_ci         }
2134bf215546Sopenharmony_ci         if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) {
2135bf215546Sopenharmony_ci            nir_foreach_shader_in_variable(var, nir)
2136bf215546Sopenharmony_ci               var->data.sample = true;
2137bf215546Sopenharmony_ci            nir->info.fs.uses_sample_qualifier = true;
2138bf215546Sopenharmony_ci            nir->info.fs.uses_sample_shading = true;
2139bf215546Sopenharmony_ci         }
2140bf215546Sopenharmony_ci         if (nir->info.fs.uses_fbfetch_output) {
2141bf215546Sopenharmony_ci            nir_variable *fbfetch = NULL;
2142bf215546Sopenharmony_ci            NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key(key)->fbfetch_ms);
2143bf215546Sopenharmony_ci            /* old variable must be deleted to avoid spirv errors */
2144bf215546Sopenharmony_ci            fbfetch->data.mode = nir_var_shader_temp;
2145bf215546Sopenharmony_ci            nir_fixup_deref_modes(nir);
2146bf215546Sopenharmony_ci            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2147bf215546Sopenharmony_ci            need_optimize = true;
2148bf215546Sopenharmony_ci         }
2149bf215546Sopenharmony_ci         break;
2150bf215546Sopenharmony_ci      default: break;
2151bf215546Sopenharmony_ci      }
2152bf215546Sopenharmony_ci      if (key->base.nonseamless_cube_mask) {
2153bf215546Sopenharmony_ci         NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
2154bf215546Sopenharmony_ci         need_optimize = true;
2155bf215546Sopenharmony_ci      }
2156bf215546Sopenharmony_ci   }
2157bf215546Sopenharmony_ci   if (screen->driconf.inline_uniforms) {
2158bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
2159bf215546Sopenharmony_ci      NIR_PASS_V(nir, rewrite_bo_access, screen);
2160bf215546Sopenharmony_ci      NIR_PASS_V(nir, remove_bo_access, zs);
2161bf215546Sopenharmony_ci      need_optimize = true;
2162bf215546Sopenharmony_ci   }
2163bf215546Sopenharmony_ci   if (inlined_uniforms) {
2164bf215546Sopenharmony_ci      optimize_nir(nir, zs);
2165bf215546Sopenharmony_ci
2166bf215546Sopenharmony_ci      /* This must be done again. */
2167bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
2168bf215546Sopenharmony_ci                                                       nir_var_shader_out);
2169bf215546Sopenharmony_ci
2170bf215546Sopenharmony_ci      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2171bf215546Sopenharmony_ci      if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
2172bf215546Sopenharmony_ci         zs->can_inline = false;
2173bf215546Sopenharmony_ci   } else if (need_optimize)
2174bf215546Sopenharmony_ci      optimize_nir(nir, zs);
2175bf215546Sopenharmony_ci   prune_io(nir);
2176bf215546Sopenharmony_ci
2177bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_convert_from_ssa, true);
2178bf215546Sopenharmony_ci
2179bf215546Sopenharmony_ci   struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
2180bf215546Sopenharmony_ci   if (spirv)
2181bf215546Sopenharmony_ci      mod = zink_shader_spirv_compile(screen, zs, spirv);
2182bf215546Sopenharmony_ci
2183bf215546Sopenharmony_ci   ralloc_free(nir);
2184bf215546Sopenharmony_ci
2185bf215546Sopenharmony_ci   /* TODO: determine if there's any reason to cache spirv output? */
2186bf215546Sopenharmony_ci   if (zs->nir->info.stage == MESA_SHADER_TESS_CTRL && zs->is_generated)
2187bf215546Sopenharmony_ci      zs->spirv = spirv;
2188bf215546Sopenharmony_ci   else
2189bf215546Sopenharmony_ci      ralloc_free(spirv);
2190bf215546Sopenharmony_ci   return mod;
2191bf215546Sopenharmony_ci}
2192bf215546Sopenharmony_ci
2193bf215546Sopenharmony_cistatic bool
2194bf215546Sopenharmony_cilower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
2195bf215546Sopenharmony_ci{
2196bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
2197bf215546Sopenharmony_ci      return false;
2198bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2199bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_instance_id)
2200bf215546Sopenharmony_ci      return false;
2201bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
2202bf215546Sopenharmony_ci   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
2203bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
2204bf215546Sopenharmony_ci   return true;
2205bf215546Sopenharmony_ci}
2206bf215546Sopenharmony_ci
2207bf215546Sopenharmony_cistatic bool
2208bf215546Sopenharmony_cilower_baseinstance(nir_shader *shader)
2209bf215546Sopenharmony_ci{
2210bf215546Sopenharmony_ci   if (shader->info.stage != MESA_SHADER_VERTEX)
2211bf215546Sopenharmony_ci      return false;
2212bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
2213bf215546Sopenharmony_ci}
2214bf215546Sopenharmony_ci
2215bf215546Sopenharmony_ci/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
2216bf215546Sopenharmony_ci * so instead we delete all those broken variables and just make new ones
2217bf215546Sopenharmony_ci */
2218bf215546Sopenharmony_cistatic bool
2219bf215546Sopenharmony_ciunbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
2220bf215546Sopenharmony_ci{
2221bf215546Sopenharmony_ci   uint64_t max_ssbo_size = 0;
2222bf215546Sopenharmony_ci   uint64_t max_ubo_size = 0;
2223bf215546Sopenharmony_ci   uint64_t max_uniform_size = 0;
2224bf215546Sopenharmony_ci
2225bf215546Sopenharmony_ci   if (!shader->info.num_ssbos && !shader->info.num_ubos)
2226bf215546Sopenharmony_ci      return false;
2227bf215546Sopenharmony_ci
2228bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
2229bf215546Sopenharmony_ci      const struct glsl_type *type = glsl_without_array(var->type);
2230bf215546Sopenharmony_ci      if (type_is_counter(type))
2231bf215546Sopenharmony_ci         continue;
2232bf215546Sopenharmony_ci      /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
2233bf215546Sopenharmony_ci      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
2234bf215546Sopenharmony_ci      const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
2235bf215546Sopenharmony_ci      if (interface_type) {
2236bf215546Sopenharmony_ci         unsigned block_size = glsl_get_explicit_size(interface_type, true);
2237bf215546Sopenharmony_ci         block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
2238bf215546Sopenharmony_ci         size = MAX2(size, block_size);
2239bf215546Sopenharmony_ci      }
2240bf215546Sopenharmony_ci      if (var->data.mode == nir_var_mem_ubo) {
2241bf215546Sopenharmony_ci         if (var->data.driver_location)
2242bf215546Sopenharmony_ci            max_ubo_size = MAX2(max_ubo_size, size);
2243bf215546Sopenharmony_ci         else
2244bf215546Sopenharmony_ci            max_uniform_size = MAX2(max_uniform_size, size);
2245bf215546Sopenharmony_ci      } else {
2246bf215546Sopenharmony_ci         max_ssbo_size = MAX2(max_ssbo_size, size);
2247bf215546Sopenharmony_ci         if (interface_type) {
2248bf215546Sopenharmony_ci            if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
2249bf215546Sopenharmony_ci               needs_size = true;
2250bf215546Sopenharmony_ci         }
2251bf215546Sopenharmony_ci      }
2252bf215546Sopenharmony_ci      var->data.mode = nir_var_shader_temp;
2253bf215546Sopenharmony_ci   }
2254bf215546Sopenharmony_ci   nir_fixup_deref_modes(shader);
2255bf215546Sopenharmony_ci   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2256bf215546Sopenharmony_ci   optimize_nir(shader, NULL);
2257bf215546Sopenharmony_ci
2258bf215546Sopenharmony_ci   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2259bf215546Sopenharmony_ci   fields[0].name = ralloc_strdup(shader, "base");
2260bf215546Sopenharmony_ci   fields[1].name = ralloc_strdup(shader, "unsized");
2261bf215546Sopenharmony_ci   if (shader->info.num_ubos) {
2262bf215546Sopenharmony_ci      if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
2263bf215546Sopenharmony_ci         fields[0].type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
2264bf215546Sopenharmony_ci         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
2265bf215546Sopenharmony_ci                                                 glsl_array_type(glsl_interface_type(fields, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
2266bf215546Sopenharmony_ci                                                 "uniform_0");
2267bf215546Sopenharmony_ci         var->interface_type = var->type;
2268bf215546Sopenharmony_ci         var->data.mode = nir_var_mem_ubo;
2269bf215546Sopenharmony_ci         var->data.driver_location = 0;
2270bf215546Sopenharmony_ci      }
2271bf215546Sopenharmony_ci
2272bf215546Sopenharmony_ci      unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
2273bf215546Sopenharmony_ci      uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
2274bf215546Sopenharmony_ci      if (num_ubos && ubos_used) {
2275bf215546Sopenharmony_ci         fields[0].type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
2276bf215546Sopenharmony_ci         /* shrink array as much as possible */
2277bf215546Sopenharmony_ci         unsigned first_ubo = ffs(ubos_used) - 2;
2278bf215546Sopenharmony_ci         assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
2279bf215546Sopenharmony_ci         num_ubos -= first_ubo;
2280bf215546Sopenharmony_ci         assert(num_ubos);
2281bf215546Sopenharmony_ci         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
2282bf215546Sopenharmony_ci                                   glsl_array_type(glsl_struct_type(fields, 1, "struct", false), num_ubos, 0),
2283bf215546Sopenharmony_ci                                   "ubos");
2284bf215546Sopenharmony_ci         var->interface_type = var->type;
2285bf215546Sopenharmony_ci         var->data.mode = nir_var_mem_ubo;
2286bf215546Sopenharmony_ci         var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
2287bf215546Sopenharmony_ci      }
2288bf215546Sopenharmony_ci   }
2289bf215546Sopenharmony_ci   if (shader->info.num_ssbos && zs->ssbos_used) {
2290bf215546Sopenharmony_ci      /* shrink array as much as possible */
2291bf215546Sopenharmony_ci      unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
2292bf215546Sopenharmony_ci      assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
2293bf215546Sopenharmony_ci      unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
2294bf215546Sopenharmony_ci      assert(num_ssbos);
2295bf215546Sopenharmony_ci      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
2296bf215546Sopenharmony_ci      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
2297bf215546Sopenharmony_ci      fields[0].type = ssbo_type;
2298bf215546Sopenharmony_ci      fields[1].type = max_ssbo_size ? unsized : NULL;
2299bf215546Sopenharmony_ci      unsigned field_count = max_ssbo_size && needs_size ? 2 : 1;
2300bf215546Sopenharmony_ci      nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
2301bf215546Sopenharmony_ci                                              glsl_array_type(glsl_struct_type(fields, field_count, "struct", false), num_ssbos, 0),
2302bf215546Sopenharmony_ci                                              "ssbos");
2303bf215546Sopenharmony_ci      var->interface_type = var->type;
2304bf215546Sopenharmony_ci      var->data.mode = nir_var_mem_ssbo;
2305bf215546Sopenharmony_ci      var->data.driver_location = first_ssbo;
2306bf215546Sopenharmony_ci   }
2307bf215546Sopenharmony_ci   return true;
2308bf215546Sopenharmony_ci}
2309bf215546Sopenharmony_ci
2310bf215546Sopenharmony_cistatic uint32_t
2311bf215546Sopenharmony_ciget_src_mask_ssbo(unsigned total, nir_src src)
2312bf215546Sopenharmony_ci{
2313bf215546Sopenharmony_ci   if (nir_src_is_const(src))
2314bf215546Sopenharmony_ci      return BITFIELD_BIT(nir_src_as_uint(src));
2315bf215546Sopenharmony_ci   return BITFIELD_MASK(total);
2316bf215546Sopenharmony_ci}
2317bf215546Sopenharmony_ci
2318bf215546Sopenharmony_cistatic uint32_t
2319bf215546Sopenharmony_ciget_src_mask_ubo(unsigned total, nir_src src)
2320bf215546Sopenharmony_ci{
2321bf215546Sopenharmony_ci   if (nir_src_is_const(src))
2322bf215546Sopenharmony_ci      return BITFIELD_BIT(nir_src_as_uint(src));
2323bf215546Sopenharmony_ci   return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
2324bf215546Sopenharmony_ci}
2325bf215546Sopenharmony_ci
2326bf215546Sopenharmony_cistatic bool
2327bf215546Sopenharmony_cianalyze_io(struct zink_shader *zs, nir_shader *shader)
2328bf215546Sopenharmony_ci{
2329bf215546Sopenharmony_ci   bool ret = false;
2330bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
2331bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
2332bf215546Sopenharmony_ci      nir_foreach_instr(instr, block) {
2333bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
2334bf215546Sopenharmony_ci            continue;
2335bf215546Sopenharmony_ci
2336bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2337bf215546Sopenharmony_ci         switch (intrin->intrinsic) {
2338bf215546Sopenharmony_ci         case nir_intrinsic_store_ssbo:
2339bf215546Sopenharmony_ci            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
2340bf215546Sopenharmony_ci            break;
2341bf215546Sopenharmony_ci
2342bf215546Sopenharmony_ci         case nir_intrinsic_get_ssbo_size: {
2343bf215546Sopenharmony_ci            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
2344bf215546Sopenharmony_ci            ret = true;
2345bf215546Sopenharmony_ci            break;
2346bf215546Sopenharmony_ci         }
2347bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_fadd:
2348bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_add:
2349bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_imin:
2350bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_umin:
2351bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_imax:
2352bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_umax:
2353bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_and:
2354bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_or:
2355bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_xor:
2356bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_exchange:
2357bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_comp_swap:
2358bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_fmin:
2359bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_fmax:
2360bf215546Sopenharmony_ci         case nir_intrinsic_ssbo_atomic_fcomp_swap:
2361bf215546Sopenharmony_ci         case nir_intrinsic_load_ssbo:
2362bf215546Sopenharmony_ci            zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
2363bf215546Sopenharmony_ci            break;
2364bf215546Sopenharmony_ci         case nir_intrinsic_load_ubo:
2365bf215546Sopenharmony_ci         case nir_intrinsic_load_ubo_vec4:
2366bf215546Sopenharmony_ci            zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
2367bf215546Sopenharmony_ci            break;
2368bf215546Sopenharmony_ci         default:
2369bf215546Sopenharmony_ci            break;
2370bf215546Sopenharmony_ci         }
2371bf215546Sopenharmony_ci      }
2372bf215546Sopenharmony_ci   }
2373bf215546Sopenharmony_ci   return ret;
2374bf215546Sopenharmony_ci}
2375bf215546Sopenharmony_ci
2376bf215546Sopenharmony_cistruct zink_bindless_info {
2377bf215546Sopenharmony_ci   nir_variable *bindless[4];
2378bf215546Sopenharmony_ci   unsigned bindless_set;
2379bf215546Sopenharmony_ci};
2380bf215546Sopenharmony_ci
2381bf215546Sopenharmony_ci/* this is a "default" bindless texture used if the shader has no texture variables */
2382bf215546Sopenharmony_cistatic nir_variable *
2383bf215546Sopenharmony_cicreate_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
2384bf215546Sopenharmony_ci{
2385bf215546Sopenharmony_ci   unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
2386bf215546Sopenharmony_ci   nir_variable *var;
2387bf215546Sopenharmony_ci
2388bf215546Sopenharmony_ci   const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
2389bf215546Sopenharmony_ci   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
2390bf215546Sopenharmony_ci   var->data.descriptor_set = descriptor_set;
2391bf215546Sopenharmony_ci   var->data.driver_location = var->data.binding = binding;
2392bf215546Sopenharmony_ci   return var;
2393bf215546Sopenharmony_ci}
2394bf215546Sopenharmony_ci
2395bf215546Sopenharmony_ci/* this is a "default" bindless image used if the shader has no image variables */
2396bf215546Sopenharmony_cistatic nir_variable *
2397bf215546Sopenharmony_cicreate_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
2398bf215546Sopenharmony_ci{
2399bf215546Sopenharmony_ci   unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
2400bf215546Sopenharmony_ci   nir_variable *var;
2401bf215546Sopenharmony_ci
2402bf215546Sopenharmony_ci   const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
2403bf215546Sopenharmony_ci   var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
2404bf215546Sopenharmony_ci   var->data.descriptor_set = descriptor_set;
2405bf215546Sopenharmony_ci   var->data.driver_location = var->data.binding = binding;
2406bf215546Sopenharmony_ci   var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
2407bf215546Sopenharmony_ci   return var;
2408bf215546Sopenharmony_ci}
2409bf215546Sopenharmony_ci
2410bf215546Sopenharmony_ci/* rewrite bindless instructions as array deref instructions */
2411bf215546Sopenharmony_cistatic bool
2412bf215546Sopenharmony_cilower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
2413bf215546Sopenharmony_ci{
2414bf215546Sopenharmony_ci   struct zink_bindless_info *bindless = data;
2415bf215546Sopenharmony_ci
2416bf215546Sopenharmony_ci   if (in->type == nir_instr_type_tex) {
2417bf215546Sopenharmony_ci      nir_tex_instr *tex = nir_instr_as_tex(in);
2418bf215546Sopenharmony_ci      int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
2419bf215546Sopenharmony_ci      if (idx == -1)
2420bf215546Sopenharmony_ci         return false;
2421bf215546Sopenharmony_ci
2422bf215546Sopenharmony_ci      nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
2423bf215546Sopenharmony_ci      if (!var)
2424bf215546Sopenharmony_ci         var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
2425bf215546Sopenharmony_ci      b->cursor = nir_before_instr(in);
2426bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_build_deref_var(b, var);
2427bf215546Sopenharmony_ci      if (glsl_type_is_array(var->type))
2428bf215546Sopenharmony_ci         deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
2429bf215546Sopenharmony_ci      nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
2430bf215546Sopenharmony_ci
2431bf215546Sopenharmony_ci      /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
2432bf215546Sopenharmony_ci       * match up with it in contrast to normal sampler ops where things are a bit more flexible;
2433bf215546Sopenharmony_ci       * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
2434bf215546Sopenharmony_ci       * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
2435bf215546Sopenharmony_ci       *
2436bf215546Sopenharmony_ci       * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
2437bf215546Sopenharmony_ci       * - Warhammer 40k: Dawn of War III
2438bf215546Sopenharmony_ci       */
2439bf215546Sopenharmony_ci      unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
2440bf215546Sopenharmony_ci      unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
2441bf215546Sopenharmony_ci      unsigned coord_components = nir_src_num_components(tex->src[c].src);
2442bf215546Sopenharmony_ci      if (coord_components < needed_components) {
2443bf215546Sopenharmony_ci         nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
2444bf215546Sopenharmony_ci         nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
2445bf215546Sopenharmony_ci         tex->coord_components = needed_components;
2446bf215546Sopenharmony_ci      }
2447bf215546Sopenharmony_ci      return true;
2448bf215546Sopenharmony_ci   }
2449bf215546Sopenharmony_ci   if (in->type != nir_instr_type_intrinsic)
2450bf215546Sopenharmony_ci      return false;
2451bf215546Sopenharmony_ci   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2452bf215546Sopenharmony_ci
2453bf215546Sopenharmony_ci   nir_intrinsic_op op;
2454bf215546Sopenharmony_ci#define OP_SWAP(OP) \
2455bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_##OP: \
2456bf215546Sopenharmony_ci      op = nir_intrinsic_image_deref_##OP; \
2457bf215546Sopenharmony_ci      break;
2458bf215546Sopenharmony_ci
2459bf215546Sopenharmony_ci
2460bf215546Sopenharmony_ci   /* convert bindless intrinsics to deref intrinsics */
2461bf215546Sopenharmony_ci   switch (instr->intrinsic) {
2462bf215546Sopenharmony_ci   OP_SWAP(atomic_add)
2463bf215546Sopenharmony_ci   OP_SWAP(atomic_and)
2464bf215546Sopenharmony_ci   OP_SWAP(atomic_comp_swap)
2465bf215546Sopenharmony_ci   OP_SWAP(atomic_dec_wrap)
2466bf215546Sopenharmony_ci   OP_SWAP(atomic_exchange)
2467bf215546Sopenharmony_ci   OP_SWAP(atomic_fadd)
2468bf215546Sopenharmony_ci   OP_SWAP(atomic_fmax)
2469bf215546Sopenharmony_ci   OP_SWAP(atomic_fmin)
2470bf215546Sopenharmony_ci   OP_SWAP(atomic_imax)
2471bf215546Sopenharmony_ci   OP_SWAP(atomic_imin)
2472bf215546Sopenharmony_ci   OP_SWAP(atomic_inc_wrap)
2473bf215546Sopenharmony_ci   OP_SWAP(atomic_or)
2474bf215546Sopenharmony_ci   OP_SWAP(atomic_umax)
2475bf215546Sopenharmony_ci   OP_SWAP(atomic_umin)
2476bf215546Sopenharmony_ci   OP_SWAP(atomic_xor)
2477bf215546Sopenharmony_ci   OP_SWAP(format)
2478bf215546Sopenharmony_ci   OP_SWAP(load)
2479bf215546Sopenharmony_ci   OP_SWAP(order)
2480bf215546Sopenharmony_ci   OP_SWAP(samples)
2481bf215546Sopenharmony_ci   OP_SWAP(size)
2482bf215546Sopenharmony_ci   OP_SWAP(store)
2483bf215546Sopenharmony_ci   default:
2484bf215546Sopenharmony_ci      return false;
2485bf215546Sopenharmony_ci   }
2486bf215546Sopenharmony_ci
2487bf215546Sopenharmony_ci   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2488bf215546Sopenharmony_ci   nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
2489bf215546Sopenharmony_ci   if (!var)
2490bf215546Sopenharmony_ci      var = create_bindless_image(b->shader, dim, bindless->bindless_set);
2491bf215546Sopenharmony_ci   instr->intrinsic = op;
2492bf215546Sopenharmony_ci   b->cursor = nir_before_instr(in);
2493bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_build_deref_var(b, var);
2494bf215546Sopenharmony_ci   if (glsl_type_is_array(var->type))
2495bf215546Sopenharmony_ci      deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
2496bf215546Sopenharmony_ci   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
2497bf215546Sopenharmony_ci   return true;
2498bf215546Sopenharmony_ci}
2499bf215546Sopenharmony_ci
2500bf215546Sopenharmony_cistatic bool
2501bf215546Sopenharmony_cilower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
2502bf215546Sopenharmony_ci{
2503bf215546Sopenharmony_ci   if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
2504bf215546Sopenharmony_ci      return false;
2505bf215546Sopenharmony_ci   nir_fixup_deref_modes(shader);
2506bf215546Sopenharmony_ci   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2507bf215546Sopenharmony_ci   optimize_nir(shader, NULL);
2508bf215546Sopenharmony_ci   return true;
2509bf215546Sopenharmony_ci}
2510bf215546Sopenharmony_ci
2511bf215546Sopenharmony_ci/* convert shader image/texture io variables to int64 handles for bindless indexing */
2512bf215546Sopenharmony_cistatic bool
2513bf215546Sopenharmony_cilower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
2514bf215546Sopenharmony_ci{
2515bf215546Sopenharmony_ci   if (in->type != nir_instr_type_intrinsic)
2516bf215546Sopenharmony_ci      return false;
2517bf215546Sopenharmony_ci   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2518bf215546Sopenharmony_ci   if (instr->intrinsic != nir_intrinsic_load_deref &&
2519bf215546Sopenharmony_ci       instr->intrinsic != nir_intrinsic_store_deref)
2520bf215546Sopenharmony_ci      return false;
2521bf215546Sopenharmony_ci
2522bf215546Sopenharmony_ci   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
2523bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(src_deref);
2524bf215546Sopenharmony_ci   if (var->data.bindless)
2525bf215546Sopenharmony_ci      return false;
2526bf215546Sopenharmony_ci   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
2527bf215546Sopenharmony_ci      return false;
2528bf215546Sopenharmony_ci   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
2529bf215546Sopenharmony_ci      return false;
2530bf215546Sopenharmony_ci
2531bf215546Sopenharmony_ci   var->type = glsl_int64_t_type();
2532bf215546Sopenharmony_ci   var->data.bindless = 1;
2533bf215546Sopenharmony_ci   b->cursor = nir_before_instr(in);
2534bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_build_deref_var(b, var);
2535bf215546Sopenharmony_ci   if (instr->intrinsic == nir_intrinsic_load_deref) {
2536bf215546Sopenharmony_ci       nir_ssa_def *def = nir_load_deref(b, deref);
2537bf215546Sopenharmony_ci       nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
2538bf215546Sopenharmony_ci       nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
2539bf215546Sopenharmony_ci   } else {
2540bf215546Sopenharmony_ci      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
2541bf215546Sopenharmony_ci   }
2542bf215546Sopenharmony_ci   nir_instr_remove(in);
2543bf215546Sopenharmony_ci   nir_instr_remove(&src_deref->instr);
2544bf215546Sopenharmony_ci   return true;
2545bf215546Sopenharmony_ci}
2546bf215546Sopenharmony_ci
2547bf215546Sopenharmony_cistatic bool
2548bf215546Sopenharmony_cilower_bindless_io(nir_shader *shader)
2549bf215546Sopenharmony_ci{
2550bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
2551bf215546Sopenharmony_ci}
2552bf215546Sopenharmony_ci
2553bf215546Sopenharmony_cistatic uint32_t
2554bf215546Sopenharmony_cizink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
2555bf215546Sopenharmony_ci{
2556bf215546Sopenharmony_ci   if (stage == MESA_SHADER_NONE) {
2557bf215546Sopenharmony_ci      unreachable("not supported");
2558bf215546Sopenharmony_ci   } else {
2559bf215546Sopenharmony_ci      switch (type) {
2560bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2561bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2562bf215546Sopenharmony_ci         return stage * 2 + !!index;
2563bf215546Sopenharmony_ci
2564bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2565bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2566bf215546Sopenharmony_ci         assert(index < PIPE_MAX_SAMPLERS);
2567bf215546Sopenharmony_ci         return (stage * PIPE_MAX_SAMPLERS) + index;
2568bf215546Sopenharmony_ci
2569bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2570bf215546Sopenharmony_ci         return stage + (compact_descriptors * (ZINK_SHADER_COUNT * 2));
2571bf215546Sopenharmony_ci
2572bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2573bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2574bf215546Sopenharmony_ci         assert(index < ZINK_MAX_SHADER_IMAGES);
2575bf215546Sopenharmony_ci         return (stage * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_SHADER_COUNT * PIPE_MAX_SAMPLERS));
2576bf215546Sopenharmony_ci
2577bf215546Sopenharmony_ci      default:
2578bf215546Sopenharmony_ci         unreachable("unexpected type");
2579bf215546Sopenharmony_ci      }
2580bf215546Sopenharmony_ci   }
2581bf215546Sopenharmony_ci}
2582bf215546Sopenharmony_ci
2583bf215546Sopenharmony_cistatic void
2584bf215546Sopenharmony_cihandle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
2585bf215546Sopenharmony_ci{
2586bf215546Sopenharmony_ci   if (glsl_type_is_struct(type)) {
2587bf215546Sopenharmony_ci      for (unsigned i = 0; i < glsl_get_length(type); i++)
2588bf215546Sopenharmony_ci         handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
2589bf215546Sopenharmony_ci      return;
2590bf215546Sopenharmony_ci   }
2591bf215546Sopenharmony_ci
2592bf215546Sopenharmony_ci   /* just a random scalar in a struct */
2593bf215546Sopenharmony_ci   if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
2594bf215546Sopenharmony_ci      return;
2595bf215546Sopenharmony_ci
2596bf215546Sopenharmony_ci   VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
2597bf215546Sopenharmony_ci   unsigned binding;
2598bf215546Sopenharmony_ci   switch (vktype) {
2599bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2600bf215546Sopenharmony_ci         binding = 0;
2601bf215546Sopenharmony_ci         break;
2602bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2603bf215546Sopenharmony_ci         binding = 1;
2604bf215546Sopenharmony_ci         break;
2605bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2606bf215546Sopenharmony_ci         binding = 2;
2607bf215546Sopenharmony_ci         break;
2608bf215546Sopenharmony_ci      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2609bf215546Sopenharmony_ci         binding = 3;
2610bf215546Sopenharmony_ci         break;
2611bf215546Sopenharmony_ci      default:
2612bf215546Sopenharmony_ci         unreachable("unknown");
2613bf215546Sopenharmony_ci   }
2614bf215546Sopenharmony_ci   if (!bindless->bindless[binding]) {
2615bf215546Sopenharmony_ci      bindless->bindless[binding] = nir_variable_clone(var, nir);
2616bf215546Sopenharmony_ci      bindless->bindless[binding]->data.bindless = 0;
2617bf215546Sopenharmony_ci      bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
2618bf215546Sopenharmony_ci      bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
2619bf215546Sopenharmony_ci      bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
2620bf215546Sopenharmony_ci      if (!bindless->bindless[binding]->data.image.format)
2621bf215546Sopenharmony_ci         bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
2622bf215546Sopenharmony_ci      nir_shader_add_variable(nir, bindless->bindless[binding]);
2623bf215546Sopenharmony_ci   } else {
2624bf215546Sopenharmony_ci      assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
2625bf215546Sopenharmony_ci   }
2626bf215546Sopenharmony_ci   var->data.mode = nir_var_shader_temp;
2627bf215546Sopenharmony_ci}
2628bf215546Sopenharmony_ci
2629bf215546Sopenharmony_cistatic enum pipe_prim_type
2630bf215546Sopenharmony_ciprim_to_pipe(enum shader_prim primitive_type)
2631bf215546Sopenharmony_ci{
2632bf215546Sopenharmony_ci   switch (primitive_type) {
2633bf215546Sopenharmony_ci   case SHADER_PRIM_POINTS:
2634bf215546Sopenharmony_ci      return PIPE_PRIM_POINTS;
2635bf215546Sopenharmony_ci   case SHADER_PRIM_LINES:
2636bf215546Sopenharmony_ci   case SHADER_PRIM_LINE_LOOP:
2637bf215546Sopenharmony_ci   case SHADER_PRIM_LINE_STRIP:
2638bf215546Sopenharmony_ci   case SHADER_PRIM_LINES_ADJACENCY:
2639bf215546Sopenharmony_ci   case SHADER_PRIM_LINE_STRIP_ADJACENCY:
2640bf215546Sopenharmony_ci      return PIPE_PRIM_LINES;
2641bf215546Sopenharmony_ci   default:
2642bf215546Sopenharmony_ci      return PIPE_PRIM_TRIANGLES;
2643bf215546Sopenharmony_ci   }
2644bf215546Sopenharmony_ci}
2645bf215546Sopenharmony_ci
2646bf215546Sopenharmony_cistatic enum pipe_prim_type
2647bf215546Sopenharmony_citess_prim_to_pipe(enum tess_primitive_mode prim_mode)
2648bf215546Sopenharmony_ci{
2649bf215546Sopenharmony_ci   switch (prim_mode) {
2650bf215546Sopenharmony_ci   case TESS_PRIMITIVE_ISOLINES:
2651bf215546Sopenharmony_ci      return PIPE_PRIM_LINES;
2652bf215546Sopenharmony_ci   default:
2653bf215546Sopenharmony_ci      return PIPE_PRIM_TRIANGLES;
2654bf215546Sopenharmony_ci   }
2655bf215546Sopenharmony_ci}
2656bf215546Sopenharmony_ci
2657bf215546Sopenharmony_cistatic enum pipe_prim_type
2658bf215546Sopenharmony_ciget_shader_base_prim_type(struct nir_shader *nir)
2659bf215546Sopenharmony_ci{
2660bf215546Sopenharmony_ci   switch (nir->info.stage) {
2661bf215546Sopenharmony_ci   case MESA_SHADER_GEOMETRY:
2662bf215546Sopenharmony_ci      return prim_to_pipe(nir->info.gs.output_primitive);
2663bf215546Sopenharmony_ci   case MESA_SHADER_TESS_EVAL:
2664bf215546Sopenharmony_ci      return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : tess_prim_to_pipe(nir->info.tess._primitive_mode);
2665bf215546Sopenharmony_ci   default:
2666bf215546Sopenharmony_ci      break;
2667bf215546Sopenharmony_ci   }
2668bf215546Sopenharmony_ci   return PIPE_PRIM_MAX;
2669bf215546Sopenharmony_ci}
2670bf215546Sopenharmony_ci
2671bf215546Sopenharmony_cistatic bool
2672bf215546Sopenharmony_ciconvert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
2673bf215546Sopenharmony_ci{
2674bf215546Sopenharmony_ci   struct zink_screen *screen = data;
2675bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_tex)
2676bf215546Sopenharmony_ci      return false;
2677bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_instr_as_tex(instr);
2678bf215546Sopenharmony_ci   if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
2679bf215546Sopenharmony_ci      return false;
2680bf215546Sopenharmony_ci   if (tex->is_sparse && screen->need_2D_sparse) {
2681bf215546Sopenharmony_ci      /* no known case of this exists: only nvidia can hit it, and nothing uses it */
2682bf215546Sopenharmony_ci      mesa_loge("unhandled/unsupported 1D sparse texture!");
2683bf215546Sopenharmony_ci      abort();
2684bf215546Sopenharmony_ci   }
2685bf215546Sopenharmony_ci   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
2686bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
2687bf215546Sopenharmony_ci   tex->coord_components++;
2688bf215546Sopenharmony_ci   unsigned srcs[] = {
2689bf215546Sopenharmony_ci      nir_tex_src_coord,
2690bf215546Sopenharmony_ci      nir_tex_src_offset,
2691bf215546Sopenharmony_ci      nir_tex_src_ddx,
2692bf215546Sopenharmony_ci      nir_tex_src_ddy,
2693bf215546Sopenharmony_ci   };
2694bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
2695bf215546Sopenharmony_ci      unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
2696bf215546Sopenharmony_ci      if (c == -1)
2697bf215546Sopenharmony_ci         continue;
2698bf215546Sopenharmony_ci      if (tex->src[c].src.ssa->num_components == tex->coord_components)
2699bf215546Sopenharmony_ci         continue;
2700bf215546Sopenharmony_ci      nir_ssa_def *def;
2701bf215546Sopenharmony_ci      nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
2702bf215546Sopenharmony_ci      if (tex->src[c].src.ssa->num_components == 1)
2703bf215546Sopenharmony_ci         def = nir_vec2(b, tex->src[c].src.ssa, zero);
2704bf215546Sopenharmony_ci      else
2705bf215546Sopenharmony_ci         def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
2706bf215546Sopenharmony_ci      nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
2707bf215546Sopenharmony_ci   }
2708bf215546Sopenharmony_ci   b->cursor = nir_after_instr(instr);
2709bf215546Sopenharmony_ci   unsigned needed_components = nir_tex_instr_dest_size(tex);
2710bf215546Sopenharmony_ci   unsigned num_components = tex->dest.ssa.num_components;
2711bf215546Sopenharmony_ci   if (needed_components > num_components) {
2712bf215546Sopenharmony_ci      tex->dest.ssa.num_components = needed_components;
2713bf215546Sopenharmony_ci      assert(num_components < 3);
2714bf215546Sopenharmony_ci      /* take either xz or just x since this is promoted to 2D from 1D */
2715bf215546Sopenharmony_ci      uint32_t mask = num_components == 2 ? (1|4) : 1;
2716bf215546Sopenharmony_ci      nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
2717bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
2718bf215546Sopenharmony_ci   }
2719bf215546Sopenharmony_ci   return true;
2720bf215546Sopenharmony_ci}
2721bf215546Sopenharmony_ci
2722bf215546Sopenharmony_cistatic bool
2723bf215546Sopenharmony_cilower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
2724bf215546Sopenharmony_ci{
2725bf215546Sopenharmony_ci   bool found = false;
2726bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
2727bf215546Sopenharmony_ci      const struct glsl_type *type = glsl_without_array(var->type);
2728bf215546Sopenharmony_ci      unsigned length = glsl_get_length(var->type);
2729bf215546Sopenharmony_ci      if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
2730bf215546Sopenharmony_ci         continue;
2731bf215546Sopenharmony_ci      const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
2732bf215546Sopenharmony_ci      var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
2733bf215546Sopenharmony_ci
2734bf215546Sopenharmony_ci      found = true;
2735bf215546Sopenharmony_ci   }
2736bf215546Sopenharmony_ci   if (found)
2737bf215546Sopenharmony_ci      nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
2738bf215546Sopenharmony_ci   return found;
2739bf215546Sopenharmony_ci}
2740bf215546Sopenharmony_ci
2741bf215546Sopenharmony_cistatic void
2742bf215546Sopenharmony_ciscan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
2743bf215546Sopenharmony_ci{
2744bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
2745bf215546Sopenharmony_ci      if (!function->impl)
2746bf215546Sopenharmony_ci         continue;
2747bf215546Sopenharmony_ci      nir_foreach_block_safe(block, function->impl) {
2748bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
2749bf215546Sopenharmony_ci            if (instr->type == nir_instr_type_tex) {
2750bf215546Sopenharmony_ci               nir_tex_instr *tex = nir_instr_as_tex(instr);
2751bf215546Sopenharmony_ci               zs->sinfo.have_sparse |= tex->is_sparse;
2752bf215546Sopenharmony_ci            }
2753bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
2754bf215546Sopenharmony_ci               continue;
2755bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2756bf215546Sopenharmony_ci            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
2757bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
2758bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_store ||
2759bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
2760bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
2761bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
2762bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
2763bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
2764bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
2765bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
2766bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
2767bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
2768bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
2769bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
2770bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_size ||
2771bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_samples ||
2772bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_format ||
2773bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_order) {
2774bf215546Sopenharmony_ci
2775bf215546Sopenharmony_ci                nir_variable *var =
2776bf215546Sopenharmony_ci                   nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
2777bf215546Sopenharmony_ci
2778bf215546Sopenharmony_ci                /* Structs have been lowered already, so get_aoa_size is sufficient. */
2779bf215546Sopenharmony_ci                const unsigned size =
2780bf215546Sopenharmony_ci                   glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
2781bf215546Sopenharmony_ci                BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
2782bf215546Sopenharmony_ci                                 var->data.binding + (MAX2(size, 1) - 1));
2783bf215546Sopenharmony_ci            }
2784bf215546Sopenharmony_ci            if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
2785bf215546Sopenharmony_ci                intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
2786bf215546Sopenharmony_ci               zs->sinfo.have_sparse = true;
2787bf215546Sopenharmony_ci
2788bf215546Sopenharmony_ci            static bool warned = false;
2789bf215546Sopenharmony_ci            if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
2790bf215546Sopenharmony_ci               switch (intr->intrinsic) {
2791bf215546Sopenharmony_ci               case nir_intrinsic_image_deref_atomic_add: {
2792bf215546Sopenharmony_ci                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
2793bf215546Sopenharmony_ci                  if (util_format_is_float(var->data.image.format))
2794bf215546Sopenharmony_ci                     fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
2795bf215546Sopenharmony_ci                  break;
2796bf215546Sopenharmony_ci               }
2797bf215546Sopenharmony_ci               default:
2798bf215546Sopenharmony_ci                  break;
2799bf215546Sopenharmony_ci               }
2800bf215546Sopenharmony_ci            }
2801bf215546Sopenharmony_ci         }
2802bf215546Sopenharmony_ci      }
2803bf215546Sopenharmony_ci   }
2804bf215546Sopenharmony_ci}
2805bf215546Sopenharmony_ci
2806bf215546Sopenharmony_cistatic bool
2807bf215546Sopenharmony_ciis_residency_code(nir_ssa_def *src)
2808bf215546Sopenharmony_ci{
2809bf215546Sopenharmony_ci   nir_instr *parent = src->parent_instr;
2810bf215546Sopenharmony_ci   while (1) {
2811bf215546Sopenharmony_ci      if (parent->type == nir_instr_type_intrinsic) {
2812bf215546Sopenharmony_ci         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
2813bf215546Sopenharmony_ci         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
2814bf215546Sopenharmony_ci         return false;
2815bf215546Sopenharmony_ci      }
2816bf215546Sopenharmony_ci      if (parent->type == nir_instr_type_tex)
2817bf215546Sopenharmony_ci         return true;
2818bf215546Sopenharmony_ci      assert(parent->type == nir_instr_type_alu);
2819bf215546Sopenharmony_ci      nir_alu_instr *alu = nir_instr_as_alu(parent);
2820bf215546Sopenharmony_ci      parent = alu->src[0].src.ssa->parent_instr;
2821bf215546Sopenharmony_ci   }
2822bf215546Sopenharmony_ci}
2823bf215546Sopenharmony_ci
2824bf215546Sopenharmony_cistatic bool
2825bf215546Sopenharmony_cilower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
2826bf215546Sopenharmony_ci{
2827bf215546Sopenharmony_ci   if (in->type != nir_instr_type_intrinsic)
2828bf215546Sopenharmony_ci      return false;
2829bf215546Sopenharmony_ci   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2830bf215546Sopenharmony_ci   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
2831bf215546Sopenharmony_ci      b->cursor = nir_before_instr(&instr->instr);
2832bf215546Sopenharmony_ci      nir_ssa_def *src0;
2833bf215546Sopenharmony_ci      if (is_residency_code(instr->src[0].ssa))
2834bf215546Sopenharmony_ci         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
2835bf215546Sopenharmony_ci      else
2836bf215546Sopenharmony_ci         src0 = instr->src[0].ssa;
2837bf215546Sopenharmony_ci      nir_ssa_def *src1;
2838bf215546Sopenharmony_ci      if (is_residency_code(instr->src[1].ssa))
2839bf215546Sopenharmony_ci         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
2840bf215546Sopenharmony_ci      else
2841bf215546Sopenharmony_ci         src1 = instr->src[1].ssa;
2842bf215546Sopenharmony_ci      nir_ssa_def *def = nir_iand(b, src0, src1);
2843bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
2844bf215546Sopenharmony_ci      nir_instr_remove(in);
2845bf215546Sopenharmony_ci      return true;
2846bf215546Sopenharmony_ci   }
2847bf215546Sopenharmony_ci   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
2848bf215546Sopenharmony_ci      return false;
2849bf215546Sopenharmony_ci
2850bf215546Sopenharmony_ci   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
2851bf215546Sopenharmony_ci    * so just rewrite as the first component since ntv is going to use a different
2852bf215546Sopenharmony_ci    * method for storing the residency value anyway
2853bf215546Sopenharmony_ci    */
2854bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&instr->instr);
2855bf215546Sopenharmony_ci   nir_instr *parent = instr->src[0].ssa->parent_instr;
2856bf215546Sopenharmony_ci   if (is_residency_code(instr->src[0].ssa)) {
2857bf215546Sopenharmony_ci      assert(parent->type == nir_instr_type_alu);
2858bf215546Sopenharmony_ci      nir_alu_instr *alu = nir_instr_as_alu(parent);
2859bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
2860bf215546Sopenharmony_ci      nir_instr_remove(parent);
2861bf215546Sopenharmony_ci   } else {
2862bf215546Sopenharmony_ci      nir_ssa_def *src;
2863bf215546Sopenharmony_ci      if (parent->type == nir_instr_type_intrinsic) {
2864bf215546Sopenharmony_ci         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
2865bf215546Sopenharmony_ci         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
2866bf215546Sopenharmony_ci         src = intr->src[0].ssa;
2867bf215546Sopenharmony_ci      } else {
2868bf215546Sopenharmony_ci         assert(parent->type == nir_instr_type_alu);
2869bf215546Sopenharmony_ci         nir_alu_instr *alu = nir_instr_as_alu(parent);
2870bf215546Sopenharmony_ci         src = alu->src[0].src.ssa;
2871bf215546Sopenharmony_ci      }
2872bf215546Sopenharmony_ci      if (instr->dest.ssa.bit_size != 32) {
2873bf215546Sopenharmony_ci         if (instr->dest.ssa.bit_size == 1)
2874bf215546Sopenharmony_ci            src = nir_ieq_imm(b, src, 1);
2875bf215546Sopenharmony_ci         else
2876bf215546Sopenharmony_ci            src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
2877bf215546Sopenharmony_ci      }
2878bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
2879bf215546Sopenharmony_ci      nir_instr_remove(in);
2880bf215546Sopenharmony_ci   }
2881bf215546Sopenharmony_ci   return true;
2882bf215546Sopenharmony_ci}
2883bf215546Sopenharmony_ci
2884bf215546Sopenharmony_cistatic bool
2885bf215546Sopenharmony_cilower_sparse(nir_shader *shader)
2886bf215546Sopenharmony_ci{
2887bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
2888bf215546Sopenharmony_ci}
2889bf215546Sopenharmony_ci
2890bf215546Sopenharmony_cistatic bool
2891bf215546Sopenharmony_cimatch_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
2892bf215546Sopenharmony_ci{
2893bf215546Sopenharmony_ci   if (in->type != nir_instr_type_tex)
2894bf215546Sopenharmony_ci      return false;
2895bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_instr_as_tex(in);
2896bf215546Sopenharmony_ci   if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
2897bf215546Sopenharmony_ci      return false;
2898bf215546Sopenharmony_ci   int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
2899bf215546Sopenharmony_ci   nir_variable *var = NULL;
2900bf215546Sopenharmony_ci   if (handle != -1) {
2901bf215546Sopenharmony_ci      var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
2902bf215546Sopenharmony_ci   } else {
2903bf215546Sopenharmony_ci      nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
2904bf215546Sopenharmony_ci         if (glsl_type_is_sampler(glsl_without_array(img->type))) {
2905bf215546Sopenharmony_ci            unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
2906bf215546Sopenharmony_ci            if (tex->texture_index >= img->data.driver_location &&
2907bf215546Sopenharmony_ci                tex->texture_index < img->data.driver_location + size) {
2908bf215546Sopenharmony_ci               var = img;
2909bf215546Sopenharmony_ci               break;
2910bf215546Sopenharmony_ci            }
2911bf215546Sopenharmony_ci         }
2912bf215546Sopenharmony_ci      }
2913bf215546Sopenharmony_ci   }
2914bf215546Sopenharmony_ci   assert(var);
2915bf215546Sopenharmony_ci   const struct glsl_type *type = glsl_without_array(var->type);
2916bf215546Sopenharmony_ci   enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
2917bf215546Sopenharmony_ci   bool is_int = glsl_base_type_is_integer(ret_type);
2918bf215546Sopenharmony_ci   unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
2919bf215546Sopenharmony_ci   unsigned dest_size = nir_dest_bit_size(tex->dest);
2920bf215546Sopenharmony_ci   b->cursor = nir_after_instr(in);
2921bf215546Sopenharmony_ci   unsigned num_components = nir_dest_num_components(tex->dest);
2922bf215546Sopenharmony_ci   bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
2923bf215546Sopenharmony_ci   if (bit_size == dest_size && !rewrite_depth)
2924bf215546Sopenharmony_ci      return false;
2925bf215546Sopenharmony_ci   nir_ssa_def *dest = &tex->dest.ssa;
2926bf215546Sopenharmony_ci   if (bit_size != dest_size) {
2927bf215546Sopenharmony_ci      tex->dest.ssa.bit_size = bit_size;
2928bf215546Sopenharmony_ci      tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
2929bf215546Sopenharmony_ci      if (rewrite_depth) {
2930bf215546Sopenharmony_ci         assert(!tex->is_new_style_shadow);
2931bf215546Sopenharmony_ci         tex->dest.ssa.num_components = 1;
2932bf215546Sopenharmony_ci         tex->is_new_style_shadow = true;
2933bf215546Sopenharmony_ci      }
2934bf215546Sopenharmony_ci
2935bf215546Sopenharmony_ci      if (is_int) {
2936bf215546Sopenharmony_ci         if (glsl_unsigned_base_type_of(ret_type) == ret_type)
2937bf215546Sopenharmony_ci            dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
2938bf215546Sopenharmony_ci         else
2939bf215546Sopenharmony_ci            dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
2940bf215546Sopenharmony_ci      } else {
2941bf215546Sopenharmony_ci         dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
2942bf215546Sopenharmony_ci      }
2943bf215546Sopenharmony_ci      if (rewrite_depth) {
2944bf215546Sopenharmony_ci         nir_ssa_def *vec[4] = {dest, dest, dest, dest};
2945bf215546Sopenharmony_ci         dest = nir_vec(b, vec, num_components);
2946bf215546Sopenharmony_ci      }
2947bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
2948bf215546Sopenharmony_ci   } else if (rewrite_depth) {
2949bf215546Sopenharmony_ci      assert(!tex->is_new_style_shadow);
2950bf215546Sopenharmony_ci      tex->dest.ssa.num_components = 1;
2951bf215546Sopenharmony_ci      tex->is_new_style_shadow = true;
2952bf215546Sopenharmony_ci      nir_ssa_def *vec[4] = {dest, dest, dest, dest};
2953bf215546Sopenharmony_ci      nir_ssa_def *splat = nir_vec(b, vec, num_components);
2954bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
2955bf215546Sopenharmony_ci   }
2956bf215546Sopenharmony_ci   return true;
2957bf215546Sopenharmony_ci}
2958bf215546Sopenharmony_ci
2959bf215546Sopenharmony_cistatic bool
2960bf215546Sopenharmony_cimatch_tex_dests(nir_shader *shader)
2961bf215546Sopenharmony_ci{
2962bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, NULL);
2963bf215546Sopenharmony_ci}
2964bf215546Sopenharmony_ci
2965bf215546Sopenharmony_cistatic bool
2966bf215546Sopenharmony_cisplit_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
2967bf215546Sopenharmony_ci{
2968bf215546Sopenharmony_ci   if (in->type != nir_instr_type_alu)
2969bf215546Sopenharmony_ci      return false;
2970bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(in);
2971bf215546Sopenharmony_ci   switch (alu->op) {
2972bf215546Sopenharmony_ci   case nir_op_ubitfield_extract:
2973bf215546Sopenharmony_ci   case nir_op_ibitfield_extract:
2974bf215546Sopenharmony_ci   case nir_op_bitfield_insert:
2975bf215546Sopenharmony_ci      break;
2976bf215546Sopenharmony_ci   default:
2977bf215546Sopenharmony_ci      return false;
2978bf215546Sopenharmony_ci   }
2979bf215546Sopenharmony_ci   unsigned num_components = nir_dest_num_components(alu->dest.dest);
2980bf215546Sopenharmony_ci   if (num_components == 1)
2981bf215546Sopenharmony_ci      return false;
2982bf215546Sopenharmony_ci   b->cursor = nir_before_instr(in);
2983bf215546Sopenharmony_ci   nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
2984bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_components; i++) {
2985bf215546Sopenharmony_ci      if (alu->op == nir_op_bitfield_insert)
2986bf215546Sopenharmony_ci         dests[i] = nir_bitfield_insert(b,
2987bf215546Sopenharmony_ci                                        nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2988bf215546Sopenharmony_ci                                        nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
2989bf215546Sopenharmony_ci                                        nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
2990bf215546Sopenharmony_ci                                        nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
2991bf215546Sopenharmony_ci      else if (alu->op == nir_op_ubitfield_extract)
2992bf215546Sopenharmony_ci         dests[i] = nir_ubitfield_extract(b,
2993bf215546Sopenharmony_ci                                          nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2994bf215546Sopenharmony_ci                                          nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
2995bf215546Sopenharmony_ci                                          nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
2996bf215546Sopenharmony_ci      else
2997bf215546Sopenharmony_ci         dests[i] = nir_ibitfield_extract(b,
2998bf215546Sopenharmony_ci                                          nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2999bf215546Sopenharmony_ci                                          nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
3000bf215546Sopenharmony_ci                                          nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
3001bf215546Sopenharmony_ci   }
3002bf215546Sopenharmony_ci   nir_ssa_def *dest = nir_vec(b, dests, num_components);
3003bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
3004bf215546Sopenharmony_ci   nir_instr_remove(in);
3005bf215546Sopenharmony_ci   return true;
3006bf215546Sopenharmony_ci}
3007bf215546Sopenharmony_ci
3008bf215546Sopenharmony_ci
3009bf215546Sopenharmony_cistatic bool
3010bf215546Sopenharmony_cisplit_bitfields(nir_shader *shader)
3011bf215546Sopenharmony_ci{
3012bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
3013bf215546Sopenharmony_ci}
3014bf215546Sopenharmony_ci
3015bf215546Sopenharmony_cistruct zink_shader *
3016bf215546Sopenharmony_cizink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
3017bf215546Sopenharmony_ci                   const struct pipe_stream_output_info *so_info)
3018bf215546Sopenharmony_ci{
3019bf215546Sopenharmony_ci   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
3020bf215546Sopenharmony_ci   bool have_psiz = false;
3021bf215546Sopenharmony_ci
3022bf215546Sopenharmony_ci   ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
3023bf215546Sopenharmony_ci
3024bf215546Sopenharmony_ci   ret->hash = _mesa_hash_pointer(ret);
3025bf215546Sopenharmony_ci   ret->reduced_prim = get_shader_base_prim_type(nir);
3026bf215546Sopenharmony_ci
3027bf215546Sopenharmony_ci   ret->programs = _mesa_pointer_set_create(NULL);
3028bf215546Sopenharmony_ci   simple_mtx_init(&ret->lock, mtx_plain);
3029bf215546Sopenharmony_ci
3030bf215546Sopenharmony_ci   nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
3031bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
3032bf215546Sopenharmony_ci       nir->info.stage == MESA_SHADER_TESS_EVAL)
3033bf215546Sopenharmony_ci      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
3034bf215546Sopenharmony_ci
3035bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
3036bf215546Sopenharmony_ci              UINT32_MAX);
3037bf215546Sopenharmony_ci
3038bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_VERTEX)
3039bf215546Sopenharmony_ci      create_vs_pushconst(nir);
3040bf215546Sopenharmony_ci   else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
3041bf215546Sopenharmony_ci            nir->info.stage == MESA_SHADER_TESS_EVAL)
3042bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
3043bf215546Sopenharmony_ci   else if (nir->info.stage == MESA_SHADER_KERNEL)
3044bf215546Sopenharmony_ci      create_cs_pushconst(nir);
3045bf215546Sopenharmony_ci
3046bf215546Sopenharmony_ci   if (nir->info.stage < MESA_SHADER_FRAGMENT)
3047bf215546Sopenharmony_ci      have_psiz = check_psiz(nir);
3048bf215546Sopenharmony_ci   NIR_PASS_V(nir, lower_basevertex);
3049bf215546Sopenharmony_ci   NIR_PASS_V(nir, lower_work_dim);
3050bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3051bf215546Sopenharmony_ci   NIR_PASS_V(nir, lower_baseinstance);
3052bf215546Sopenharmony_ci   NIR_PASS_V(nir, lower_sparse);
3053bf215546Sopenharmony_ci   NIR_PASS_V(nir, split_bitfields);
3054bf215546Sopenharmony_ci
3055bf215546Sopenharmony_ci   if (screen->need_2D_zs)
3056bf215546Sopenharmony_ci      NIR_PASS_V(nir, lower_1d_shadow, screen);
3057bf215546Sopenharmony_ci
3058bf215546Sopenharmony_ci   {
3059bf215546Sopenharmony_ci      nir_lower_subgroups_options subgroup_options = {0};
3060bf215546Sopenharmony_ci      subgroup_options.lower_to_scalar = true;
3061bf215546Sopenharmony_ci      subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
3062bf215546Sopenharmony_ci      subgroup_options.ballot_bit_size = 32;
3063bf215546Sopenharmony_ci      subgroup_options.ballot_components = 4;
3064bf215546Sopenharmony_ci      subgroup_options.lower_subgroup_masks = true;
3065bf215546Sopenharmony_ci      if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(nir->info.stage))) {
3066bf215546Sopenharmony_ci         subgroup_options.subgroup_size = 1;
3067bf215546Sopenharmony_ci         subgroup_options.lower_vote_trivial = true;
3068bf215546Sopenharmony_ci      }
3069bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3070bf215546Sopenharmony_ci   }
3071bf215546Sopenharmony_ci
3072bf215546Sopenharmony_ci   if (so_info && so_info->num_outputs)
3073bf215546Sopenharmony_ci      NIR_PASS_V(nir, split_blocks);
3074bf215546Sopenharmony_ci
3075bf215546Sopenharmony_ci   optimize_nir(nir, NULL);
3076bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3077bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_discard_if);
3078bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_fragcolor,
3079bf215546Sopenharmony_ci         nir->info.fs.color_is_dual_source ? 1 : 8);
3080bf215546Sopenharmony_ci   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
3081bf215546Sopenharmony_ci   bool needs_size = analyze_io(ret, nir);
3082bf215546Sopenharmony_ci   NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
3083bf215546Sopenharmony_ci   /* run in compile if there could be inlined uniforms */
3084bf215546Sopenharmony_ci   if (!screen->driconf.inline_uniforms) {
3085bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3086bf215546Sopenharmony_ci      NIR_PASS_V(nir, rewrite_bo_access, screen);
3087bf215546Sopenharmony_ci      NIR_PASS_V(nir, remove_bo_access, ret);
3088bf215546Sopenharmony_ci   }
3089bf215546Sopenharmony_ci
3090bf215546Sopenharmony_ci   if (zink_debug & ZINK_DEBUG_NIR) {
3091bf215546Sopenharmony_ci      fprintf(stderr, "NIR shader:\n---8<---\n");
3092bf215546Sopenharmony_ci      nir_print_shader(nir, stderr);
3093bf215546Sopenharmony_ci      fprintf(stderr, "---8<---\n");
3094bf215546Sopenharmony_ci   }
3095bf215546Sopenharmony_ci
3096bf215546Sopenharmony_ci   struct zink_bindless_info bindless = {0};
3097bf215546Sopenharmony_ci   bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
3098bf215546Sopenharmony_ci   bool has_bindless_io = false;
3099bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
3100bf215546Sopenharmony_ci      var->data.is_xfb = false;
3101bf215546Sopenharmony_ci      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
3102bf215546Sopenharmony_ci         has_bindless_io = true;
3103bf215546Sopenharmony_ci         break;
3104bf215546Sopenharmony_ci      }
3105bf215546Sopenharmony_ci   }
3106bf215546Sopenharmony_ci   if (has_bindless_io)
3107bf215546Sopenharmony_ci      NIR_PASS_V(nir, lower_bindless_io);
3108bf215546Sopenharmony_ci
3109bf215546Sopenharmony_ci   optimize_nir(nir, NULL);
3110bf215546Sopenharmony_ci   prune_io(nir);
3111bf215546Sopenharmony_ci
3112bf215546Sopenharmony_ci   scan_nir(screen, nir, ret);
3113bf215546Sopenharmony_ci
3114bf215546Sopenharmony_ci   foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
3115bf215546Sopenharmony_ci      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
3116bf215546Sopenharmony_ci                                        nir_var_image |
3117bf215546Sopenharmony_ci                                        nir_var_mem_ubo |
3118bf215546Sopenharmony_ci                                        nir_var_mem_ssbo)) {
3119bf215546Sopenharmony_ci         enum zink_descriptor_type ztype;
3120bf215546Sopenharmony_ci         const struct glsl_type *type = glsl_without_array(var->type);
3121bf215546Sopenharmony_ci         if (var->data.mode == nir_var_mem_ubo) {
3122bf215546Sopenharmony_ci            ztype = ZINK_DESCRIPTOR_TYPE_UBO;
3123bf215546Sopenharmony_ci            /* buffer 0 is a push descriptor */
3124bf215546Sopenharmony_ci            var->data.descriptor_set = !!var->data.driver_location;
3125bf215546Sopenharmony_ci            var->data.binding = !var->data.driver_location ? nir->info.stage :
3126bf215546Sopenharmony_ci                                zink_binding(nir->info.stage,
3127bf215546Sopenharmony_ci                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
3128bf215546Sopenharmony_ci                                             var->data.driver_location,
3129bf215546Sopenharmony_ci                                             screen->compact_descriptors);
3130bf215546Sopenharmony_ci            assert(var->data.driver_location || var->data.binding < 10);
3131bf215546Sopenharmony_ci            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
3132bf215546Sopenharmony_ci            int binding = var->data.binding;
3133bf215546Sopenharmony_ci
3134bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3135bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
3136bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
3137bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
3138bf215546Sopenharmony_ci            assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
3139bf215546Sopenharmony_ci            ret->num_bindings[ztype]++;
3140bf215546Sopenharmony_ci         } else if (var->data.mode == nir_var_mem_ssbo) {
3141bf215546Sopenharmony_ci            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
3142bf215546Sopenharmony_ci            var->data.descriptor_set = screen->desc_set_id[ztype];
3143bf215546Sopenharmony_ci            var->data.binding = zink_binding(nir->info.stage,
3144bf215546Sopenharmony_ci                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
3145bf215546Sopenharmony_ci                                             var->data.driver_location,
3146bf215546Sopenharmony_ci                                             screen->compact_descriptors);
3147bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3148bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
3149bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3150bf215546Sopenharmony_ci            ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
3151bf215546Sopenharmony_ci            assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
3152bf215546Sopenharmony_ci            ret->num_bindings[ztype]++;
3153bf215546Sopenharmony_ci         } else {
3154bf215546Sopenharmony_ci            assert(var->data.mode == nir_var_uniform ||
3155bf215546Sopenharmony_ci                   var->data.mode == nir_var_image);
3156bf215546Sopenharmony_ci            if (var->data.bindless) {
3157bf215546Sopenharmony_ci               ret->bindless = true;
3158bf215546Sopenharmony_ci               handle_bindless_var(nir, var, type, &bindless);
3159bf215546Sopenharmony_ci            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
3160bf215546Sopenharmony_ci               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
3161bf215546Sopenharmony_ci               ztype = zink_desc_type_from_vktype(vktype);
3162bf215546Sopenharmony_ci               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
3163bf215546Sopenharmony_ci                  ret->num_texel_buffers++;
3164bf215546Sopenharmony_ci               var->data.driver_location = var->data.binding;
3165bf215546Sopenharmony_ci               var->data.descriptor_set = screen->desc_set_id[ztype];
3166bf215546Sopenharmony_ci               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
3167bf215546Sopenharmony_ci               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3168bf215546Sopenharmony_ci               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
3169bf215546Sopenharmony_ci               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
3170bf215546Sopenharmony_ci               if (glsl_type_is_array(var->type))
3171bf215546Sopenharmony_ci                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
3172bf215546Sopenharmony_ci               else
3173bf215546Sopenharmony_ci                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
3174bf215546Sopenharmony_ci               ret->num_bindings[ztype]++;
3175bf215546Sopenharmony_ci            }
3176bf215546Sopenharmony_ci         }
3177bf215546Sopenharmony_ci      }
3178bf215546Sopenharmony_ci   }
3179bf215546Sopenharmony_ci   bool bindless_lowered = false;
3180bf215546Sopenharmony_ci   NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
3181bf215546Sopenharmony_ci   ret->bindless |= bindless_lowered;
3182bf215546Sopenharmony_ci
3183bf215546Sopenharmony_ci   if (!screen->info.feats.features.shaderInt64)
3184bf215546Sopenharmony_ci      NIR_PASS_V(nir, lower_64bit_vars);
3185bf215546Sopenharmony_ci   NIR_PASS_V(nir, match_tex_dests);
3186bf215546Sopenharmony_ci
3187bf215546Sopenharmony_ci   ret->nir = nir;
3188bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, nir)
3189bf215546Sopenharmony_ci      var->data.explicit_xfb_buffer = 0;
3190bf215546Sopenharmony_ci   if (so_info && so_info->num_outputs)
3191bf215546Sopenharmony_ci      update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);
3192bf215546Sopenharmony_ci   else if (have_psiz) {
3193bf215546Sopenharmony_ci      bool have_fake_psiz = false;
3194bf215546Sopenharmony_ci      nir_variable *psiz = NULL;
3195bf215546Sopenharmony_ci      nir_foreach_shader_out_variable(var, nir) {
3196bf215546Sopenharmony_ci         if (var->data.location == VARYING_SLOT_PSIZ) {
3197bf215546Sopenharmony_ci            if (!var->data.explicit_location)
3198bf215546Sopenharmony_ci               have_fake_psiz = true;
3199bf215546Sopenharmony_ci            else
3200bf215546Sopenharmony_ci               psiz = var;
3201bf215546Sopenharmony_ci         }
3202bf215546Sopenharmony_ci      }
3203bf215546Sopenharmony_ci      if (have_fake_psiz && psiz) {
3204bf215546Sopenharmony_ci         psiz->data.mode = nir_var_shader_temp;
3205bf215546Sopenharmony_ci         nir_fixup_deref_modes(nir);
3206bf215546Sopenharmony_ci         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3207bf215546Sopenharmony_ci      }
3208bf215546Sopenharmony_ci   }
3209bf215546Sopenharmony_ci
3210bf215546Sopenharmony_ci   ret->can_inline = true;
3211bf215546Sopenharmony_ci
3212bf215546Sopenharmony_ci   return ret;
3213bf215546Sopenharmony_ci}
3214bf215546Sopenharmony_ci
3215bf215546Sopenharmony_cichar *
3216bf215546Sopenharmony_cizink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
3217bf215546Sopenharmony_ci{
3218bf215546Sopenharmony_ci   struct zink_screen *screen = zink_screen(pscreen);
3219bf215546Sopenharmony_ci   nir_shader *nir = nirptr;
3220bf215546Sopenharmony_ci
3221bf215546Sopenharmony_ci   nir_lower_tex_options tex_opts = {
3222bf215546Sopenharmony_ci      .lower_invalid_implicit_lod = true,
3223bf215546Sopenharmony_ci   };
3224bf215546Sopenharmony_ci   /*
3225bf215546Sopenharmony_ci      Sampled Image must be an object whose type is OpTypeSampledImage.
3226bf215546Sopenharmony_ci      The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
3227bf215546Sopenharmony_ci      or Rect, and the Arrayed and MS operands must be 0.
3228bf215546Sopenharmony_ci      - SPIRV, OpImageSampleProj* opcodes
3229bf215546Sopenharmony_ci    */
3230bf215546Sopenharmony_ci   tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
3231bf215546Sopenharmony_ci                        BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
3232bf215546Sopenharmony_ci   tex_opts.lower_txp_array = true;
3233bf215546Sopenharmony_ci   if (!screen->info.feats.features.shaderImageGatherExtended)
3234bf215546Sopenharmony_ci      tex_opts.lower_tg4_offsets = true;
3235bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
3236bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_GEOMETRY)
3237bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
3238bf215546Sopenharmony_ci   optimize_nir(nir, NULL);
3239bf215546Sopenharmony_ci   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
3240bf215546Sopenharmony_ci   if (screen->driconf.inline_uniforms)
3241bf215546Sopenharmony_ci      nir_find_inlinable_uniforms(nir);
3242bf215546Sopenharmony_ci
3243bf215546Sopenharmony_ci   return NULL;
3244bf215546Sopenharmony_ci}
3245bf215546Sopenharmony_ci
3246bf215546Sopenharmony_civoid
3247bf215546Sopenharmony_cizink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
3248bf215546Sopenharmony_ci{
3249bf215546Sopenharmony_ci   set_foreach(shader->programs, entry) {
3250bf215546Sopenharmony_ci      if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
3251bf215546Sopenharmony_ci         struct zink_compute_program *comp = (void*)entry->key;
3252bf215546Sopenharmony_ci         if (!comp->base.removed) {
3253bf215546Sopenharmony_ci            _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
3254bf215546Sopenharmony_ci            comp->base.removed = true;
3255bf215546Sopenharmony_ci         }
3256bf215546Sopenharmony_ci         comp->shader = NULL;
3257bf215546Sopenharmony_ci         zink_compute_program_reference(ctx, &comp, NULL);
3258bf215546Sopenharmony_ci      } else {
3259bf215546Sopenharmony_ci         struct zink_gfx_program *prog = (void*)entry->key;
3260bf215546Sopenharmony_ci         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
3261bf215546Sopenharmony_ci         assert(pstage < ZINK_SHADER_COUNT);
3262bf215546Sopenharmony_ci         if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
3263bf215546Sopenharmony_ci            unsigned stages_present = prog->stages_present;
3264bf215546Sopenharmony_ci            if (prog->shaders[PIPE_SHADER_TESS_CTRL] && prog->shaders[PIPE_SHADER_TESS_CTRL]->is_generated)
3265bf215546Sopenharmony_ci               stages_present &= ~BITFIELD_BIT(PIPE_SHADER_TESS_CTRL);
3266bf215546Sopenharmony_ci            struct hash_table *ht = &ctx->program_cache[stages_present >> 2];
3267bf215546Sopenharmony_ci            struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
3268bf215546Sopenharmony_ci            assert(he);
3269bf215546Sopenharmony_ci            _mesa_hash_table_remove(ht, he);
3270bf215546Sopenharmony_ci            prog->base.removed = true;
3271bf215546Sopenharmony_ci         }
3272bf215546Sopenharmony_ci         if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)
3273bf215546Sopenharmony_ci            prog->shaders[pstage] = NULL;
3274bf215546Sopenharmony_ci         /* only remove generated tcs during parent tes destruction */
3275bf215546Sopenharmony_ci         if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
3276bf215546Sopenharmony_ci            prog->shaders[PIPE_SHADER_TESS_CTRL] = NULL;
3277bf215546Sopenharmony_ci         zink_gfx_program_reference(ctx, &prog, NULL);
3278bf215546Sopenharmony_ci      }
3279bf215546Sopenharmony_ci   }
3280bf215546Sopenharmony_ci   if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) {
3281bf215546Sopenharmony_ci      /* automatically destroy generated tcs shaders when tes is destroyed */
3282bf215546Sopenharmony_ci      zink_shader_free(ctx, shader->generated);
3283bf215546Sopenharmony_ci      shader->generated = NULL;
3284bf215546Sopenharmony_ci   }
3285bf215546Sopenharmony_ci   _mesa_set_destroy(shader->programs, NULL);
3286bf215546Sopenharmony_ci   ralloc_free(shader->nir);
3287bf215546Sopenharmony_ci   ralloc_free(shader->spirv);
3288bf215546Sopenharmony_ci   FREE(shader);
3289bf215546Sopenharmony_ci}
3290bf215546Sopenharmony_ci
3291bf215546Sopenharmony_ci
3292bf215546Sopenharmony_ciVkShaderModule
3293bf215546Sopenharmony_cizink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
3294bf215546Sopenharmony_ci{
3295bf215546Sopenharmony_ci   assert(zs->nir->info.stage == MESA_SHADER_TESS_CTRL);
3296bf215546Sopenharmony_ci   /* shortcut all the nir passes since we just have to change this one word */
3297bf215546Sopenharmony_ci   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
3298bf215546Sopenharmony_ci   return zink_shader_spirv_compile(screen, zs, NULL);
3299bf215546Sopenharmony_ci}
3300bf215546Sopenharmony_ci
3301bf215546Sopenharmony_ci/* creating a passthrough tcs shader that's roughly:
3302bf215546Sopenharmony_ci
3303bf215546Sopenharmony_ci#version 150
3304bf215546Sopenharmony_ci#extension GL_ARB_tessellation_shader : require
3305bf215546Sopenharmony_ci
3306bf215546Sopenharmony_ciin vec4 some_var[gl_MaxPatchVertices];
3307bf215546Sopenharmony_ciout vec4 some_var_out;
3308bf215546Sopenharmony_ci
3309bf215546Sopenharmony_cilayout(push_constant) uniform tcsPushConstants {
3310bf215546Sopenharmony_ci    layout(offset = 0) float TessLevelInner[2];
3311bf215546Sopenharmony_ci    layout(offset = 8) float TessLevelOuter[4];
3312bf215546Sopenharmony_ci} u_tcsPushConstants;
3313bf215546Sopenharmony_cilayout(vertices = $vertices_per_patch) out;
3314bf215546Sopenharmony_civoid main()
3315bf215546Sopenharmony_ci{
3316bf215546Sopenharmony_ci  gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
3317bf215546Sopenharmony_ci  gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
3318bf215546Sopenharmony_ci  some_var_out = some_var[gl_InvocationID];
3319bf215546Sopenharmony_ci}
3320bf215546Sopenharmony_ci
3321bf215546Sopenharmony_ci*/
3322bf215546Sopenharmony_cistruct zink_shader *
3323bf215546Sopenharmony_cizink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
3324bf215546Sopenharmony_ci{
3325bf215546Sopenharmony_ci   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
3326bf215546Sopenharmony_ci   ret->hash = _mesa_hash_pointer(ret);
3327bf215546Sopenharmony_ci   ret->programs = _mesa_pointer_set_create(NULL);
3328bf215546Sopenharmony_ci   simple_mtx_init(&ret->lock, mtx_plain);
3329bf215546Sopenharmony_ci
3330bf215546Sopenharmony_ci   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
3331bf215546Sopenharmony_ci   nir_function *fn = nir_function_create(nir, "main");
3332bf215546Sopenharmony_ci   fn->is_entrypoint = true;
3333bf215546Sopenharmony_ci   nir_function_impl *impl = nir_function_impl_create(fn);
3334bf215546Sopenharmony_ci
3335bf215546Sopenharmony_ci   nir_builder b;
3336bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
3337bf215546Sopenharmony_ci   b.cursor = nir_before_block(nir_start_block(impl));
3338bf215546Sopenharmony_ci
3339bf215546Sopenharmony_ci   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
3340bf215546Sopenharmony_ci
3341bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, vs->nir) {
3342bf215546Sopenharmony_ci      const struct glsl_type *type = var->type;
3343bf215546Sopenharmony_ci      const struct glsl_type *in_type = var->type;
3344bf215546Sopenharmony_ci      const struct glsl_type *out_type = var->type;
3345bf215546Sopenharmony_ci      char buf[1024];
3346bf215546Sopenharmony_ci      snprintf(buf, sizeof(buf), "%s_out", var->name);
3347bf215546Sopenharmony_ci      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
3348bf215546Sopenharmony_ci      out_type = glsl_array_type(type, vertices_per_patch, 0);
3349bf215546Sopenharmony_ci
3350bf215546Sopenharmony_ci      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
3351bf215546Sopenharmony_ci      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
3352bf215546Sopenharmony_ci      out->data.location = in->data.location = var->data.location;
3353bf215546Sopenharmony_ci      out->data.location_frac = in->data.location_frac = var->data.location_frac;
3354bf215546Sopenharmony_ci
3355bf215546Sopenharmony_ci      /* gl_in[] receives values from equivalent built-in output
3356bf215546Sopenharmony_ci         variables written by the vertex shader (section 2.14.7).  Each array
3357bf215546Sopenharmony_ci         element of gl_in[] is a structure holding values for a specific vertex of
3358bf215546Sopenharmony_ci         the input patch.  The length of gl_in[] is equal to the
3359bf215546Sopenharmony_ci         implementation-dependent maximum patch size (gl_MaxPatchVertices).
3360bf215546Sopenharmony_ci         - ARB_tessellation_shader
3361bf215546Sopenharmony_ci       */
3362bf215546Sopenharmony_ci      /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
3363bf215546Sopenharmony_ci      nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
3364bf215546Sopenharmony_ci      nir_ssa_def *load = nir_load_deref(&b, in_array_var);
3365bf215546Sopenharmony_ci      nir_deref_instr *out_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
3366bf215546Sopenharmony_ci      nir_store_deref(&b, out_array_var, load, 0xff);
3367bf215546Sopenharmony_ci   }
3368bf215546Sopenharmony_ci   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
3369bf215546Sopenharmony_ci   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
3370bf215546Sopenharmony_ci   gl_TessLevelInner->data.patch = 1;
3371bf215546Sopenharmony_ci   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
3372bf215546Sopenharmony_ci   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
3373bf215546Sopenharmony_ci   gl_TessLevelOuter->data.patch = 1;
3374bf215546Sopenharmony_ci
3375bf215546Sopenharmony_ci   /* hacks so we can size these right for now */
3376bf215546Sopenharmony_ci   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
3377bf215546Sopenharmony_ci   /* just use a single blob for padding here because it's easier */
3378bf215546Sopenharmony_ci   fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
3379bf215546Sopenharmony_ci   fields[0].name = ralloc_asprintf(nir, "padding");
3380bf215546Sopenharmony_ci   fields[0].offset = 0;
3381bf215546Sopenharmony_ci   fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
3382bf215546Sopenharmony_ci   fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
3383bf215546Sopenharmony_ci   fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
3384bf215546Sopenharmony_ci   fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
3385bf215546Sopenharmony_ci   fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
3386bf215546Sopenharmony_ci   fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
3387bf215546Sopenharmony_ci   nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
3388bf215546Sopenharmony_ci                                                 glsl_struct_type(fields, 3, "struct", false), "pushconst");
3389bf215546Sopenharmony_ci   pushconst->data.location = VARYING_SLOT_VAR0;
3390bf215546Sopenharmony_ci
3391bf215546Sopenharmony_ci   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
3392bf215546Sopenharmony_ci   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);
3393bf215546Sopenharmony_ci
3394bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++) {
3395bf215546Sopenharmony_ci      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
3396bf215546Sopenharmony_ci      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
3397bf215546Sopenharmony_ci   }
3398bf215546Sopenharmony_ci   for (unsigned i = 0; i < 4; i++) {
3399bf215546Sopenharmony_ci      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
3400bf215546Sopenharmony_ci      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
3401bf215546Sopenharmony_ci   }
3402bf215546Sopenharmony_ci
3403bf215546Sopenharmony_ci   nir->info.tess.tcs_vertices_out = vertices_per_patch;
3404bf215546Sopenharmony_ci   nir_validate_shader(nir, "created");
3405bf215546Sopenharmony_ci
3406bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3407bf215546Sopenharmony_ci   optimize_nir(nir, NULL);
3408bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3409bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_convert_from_ssa, true);
3410bf215546Sopenharmony_ci
3411bf215546Sopenharmony_ci   ret->nir = nir;
3412bf215546Sopenharmony_ci   ret->is_generated = true;
3413bf215546Sopenharmony_ci   return ret;
3414bf215546Sopenharmony_ci}
3415