1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Connor Abbott (cwabbott0@gmail.com)
25bf215546Sopenharmony_ci *    Jason Ekstrand (jason@jlekstrand.net)
26bf215546Sopenharmony_ci *
27bf215546Sopenharmony_ci */
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci/*
30bf215546Sopenharmony_ci * This lowering pass converts references to input/output variables with
31bf215546Sopenharmony_ci * loads/stores to actual input/output intrinsics.
32bf215546Sopenharmony_ci */
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci#include "nir.h"
35bf215546Sopenharmony_ci#include "nir_builder.h"
36bf215546Sopenharmony_ci#include "nir_deref.h"
37bf215546Sopenharmony_ci#include "nir_xfb_info.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci#include "util/u_math.h"
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_cistruct lower_io_state {
42bf215546Sopenharmony_ci   void *dead_ctx;
43bf215546Sopenharmony_ci   nir_builder builder;
44bf215546Sopenharmony_ci   int (*type_size)(const struct glsl_type *type, bool);
45bf215546Sopenharmony_ci   nir_variable_mode modes;
46bf215546Sopenharmony_ci   nir_lower_io_options options;
47bf215546Sopenharmony_ci};
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_cistatic nir_intrinsic_op
50bf215546Sopenharmony_cissbo_atomic_for_deref(nir_intrinsic_op deref_op)
51bf215546Sopenharmony_ci{
52bf215546Sopenharmony_ci   switch (deref_op) {
53bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
54bf215546Sopenharmony_ci   OP(atomic_exchange)
55bf215546Sopenharmony_ci   OP(atomic_comp_swap)
56bf215546Sopenharmony_ci   OP(atomic_add)
57bf215546Sopenharmony_ci   OP(atomic_imin)
58bf215546Sopenharmony_ci   OP(atomic_umin)
59bf215546Sopenharmony_ci   OP(atomic_imax)
60bf215546Sopenharmony_ci   OP(atomic_umax)
61bf215546Sopenharmony_ci   OP(atomic_and)
62bf215546Sopenharmony_ci   OP(atomic_or)
63bf215546Sopenharmony_ci   OP(atomic_xor)
64bf215546Sopenharmony_ci   OP(atomic_fadd)
65bf215546Sopenharmony_ci   OP(atomic_fmin)
66bf215546Sopenharmony_ci   OP(atomic_fmax)
67bf215546Sopenharmony_ci   OP(atomic_fcomp_swap)
68bf215546Sopenharmony_ci#undef OP
69bf215546Sopenharmony_ci   default:
70bf215546Sopenharmony_ci      unreachable("Invalid SSBO atomic");
71bf215546Sopenharmony_ci   }
72bf215546Sopenharmony_ci}
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_cistatic nir_intrinsic_op
75bf215546Sopenharmony_ciglobal_atomic_for_deref(nir_address_format addr_format,
76bf215546Sopenharmony_ci                        nir_intrinsic_op deref_op)
77bf215546Sopenharmony_ci{
78bf215546Sopenharmony_ci   switch (deref_op) {
79bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O:              \
80bf215546Sopenharmony_ci   if (addr_format != nir_address_format_2x32bit_global) \
81bf215546Sopenharmony_ci      return nir_intrinsic_global_##O;                   \
82bf215546Sopenharmony_ci   else                                                  \
83bf215546Sopenharmony_ci      return nir_intrinsic_global_##O##_2x32;
84bf215546Sopenharmony_ci   OP(atomic_exchange)
85bf215546Sopenharmony_ci   OP(atomic_comp_swap)
86bf215546Sopenharmony_ci   OP(atomic_add)
87bf215546Sopenharmony_ci   OP(atomic_imin)
88bf215546Sopenharmony_ci   OP(atomic_umin)
89bf215546Sopenharmony_ci   OP(atomic_imax)
90bf215546Sopenharmony_ci   OP(atomic_umax)
91bf215546Sopenharmony_ci   OP(atomic_and)
92bf215546Sopenharmony_ci   OP(atomic_or)
93bf215546Sopenharmony_ci   OP(atomic_xor)
94bf215546Sopenharmony_ci   OP(atomic_fadd)
95bf215546Sopenharmony_ci   OP(atomic_fmin)
96bf215546Sopenharmony_ci   OP(atomic_fmax)
97bf215546Sopenharmony_ci   OP(atomic_fcomp_swap)
98bf215546Sopenharmony_ci#undef OP
99bf215546Sopenharmony_ci   default:
100bf215546Sopenharmony_ci      unreachable("Invalid SSBO atomic");
101bf215546Sopenharmony_ci   }
102bf215546Sopenharmony_ci}
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_cistatic nir_intrinsic_op
105bf215546Sopenharmony_cishared_atomic_for_deref(nir_intrinsic_op deref_op)
106bf215546Sopenharmony_ci{
107bf215546Sopenharmony_ci   switch (deref_op) {
108bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
109bf215546Sopenharmony_ci   OP(atomic_exchange)
110bf215546Sopenharmony_ci   OP(atomic_comp_swap)
111bf215546Sopenharmony_ci   OP(atomic_add)
112bf215546Sopenharmony_ci   OP(atomic_imin)
113bf215546Sopenharmony_ci   OP(atomic_umin)
114bf215546Sopenharmony_ci   OP(atomic_imax)
115bf215546Sopenharmony_ci   OP(atomic_umax)
116bf215546Sopenharmony_ci   OP(atomic_and)
117bf215546Sopenharmony_ci   OP(atomic_or)
118bf215546Sopenharmony_ci   OP(atomic_xor)
119bf215546Sopenharmony_ci   OP(atomic_fadd)
120bf215546Sopenharmony_ci   OP(atomic_fmin)
121bf215546Sopenharmony_ci   OP(atomic_fmax)
122bf215546Sopenharmony_ci   OP(atomic_fcomp_swap)
123bf215546Sopenharmony_ci#undef OP
124bf215546Sopenharmony_ci   default:
125bf215546Sopenharmony_ci      unreachable("Invalid shared atomic");
126bf215546Sopenharmony_ci   }
127bf215546Sopenharmony_ci}
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_cistatic nir_intrinsic_op
130bf215546Sopenharmony_citask_payload_atomic_for_deref(nir_intrinsic_op deref_op)
131bf215546Sopenharmony_ci{
132bf215546Sopenharmony_ci   switch (deref_op) {
133bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_task_payload_##O;
134bf215546Sopenharmony_ci   OP(atomic_exchange)
135bf215546Sopenharmony_ci   OP(atomic_comp_swap)
136bf215546Sopenharmony_ci   OP(atomic_add)
137bf215546Sopenharmony_ci   OP(atomic_imin)
138bf215546Sopenharmony_ci   OP(atomic_umin)
139bf215546Sopenharmony_ci   OP(atomic_imax)
140bf215546Sopenharmony_ci   OP(atomic_umax)
141bf215546Sopenharmony_ci   OP(atomic_and)
142bf215546Sopenharmony_ci   OP(atomic_or)
143bf215546Sopenharmony_ci   OP(atomic_xor)
144bf215546Sopenharmony_ci   OP(atomic_fadd)
145bf215546Sopenharmony_ci   OP(atomic_fmin)
146bf215546Sopenharmony_ci   OP(atomic_fmax)
147bf215546Sopenharmony_ci   OP(atomic_fcomp_swap)
148bf215546Sopenharmony_ci#undef OP
149bf215546Sopenharmony_ci   default:
150bf215546Sopenharmony_ci      unreachable("Invalid task payload atomic");
151bf215546Sopenharmony_ci   }
152bf215546Sopenharmony_ci}
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_civoid
155bf215546Sopenharmony_cinir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
156bf215546Sopenharmony_ci                         unsigned *size,
157bf215546Sopenharmony_ci                         int (*type_size)(const struct glsl_type *, bool))
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   unsigned location = 0;
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, mode) {
162bf215546Sopenharmony_ci      var->data.driver_location = location;
163bf215546Sopenharmony_ci      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
164bf215546Sopenharmony_ci                                var->data.mode == nir_var_shader_out ||
165bf215546Sopenharmony_ci                                var->data.bindless;
166bf215546Sopenharmony_ci      location += type_size(var->type, bindless_type_size);
167bf215546Sopenharmony_ci   }
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci   *size = location;
170bf215546Sopenharmony_ci}
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci/**
173bf215546Sopenharmony_ci * Some inputs and outputs are arrayed, meaning that there is an extra level
174bf215546Sopenharmony_ci * of array indexing to handle mismatches between the shader interface and the
175bf215546Sopenharmony_ci * dispatch pattern of the shader.  For instance, geometry shaders are
176bf215546Sopenharmony_ci * executed per-primitive while their inputs and outputs are specified
177bf215546Sopenharmony_ci * per-vertex so all inputs and outputs have to be additionally indexed with
178bf215546Sopenharmony_ci * the vertex index within the primitive.
179bf215546Sopenharmony_ci */
180bf215546Sopenharmony_cibool
181bf215546Sopenharmony_cinir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
182bf215546Sopenharmony_ci{
183bf215546Sopenharmony_ci   if (var->data.patch || !glsl_type_is_array(var->type))
184bf215546Sopenharmony_ci      return false;
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   if (stage == MESA_SHADER_MESH) {
187bf215546Sopenharmony_ci      /* NV_mesh_shader: this is flat array for the whole workgroup. */
188bf215546Sopenharmony_ci      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
189bf215546Sopenharmony_ci         return var->data.per_primitive;
190bf215546Sopenharmony_ci   }
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   if (var->data.mode == nir_var_shader_in)
193bf215546Sopenharmony_ci      return stage == MESA_SHADER_GEOMETRY ||
194bf215546Sopenharmony_ci             stage == MESA_SHADER_TESS_CTRL ||
195bf215546Sopenharmony_ci             stage == MESA_SHADER_TESS_EVAL;
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   if (var->data.mode == nir_var_shader_out)
198bf215546Sopenharmony_ci      return stage == MESA_SHADER_TESS_CTRL ||
199bf215546Sopenharmony_ci             stage == MESA_SHADER_MESH;
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   return false;
202bf215546Sopenharmony_ci}
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_cistatic unsigned get_number_of_slots(struct lower_io_state *state,
205bf215546Sopenharmony_ci                                    const nir_variable *var)
206bf215546Sopenharmony_ci{
207bf215546Sopenharmony_ci   const struct glsl_type *type = var->type;
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
210bf215546Sopenharmony_ci      assert(glsl_type_is_array(type));
211bf215546Sopenharmony_ci      type = glsl_get_array_element(type);
212bf215546Sopenharmony_ci   }
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   /* NV_mesh_shader:
215bf215546Sopenharmony_ci    * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
216bf215546Sopenharmony_ci    * as opposed to D3D-style mesh shaders where it's addressed by
217bf215546Sopenharmony_ci    * the primitive index.
218bf215546Sopenharmony_ci    * Prevent assigning several slots to primitive indices,
219bf215546Sopenharmony_ci    * to avoid some issues.
220bf215546Sopenharmony_ci    */
221bf215546Sopenharmony_ci   if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
222bf215546Sopenharmony_ci       var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
223bf215546Sopenharmony_ci       !nir_is_arrayed_io(var, state->builder.shader->info.stage))
224bf215546Sopenharmony_ci      return 1;
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci   return state->type_size(type, var->data.bindless);
227bf215546Sopenharmony_ci}
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_cistatic nir_ssa_def *
230bf215546Sopenharmony_ciget_io_offset(nir_builder *b, nir_deref_instr *deref,
231bf215546Sopenharmony_ci              nir_ssa_def **array_index,
232bf215546Sopenharmony_ci              int (*type_size)(const struct glsl_type *, bool),
233bf215546Sopenharmony_ci              unsigned *component, bool bts)
234bf215546Sopenharmony_ci{
235bf215546Sopenharmony_ci   nir_deref_path path;
236bf215546Sopenharmony_ci   nir_deref_path_init(&path, deref, NULL);
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   assert(path.path[0]->deref_type == nir_deref_type_var);
239bf215546Sopenharmony_ci   nir_deref_instr **p = &path.path[1];
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
242bf215546Sopenharmony_ci    * inputs), skip the outermost array index.  Process the rest normally.
243bf215546Sopenharmony_ci    */
244bf215546Sopenharmony_ci   if (array_index != NULL) {
245bf215546Sopenharmony_ci      assert((*p)->deref_type == nir_deref_type_array);
246bf215546Sopenharmony_ci      *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
247bf215546Sopenharmony_ci      p++;
248bf215546Sopenharmony_ci   }
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   if (path.path[0]->var->data.compact) {
251bf215546Sopenharmony_ci      assert((*p)->deref_type == nir_deref_type_array);
252bf215546Sopenharmony_ci      assert(glsl_type_is_scalar((*p)->type));
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci      /* We always lower indirect dereferences for "compact" array vars. */
255bf215546Sopenharmony_ci      const unsigned index = nir_src_as_uint((*p)->arr.index);
256bf215546Sopenharmony_ci      const unsigned total_offset = *component + index;
257bf215546Sopenharmony_ci      const unsigned slot_offset = total_offset / 4;
258bf215546Sopenharmony_ci      *component = total_offset % 4;
259bf215546Sopenharmony_ci      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
260bf215546Sopenharmony_ci   }
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   /* Just emit code and let constant-folding go to town */
263bf215546Sopenharmony_ci   nir_ssa_def *offset = nir_imm_int(b, 0);
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   for (; *p; p++) {
266bf215546Sopenharmony_ci      if ((*p)->deref_type == nir_deref_type_array) {
267bf215546Sopenharmony_ci         unsigned size = type_size((*p)->type, bts);
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci         nir_ssa_def *mul =
270bf215546Sopenharmony_ci            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci         offset = nir_iadd(b, offset, mul);
273bf215546Sopenharmony_ci      } else if ((*p)->deref_type == nir_deref_type_struct) {
274bf215546Sopenharmony_ci         /* p starts at path[1], so this is safe */
275bf215546Sopenharmony_ci         nir_deref_instr *parent = *(p - 1);
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci         unsigned field_offset = 0;
278bf215546Sopenharmony_ci         for (unsigned i = 0; i < (*p)->strct.index; i++) {
279bf215546Sopenharmony_ci            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
280bf215546Sopenharmony_ci         }
281bf215546Sopenharmony_ci         offset = nir_iadd_imm(b, offset, field_offset);
282bf215546Sopenharmony_ci      } else {
283bf215546Sopenharmony_ci         unreachable("Unsupported deref type");
284bf215546Sopenharmony_ci      }
285bf215546Sopenharmony_ci   }
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci   nir_deref_path_finish(&path);
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   return offset;
290bf215546Sopenharmony_ci}
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_cistatic nir_ssa_def *
293bf215546Sopenharmony_ciemit_load(struct lower_io_state *state,
294bf215546Sopenharmony_ci          nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
295bf215546Sopenharmony_ci          unsigned component, unsigned num_components, unsigned bit_size,
296bf215546Sopenharmony_ci          nir_alu_type dest_type)
297bf215546Sopenharmony_ci{
298bf215546Sopenharmony_ci   nir_builder *b = &state->builder;
299bf215546Sopenharmony_ci   const nir_shader *nir = b->shader;
300bf215546Sopenharmony_ci   nir_variable_mode mode = var->data.mode;
301bf215546Sopenharmony_ci   nir_ssa_def *barycentric = NULL;
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci   nir_intrinsic_op op;
304bf215546Sopenharmony_ci   switch (mode) {
305bf215546Sopenharmony_ci   case nir_var_shader_in:
306bf215546Sopenharmony_ci      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
307bf215546Sopenharmony_ci          nir->options->use_interpolated_input_intrinsics &&
308bf215546Sopenharmony_ci          var->data.interpolation != INTERP_MODE_FLAT &&
309bf215546Sopenharmony_ci          !var->data.per_primitive) {
310bf215546Sopenharmony_ci         if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
311bf215546Sopenharmony_ci            assert(array_index != NULL);
312bf215546Sopenharmony_ci            op = nir_intrinsic_load_input_vertex;
313bf215546Sopenharmony_ci         } else {
314bf215546Sopenharmony_ci            assert(array_index == NULL);
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci            nir_intrinsic_op bary_op;
317bf215546Sopenharmony_ci            if (var->data.sample ||
318bf215546Sopenharmony_ci                (state->options & nir_lower_io_force_sample_interpolation))
319bf215546Sopenharmony_ci               bary_op = nir_intrinsic_load_barycentric_sample;
320bf215546Sopenharmony_ci            else if (var->data.centroid)
321bf215546Sopenharmony_ci               bary_op = nir_intrinsic_load_barycentric_centroid;
322bf215546Sopenharmony_ci            else
323bf215546Sopenharmony_ci               bary_op = nir_intrinsic_load_barycentric_pixel;
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci            barycentric = nir_load_barycentric(&state->builder, bary_op,
326bf215546Sopenharmony_ci                                               var->data.interpolation);
327bf215546Sopenharmony_ci            op = nir_intrinsic_load_interpolated_input;
328bf215546Sopenharmony_ci         }
329bf215546Sopenharmony_ci      } else {
330bf215546Sopenharmony_ci         op = array_index ? nir_intrinsic_load_per_vertex_input :
331bf215546Sopenharmony_ci                            nir_intrinsic_load_input;
332bf215546Sopenharmony_ci      }
333bf215546Sopenharmony_ci      break;
334bf215546Sopenharmony_ci   case nir_var_shader_out:
335bf215546Sopenharmony_ci      op = !array_index            ? nir_intrinsic_load_output :
336bf215546Sopenharmony_ci           var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
337bf215546Sopenharmony_ci                                     nir_intrinsic_load_per_vertex_output;
338bf215546Sopenharmony_ci      break;
339bf215546Sopenharmony_ci   case nir_var_uniform:
340bf215546Sopenharmony_ci      op = nir_intrinsic_load_uniform;
341bf215546Sopenharmony_ci      break;
342bf215546Sopenharmony_ci   default:
343bf215546Sopenharmony_ci      unreachable("Unknown variable mode");
344bf215546Sopenharmony_ci   }
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   nir_intrinsic_instr *load =
347bf215546Sopenharmony_ci      nir_intrinsic_instr_create(state->builder.shader, op);
348bf215546Sopenharmony_ci   load->num_components = num_components;
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci   nir_intrinsic_set_base(load, var->data.driver_location);
351bf215546Sopenharmony_ci   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
352bf215546Sopenharmony_ci      nir_intrinsic_set_component(load, component);
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci   if (load->intrinsic == nir_intrinsic_load_uniform)
355bf215546Sopenharmony_ci      nir_intrinsic_set_range(load,
356bf215546Sopenharmony_ci                              state->type_size(var->type, var->data.bindless));
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   if (nir_intrinsic_has_access(load))
359bf215546Sopenharmony_ci      nir_intrinsic_set_access(load, var->data.access);
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci   nir_intrinsic_set_dest_type(load, dest_type);
362bf215546Sopenharmony_ci
363bf215546Sopenharmony_ci   if (load->intrinsic != nir_intrinsic_load_uniform) {
364bf215546Sopenharmony_ci      nir_io_semantics semantics = {0};
365bf215546Sopenharmony_ci      semantics.location = var->data.location;
366bf215546Sopenharmony_ci      semantics.num_slots = get_number_of_slots(state, var);
367bf215546Sopenharmony_ci      semantics.fb_fetch_output = var->data.fb_fetch_output;
368bf215546Sopenharmony_ci      semantics.medium_precision =
369bf215546Sopenharmony_ci         var->data.precision == GLSL_PRECISION_MEDIUM ||
370bf215546Sopenharmony_ci         var->data.precision == GLSL_PRECISION_LOW;
371bf215546Sopenharmony_ci      nir_intrinsic_set_io_semantics(load, semantics);
372bf215546Sopenharmony_ci   }
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci   if (array_index) {
375bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(array_index);
376bf215546Sopenharmony_ci      load->src[1] = nir_src_for_ssa(offset);
377bf215546Sopenharmony_ci   } else if (barycentric) {
378bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(barycentric);
379bf215546Sopenharmony_ci      load->src[1] = nir_src_for_ssa(offset);
380bf215546Sopenharmony_ci   } else {
381bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(offset);
382bf215546Sopenharmony_ci   }
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci   nir_ssa_dest_init(&load->instr, &load->dest,
385bf215546Sopenharmony_ci                     num_components, bit_size, NULL);
386bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &load->instr);
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci   return &load->dest.ssa;
389bf215546Sopenharmony_ci}
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_cistatic nir_ssa_def *
392bf215546Sopenharmony_cilower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
393bf215546Sopenharmony_ci           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
394bf215546Sopenharmony_ci           unsigned component, const struct glsl_type *type)
395bf215546Sopenharmony_ci{
396bf215546Sopenharmony_ci   assert(intrin->dest.is_ssa);
397bf215546Sopenharmony_ci   if (intrin->dest.ssa.bit_size == 64 &&
398bf215546Sopenharmony_ci       (state->options & nir_lower_io_lower_64bit_to_32)) {
399bf215546Sopenharmony_ci      nir_builder *b = &state->builder;
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci      nir_ssa_def *comp64[4];
404bf215546Sopenharmony_ci      assert(component == 0 || component == 2);
405bf215546Sopenharmony_ci      unsigned dest_comp = 0;
406bf215546Sopenharmony_ci      while (dest_comp < intrin->dest.ssa.num_components) {
407bf215546Sopenharmony_ci         const unsigned num_comps =
408bf215546Sopenharmony_ci            MIN2(intrin->dest.ssa.num_components - dest_comp,
409bf215546Sopenharmony_ci                 (4 - component) / 2);
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci         nir_ssa_def *data32 =
412bf215546Sopenharmony_ci            emit_load(state, array_index, var, offset, component,
413bf215546Sopenharmony_ci                      num_comps * 2, 32, nir_type_uint32);
414bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_comps; i++) {
415bf215546Sopenharmony_ci            comp64[dest_comp + i] =
416bf215546Sopenharmony_ci               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
417bf215546Sopenharmony_ci         }
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci         /* Only the first store has a component offset */
420bf215546Sopenharmony_ci         component = 0;
421bf215546Sopenharmony_ci         dest_comp += num_comps;
422bf215546Sopenharmony_ci         offset = nir_iadd_imm(b, offset, slot_size);
423bf215546Sopenharmony_ci      }
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci      return nir_vec(b, comp64, intrin->dest.ssa.num_components);
426bf215546Sopenharmony_ci   } else if (intrin->dest.ssa.bit_size == 1) {
427bf215546Sopenharmony_ci      /* Booleans are 32-bit */
428bf215546Sopenharmony_ci      assert(glsl_type_is_boolean(type));
429bf215546Sopenharmony_ci      return nir_b2b1(&state->builder,
430bf215546Sopenharmony_ci                      emit_load(state, array_index, var, offset, component,
431bf215546Sopenharmony_ci                                intrin->dest.ssa.num_components, 32,
432bf215546Sopenharmony_ci                                nir_type_bool32));
433bf215546Sopenharmony_ci   } else {
434bf215546Sopenharmony_ci      return emit_load(state, array_index, var, offset, component,
435bf215546Sopenharmony_ci                       intrin->dest.ssa.num_components,
436bf215546Sopenharmony_ci                       intrin->dest.ssa.bit_size,
437bf215546Sopenharmony_ci                       nir_get_nir_type_for_glsl_type(type));
438bf215546Sopenharmony_ci   }
439bf215546Sopenharmony_ci}
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_cistatic void
442bf215546Sopenharmony_ciemit_store(struct lower_io_state *state, nir_ssa_def *data,
443bf215546Sopenharmony_ci           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
444bf215546Sopenharmony_ci           unsigned component, unsigned num_components,
445bf215546Sopenharmony_ci           nir_component_mask_t write_mask, nir_alu_type src_type)
446bf215546Sopenharmony_ci{
447bf215546Sopenharmony_ci   nir_builder *b = &state->builder;
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   assert(var->data.mode == nir_var_shader_out);
450bf215546Sopenharmony_ci   nir_intrinsic_op op =
451bf215546Sopenharmony_ci      !array_index            ? nir_intrinsic_store_output :
452bf215546Sopenharmony_ci      var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
453bf215546Sopenharmony_ci                                nir_intrinsic_store_per_vertex_output;
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci   nir_intrinsic_instr *store =
456bf215546Sopenharmony_ci      nir_intrinsic_instr_create(state->builder.shader, op);
457bf215546Sopenharmony_ci   store->num_components = num_components;
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci   store->src[0] = nir_src_for_ssa(data);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   nir_intrinsic_set_base(store, var->data.driver_location);
462bf215546Sopenharmony_ci   nir_intrinsic_set_component(store, component);
463bf215546Sopenharmony_ci   nir_intrinsic_set_src_type(store, src_type);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   nir_intrinsic_set_write_mask(store, write_mask);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   if (nir_intrinsic_has_access(store))
468bf215546Sopenharmony_ci      nir_intrinsic_set_access(store, var->data.access);
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_ci   if (array_index)
471bf215546Sopenharmony_ci      store->src[1] = nir_src_for_ssa(array_index);
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   unsigned gs_streams = 0;
476bf215546Sopenharmony_ci   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
477bf215546Sopenharmony_ci      if (var->data.stream & NIR_STREAM_PACKED) {
478bf215546Sopenharmony_ci         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
479bf215546Sopenharmony_ci      } else {
480bf215546Sopenharmony_ci         assert(var->data.stream < 4);
481bf215546Sopenharmony_ci         gs_streams = 0;
482bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_components; ++i)
483bf215546Sopenharmony_ci            gs_streams |= var->data.stream << (2 * i);
484bf215546Sopenharmony_ci      }
485bf215546Sopenharmony_ci   }
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   nir_io_semantics semantics = {0};
488bf215546Sopenharmony_ci   semantics.location = var->data.location;
489bf215546Sopenharmony_ci   semantics.num_slots = get_number_of_slots(state, var);
490bf215546Sopenharmony_ci   semantics.dual_source_blend_index = var->data.index;
491bf215546Sopenharmony_ci   semantics.gs_streams = gs_streams;
492bf215546Sopenharmony_ci   semantics.medium_precision =
493bf215546Sopenharmony_ci      var->data.precision == GLSL_PRECISION_MEDIUM ||
494bf215546Sopenharmony_ci      var->data.precision == GLSL_PRECISION_LOW;
495bf215546Sopenharmony_ci   semantics.per_view = var->data.per_view;
496bf215546Sopenharmony_ci   semantics.invariant = var->data.invariant;
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   nir_intrinsic_set_io_semantics(store, semantics);
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &store->instr);
501bf215546Sopenharmony_ci}
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_cistatic void
504bf215546Sopenharmony_cilower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
505bf215546Sopenharmony_ci            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
506bf215546Sopenharmony_ci            unsigned component, const struct glsl_type *type)
507bf215546Sopenharmony_ci{
508bf215546Sopenharmony_ci   assert(intrin->src[1].is_ssa);
509bf215546Sopenharmony_ci   if (intrin->src[1].ssa->bit_size == 64 &&
510bf215546Sopenharmony_ci       (state->options & nir_lower_io_lower_64bit_to_32)) {
511bf215546Sopenharmony_ci      nir_builder *b = &state->builder;
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci      assert(component == 0 || component == 2);
516bf215546Sopenharmony_ci      unsigned src_comp = 0;
517bf215546Sopenharmony_ci      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
518bf215546Sopenharmony_ci      while (src_comp < intrin->num_components) {
519bf215546Sopenharmony_ci         const unsigned num_comps =
520bf215546Sopenharmony_ci            MIN2(intrin->num_components - src_comp,
521bf215546Sopenharmony_ci                 (4 - component) / 2);
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci         if (write_mask & BITFIELD_MASK(num_comps)) {
524bf215546Sopenharmony_ci            nir_ssa_def *data =
525bf215546Sopenharmony_ci               nir_channels(b, intrin->src[1].ssa,
526bf215546Sopenharmony_ci                            BITFIELD_RANGE(src_comp, num_comps));
527bf215546Sopenharmony_ci            nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci            nir_component_mask_t write_mask32 = 0;
530bf215546Sopenharmony_ci            for (unsigned i = 0; i < num_comps; i++) {
531bf215546Sopenharmony_ci               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
532bf215546Sopenharmony_ci                  write_mask32 |= 3 << (i * 2);
533bf215546Sopenharmony_ci            }
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci            emit_store(state, data32, array_index, var, offset,
536bf215546Sopenharmony_ci                       component, data32->num_components, write_mask32,
537bf215546Sopenharmony_ci                       nir_type_uint32);
538bf215546Sopenharmony_ci         }
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci         /* Only the first store has a component offset */
541bf215546Sopenharmony_ci         component = 0;
542bf215546Sopenharmony_ci         src_comp += num_comps;
543bf215546Sopenharmony_ci         write_mask >>= num_comps;
544bf215546Sopenharmony_ci         offset = nir_iadd_imm(b, offset, slot_size);
545bf215546Sopenharmony_ci      }
546bf215546Sopenharmony_ci   } else if (intrin->dest.ssa.bit_size == 1) {
547bf215546Sopenharmony_ci      /* Booleans are 32-bit */
548bf215546Sopenharmony_ci      assert(glsl_type_is_boolean(type));
549bf215546Sopenharmony_ci      nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
550bf215546Sopenharmony_ci      emit_store(state, b32_val, array_index, var, offset,
551bf215546Sopenharmony_ci                 component, intrin->num_components,
552bf215546Sopenharmony_ci                 nir_intrinsic_write_mask(intrin),
553bf215546Sopenharmony_ci                 nir_type_bool32);
554bf215546Sopenharmony_ci   } else {
555bf215546Sopenharmony_ci      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
556bf215546Sopenharmony_ci                 component, intrin->num_components,
557bf215546Sopenharmony_ci                 nir_intrinsic_write_mask(intrin),
558bf215546Sopenharmony_ci                 nir_get_nir_type_for_glsl_type(type));
559bf215546Sopenharmony_ci   }
560bf215546Sopenharmony_ci}
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_cistatic nir_ssa_def *
563bf215546Sopenharmony_cilower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
564bf215546Sopenharmony_ci                     nir_variable *var, nir_ssa_def *offset, unsigned component,
565bf215546Sopenharmony_ci                     const struct glsl_type *type)
566bf215546Sopenharmony_ci{
567bf215546Sopenharmony_ci   nir_builder *b = &state->builder;
568bf215546Sopenharmony_ci   assert(var->data.mode == nir_var_shader_in);
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_ci   /* Ignore interpolateAt() for flat variables - flat is flat. Lower
571bf215546Sopenharmony_ci    * interpolateAtVertex() for explicit variables.
572bf215546Sopenharmony_ci    */
573bf215546Sopenharmony_ci   if (var->data.interpolation == INTERP_MODE_FLAT ||
574bf215546Sopenharmony_ci       var->data.interpolation == INTERP_MODE_EXPLICIT) {
575bf215546Sopenharmony_ci      nir_ssa_def *vertex_index = NULL;
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
578bf215546Sopenharmony_ci         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
579bf215546Sopenharmony_ci         vertex_index = intrin->src[1].ssa;
580bf215546Sopenharmony_ci      }
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci      return lower_load(intrin, state, vertex_index, var, offset, component, type);
583bf215546Sopenharmony_ci   }
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_ci   /* None of the supported APIs allow interpolation on 64-bit things */
586bf215546Sopenharmony_ci   assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci   nir_intrinsic_op bary_op;
589bf215546Sopenharmony_ci   switch (intrin->intrinsic) {
590bf215546Sopenharmony_ci   case nir_intrinsic_interp_deref_at_centroid:
591bf215546Sopenharmony_ci      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
592bf215546Sopenharmony_ci                nir_intrinsic_load_barycentric_sample :
593bf215546Sopenharmony_ci                nir_intrinsic_load_barycentric_centroid;
594bf215546Sopenharmony_ci      break;
595bf215546Sopenharmony_ci   case nir_intrinsic_interp_deref_at_sample:
596bf215546Sopenharmony_ci      bary_op = nir_intrinsic_load_barycentric_at_sample;
597bf215546Sopenharmony_ci      break;
598bf215546Sopenharmony_ci   case nir_intrinsic_interp_deref_at_offset:
599bf215546Sopenharmony_ci      bary_op = nir_intrinsic_load_barycentric_at_offset;
600bf215546Sopenharmony_ci      break;
601bf215546Sopenharmony_ci   default:
602bf215546Sopenharmony_ci      unreachable("Bogus interpolateAt() intrinsic.");
603bf215546Sopenharmony_ci   }
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci   nir_intrinsic_instr *bary_setup =
606bf215546Sopenharmony_ci      nir_intrinsic_instr_create(state->builder.shader, bary_op);
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
609bf215546Sopenharmony_ci   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
610bf215546Sopenharmony_ci
611bf215546Sopenharmony_ci   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
612bf215546Sopenharmony_ci       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
613bf215546Sopenharmony_ci       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
614bf215546Sopenharmony_ci      nir_src_copy(&bary_setup->src[0], &intrin->src[1]);
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &bary_setup->instr);
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci   nir_io_semantics semantics = {0};
619bf215546Sopenharmony_ci   semantics.location = var->data.location;
620bf215546Sopenharmony_ci   semantics.num_slots = get_number_of_slots(state, var);
621bf215546Sopenharmony_ci   semantics.medium_precision =
622bf215546Sopenharmony_ci      var->data.precision == GLSL_PRECISION_MEDIUM ||
623bf215546Sopenharmony_ci      var->data.precision == GLSL_PRECISION_LOW;
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci   assert(intrin->dest.is_ssa);
626bf215546Sopenharmony_ci   nir_ssa_def *load =
627bf215546Sopenharmony_ci      nir_load_interpolated_input(&state->builder,
628bf215546Sopenharmony_ci                                  intrin->dest.ssa.num_components,
629bf215546Sopenharmony_ci                                  intrin->dest.ssa.bit_size,
630bf215546Sopenharmony_ci                                  &bary_setup->dest.ssa,
631bf215546Sopenharmony_ci                                  offset,
632bf215546Sopenharmony_ci                                  .base = var->data.driver_location,
633bf215546Sopenharmony_ci                                  .component = component,
634bf215546Sopenharmony_ci                                  .io_semantics = semantics);
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   return load;
637bf215546Sopenharmony_ci}
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_cistatic bool
640bf215546Sopenharmony_cinir_lower_io_block(nir_block *block,
641bf215546Sopenharmony_ci                   struct lower_io_state *state)
642bf215546Sopenharmony_ci{
643bf215546Sopenharmony_ci   nir_builder *b = &state->builder;
644bf215546Sopenharmony_ci   const nir_shader_compiler_options *options = b->shader->options;
645bf215546Sopenharmony_ci   bool progress = false;
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci   nir_foreach_instr_safe(instr, block) {
648bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_intrinsic)
649bf215546Sopenharmony_ci         continue;
650bf215546Sopenharmony_ci
651bf215546Sopenharmony_ci      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci      switch (intrin->intrinsic) {
654bf215546Sopenharmony_ci      case nir_intrinsic_load_deref:
655bf215546Sopenharmony_ci      case nir_intrinsic_store_deref:
656bf215546Sopenharmony_ci         /* We can lower the io for this nir instrinsic */
657bf215546Sopenharmony_ci         break;
658bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_centroid:
659bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_sample:
660bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_offset:
661bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_vertex:
662bf215546Sopenharmony_ci         /* We can optionally lower these to load_interpolated_input */
663bf215546Sopenharmony_ci         if (options->use_interpolated_input_intrinsics ||
664bf215546Sopenharmony_ci             options->lower_interpolate_at)
665bf215546Sopenharmony_ci            break;
666bf215546Sopenharmony_ci         FALLTHROUGH;
667bf215546Sopenharmony_ci      default:
668bf215546Sopenharmony_ci         /* We can't lower the io for this nir instrinsic, so skip it */
669bf215546Sopenharmony_ci         continue;
670bf215546Sopenharmony_ci      }
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
673bf215546Sopenharmony_ci      if (!nir_deref_mode_is_one_of(deref, state->modes))
674bf215546Sopenharmony_ci         continue;
675bf215546Sopenharmony_ci
676bf215546Sopenharmony_ci      nir_variable *var = nir_deref_instr_get_variable(deref);
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci      b->cursor = nir_before_instr(instr);
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci      nir_ssa_def *offset;
683bf215546Sopenharmony_ci      nir_ssa_def *array_index = NULL;
684bf215546Sopenharmony_ci      unsigned component_offset = var->data.location_frac;
685bf215546Sopenharmony_ci      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
686bf215546Sopenharmony_ci                                var->data.mode == nir_var_shader_out ||
687bf215546Sopenharmony_ci                                var->data.bindless;
688bf215546Sopenharmony_ci
689bf215546Sopenharmony_ci     if (nir_deref_instr_is_known_out_of_bounds(deref)) {
690bf215546Sopenharmony_ci        /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
691bf215546Sopenharmony_ci         *
692bf215546Sopenharmony_ci         *    In the subsections described above for array, vector, matrix and
693bf215546Sopenharmony_ci         *    structure accesses, any out-of-bounds access produced undefined
694bf215546Sopenharmony_ci         *    behavior....
695bf215546Sopenharmony_ci         *    Out-of-bounds reads return undefined values, which
696bf215546Sopenharmony_ci         *    include values from other variables of the active program or zero.
697bf215546Sopenharmony_ci         *    Out-of-bounds writes may be discarded or overwrite
698bf215546Sopenharmony_ci         *    other variables of the active program.
699bf215546Sopenharmony_ci         *
700bf215546Sopenharmony_ci         * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
701bf215546Sopenharmony_ci         * for reads.
702bf215546Sopenharmony_ci         *
703bf215546Sopenharmony_ci         * Otherwise get_io_offset would return out-of-bound offset which may
704bf215546Sopenharmony_ci         * result in out-of-bound loading/storing of inputs/outputs,
705bf215546Sopenharmony_ci         * that could cause issues in drivers down the line.
706bf215546Sopenharmony_ci         */
707bf215546Sopenharmony_ci         if (intrin->intrinsic != nir_intrinsic_store_deref) {
708bf215546Sopenharmony_ci            nir_ssa_def *zero =
709bf215546Sopenharmony_ci               nir_imm_zero(b, intrin->dest.ssa.num_components,
710bf215546Sopenharmony_ci                             intrin->dest.ssa.bit_size);
711bf215546Sopenharmony_ci            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
712bf215546Sopenharmony_ci                                  zero);
713bf215546Sopenharmony_ci         }
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci         nir_instr_remove(&intrin->instr);
716bf215546Sopenharmony_ci         progress = true;
717bf215546Sopenharmony_ci         continue;
718bf215546Sopenharmony_ci      }
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
721bf215546Sopenharmony_ci                             state->type_size, &component_offset,
722bf215546Sopenharmony_ci                             bindless_type_size);
723bf215546Sopenharmony_ci
724bf215546Sopenharmony_ci      nir_ssa_def *replacement = NULL;
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci      switch (intrin->intrinsic) {
727bf215546Sopenharmony_ci      case nir_intrinsic_load_deref:
728bf215546Sopenharmony_ci         replacement = lower_load(intrin, state, array_index, var, offset,
729bf215546Sopenharmony_ci                                  component_offset, deref->type);
730bf215546Sopenharmony_ci         break;
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci      case nir_intrinsic_store_deref:
733bf215546Sopenharmony_ci         lower_store(intrin, state, array_index, var, offset,
734bf215546Sopenharmony_ci                     component_offset, deref->type);
735bf215546Sopenharmony_ci         break;
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_centroid:
738bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_sample:
739bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_offset:
740bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_vertex:
741bf215546Sopenharmony_ci         assert(array_index == NULL);
742bf215546Sopenharmony_ci         replacement = lower_interpolate_at(intrin, state, var, offset,
743bf215546Sopenharmony_ci                                            component_offset, deref->type);
744bf215546Sopenharmony_ci         break;
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci      default:
747bf215546Sopenharmony_ci         continue;
748bf215546Sopenharmony_ci      }
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci      if (replacement) {
751bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
752bf215546Sopenharmony_ci                                  replacement);
753bf215546Sopenharmony_ci      }
754bf215546Sopenharmony_ci      nir_instr_remove(&intrin->instr);
755bf215546Sopenharmony_ci      progress = true;
756bf215546Sopenharmony_ci   }
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   return progress;
759bf215546Sopenharmony_ci}
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_cistatic bool
762bf215546Sopenharmony_cinir_lower_io_impl(nir_function_impl *impl,
763bf215546Sopenharmony_ci                  nir_variable_mode modes,
764bf215546Sopenharmony_ci                  int (*type_size)(const struct glsl_type *, bool),
765bf215546Sopenharmony_ci                  nir_lower_io_options options)
766bf215546Sopenharmony_ci{
767bf215546Sopenharmony_ci   struct lower_io_state state;
768bf215546Sopenharmony_ci   bool progress = false;
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci   nir_builder_init(&state.builder, impl);
771bf215546Sopenharmony_ci   state.dead_ctx = ralloc_context(NULL);
772bf215546Sopenharmony_ci   state.modes = modes;
773bf215546Sopenharmony_ci   state.type_size = type_size;
774bf215546Sopenharmony_ci   state.options = options;
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci   ASSERTED nir_variable_mode supported_modes =
777bf215546Sopenharmony_ci      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
778bf215546Sopenharmony_ci   assert(!(modes & ~supported_modes));
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
781bf215546Sopenharmony_ci      progress |= nir_lower_io_block(block, &state);
782bf215546Sopenharmony_ci   }
783bf215546Sopenharmony_ci
784bf215546Sopenharmony_ci   ralloc_free(state.dead_ctx);
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_none);
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_ci   return progress;
789bf215546Sopenharmony_ci}
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
792bf215546Sopenharmony_ci *
793bf215546Sopenharmony_ci * This pass is intended to be used for cross-stage shader I/O and driver-
794bf215546Sopenharmony_ci * managed uniforms to turn deref-based access into a simpler model using
795bf215546Sopenharmony_ci * locations or offsets.  For fragment shader inputs, it can optionally turn
796bf215546Sopenharmony_ci * load_deref into an explicit interpolation using barycentrics coming from
797bf215546Sopenharmony_ci * one of the load_barycentric_* intrinsics.  This pass requires that all
798bf215546Sopenharmony_ci * deref chains are complete and contain no casts.
799bf215546Sopenharmony_ci */
800bf215546Sopenharmony_cibool
801bf215546Sopenharmony_cinir_lower_io(nir_shader *shader, nir_variable_mode modes,
802bf215546Sopenharmony_ci             int (*type_size)(const struct glsl_type *, bool),
803bf215546Sopenharmony_ci             nir_lower_io_options options)
804bf215546Sopenharmony_ci{
805bf215546Sopenharmony_ci   bool progress = false;
806bf215546Sopenharmony_ci
807bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
808bf215546Sopenharmony_ci      if (function->impl) {
809bf215546Sopenharmony_ci         progress |= nir_lower_io_impl(function->impl, modes,
810bf215546Sopenharmony_ci                                       type_size, options);
811bf215546Sopenharmony_ci      }
812bf215546Sopenharmony_ci   }
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci   return progress;
815bf215546Sopenharmony_ci}
816bf215546Sopenharmony_ci
/**
 * Size in bytes of one scalar component of a scalar, vector or matrix type.
 * Booleans are reported as 4 bytes; every other type uses its bit size / 8.
 */
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));

   if (glsl_type_is_boolean(type))
      return 4;

   return glsl_get_bit_size(type) / 8;
}
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_cistatic nir_ssa_def *
826bf215546Sopenharmony_cibuild_addr_iadd(nir_builder *b, nir_ssa_def *addr,
827bf215546Sopenharmony_ci                nir_address_format addr_format,
828bf215546Sopenharmony_ci                nir_variable_mode modes,
829bf215546Sopenharmony_ci                nir_ssa_def *offset)
830bf215546Sopenharmony_ci{
831bf215546Sopenharmony_ci   assert(offset->num_components == 1);
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   switch (addr_format) {
834bf215546Sopenharmony_ci   case nir_address_format_32bit_global:
835bf215546Sopenharmony_ci   case nir_address_format_64bit_global:
836bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:
837bf215546Sopenharmony_ci      assert(addr->bit_size == offset->bit_size);
838bf215546Sopenharmony_ci      assert(addr->num_components == 1);
839bf215546Sopenharmony_ci      return nir_iadd(b, addr, offset);
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci   case nir_address_format_2x32bit_global: {
842bf215546Sopenharmony_ci      assert(addr->num_components == 2);
843bf215546Sopenharmony_ci      nir_ssa_def *lo = nir_channel(b, addr, 0);
844bf215546Sopenharmony_ci      nir_ssa_def *hi = nir_channel(b, addr, 1);
845bf215546Sopenharmony_ci      nir_ssa_def *res_lo = nir_iadd(b, lo, offset);
846bf215546Sopenharmony_ci      nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
847bf215546Sopenharmony_ci      nir_ssa_def *res_hi = nir_iadd(b, hi, carry);
848bf215546Sopenharmony_ci      return nir_vec2(b, res_lo, res_hi);
849bf215546Sopenharmony_ci   }
850bf215546Sopenharmony_ci
851bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:
852bf215546Sopenharmony_ci      assert(addr->num_components == 1);
853bf215546Sopenharmony_ci      assert(offset->bit_size == 32);
854bf215546Sopenharmony_ci      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   case nir_address_format_64bit_global_32bit_offset:
857bf215546Sopenharmony_ci   case nir_address_format_64bit_bounded_global:
858bf215546Sopenharmony_ci      assert(addr->num_components == 4);
859bf215546Sopenharmony_ci      assert(addr->bit_size == offset->bit_size);
860bf215546Sopenharmony_ci      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:
863bf215546Sopenharmony_ci      assert(addr->num_components == 2);
864bf215546Sopenharmony_ci      assert(addr->bit_size == offset->bit_size);
865bf215546Sopenharmony_ci      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64:
868bf215546Sopenharmony_ci      assert(addr->num_components == 1);
869bf215546Sopenharmony_ci      assert(offset->bit_size == 32);
870bf215546Sopenharmony_ci      return nir_pack_64_2x32_split(b,
871bf215546Sopenharmony_ci                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
872bf215546Sopenharmony_ci                                    nir_unpack_64_2x32_split_y(b, addr));
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:
875bf215546Sopenharmony_ci      assert(addr->num_components == 3);
876bf215546Sopenharmony_ci      assert(offset->bit_size == 32);
877bf215546Sopenharmony_ci      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:
880bf215546Sopenharmony_ci      assert(addr->num_components == 1);
881bf215546Sopenharmony_ci      assert(addr->bit_size == 64);
882bf215546Sopenharmony_ci      assert(offset->bit_size == 64);
883bf215546Sopenharmony_ci      if (!(modes & ~(nir_var_function_temp |
884bf215546Sopenharmony_ci                      nir_var_shader_temp |
885bf215546Sopenharmony_ci                      nir_var_mem_shared))) {
886bf215546Sopenharmony_ci         /* If we're sure it's one of these modes, we can do an easy 32-bit
887bf215546Sopenharmony_ci          * addition and don't need to bother with 64-bit math.
888bf215546Sopenharmony_ci          */
889bf215546Sopenharmony_ci         nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
890bf215546Sopenharmony_ci         nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
891bf215546Sopenharmony_ci         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
892bf215546Sopenharmony_ci         return nir_pack_64_2x32_split(b, addr32, type);
893bf215546Sopenharmony_ci      } else {
894bf215546Sopenharmony_ci         return nir_iadd(b, addr, offset);
895bf215546Sopenharmony_ci      }
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci   case nir_address_format_logical:
898bf215546Sopenharmony_ci      unreachable("Unsupported address format");
899bf215546Sopenharmony_ci   }
900bf215546Sopenharmony_ci   unreachable("Invalid address format");
901bf215546Sopenharmony_ci}
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_cistatic unsigned
904bf215546Sopenharmony_ciaddr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
905bf215546Sopenharmony_ci{
906bf215546Sopenharmony_ci   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
907bf215546Sopenharmony_ci       addr_format == nir_address_format_32bit_index_offset_pack64)
908bf215546Sopenharmony_ci      return 32;
909bf215546Sopenharmony_ci   return addr->bit_size;
910bf215546Sopenharmony_ci}
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_cistatic nir_ssa_def *
913bf215546Sopenharmony_cibuild_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
914bf215546Sopenharmony_ci                    nir_address_format addr_format,
915bf215546Sopenharmony_ci                    nir_variable_mode modes,
916bf215546Sopenharmony_ci                    int64_t offset)
917bf215546Sopenharmony_ci{
918bf215546Sopenharmony_ci   return build_addr_iadd(b, addr, addr_format, modes,
919bf215546Sopenharmony_ci                             nir_imm_intN_t(b, offset,
920bf215546Sopenharmony_ci                                            addr_get_offset_bit_size(addr, addr_format)));
921bf215546Sopenharmony_ci}
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_cistatic nir_ssa_def *
924bf215546Sopenharmony_cibuild_addr_for_var(nir_builder *b, nir_variable *var,
925bf215546Sopenharmony_ci                   nir_address_format addr_format)
926bf215546Sopenharmony_ci{
927bf215546Sopenharmony_ci   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
928bf215546Sopenharmony_ci                            nir_var_mem_task_payload |
929bf215546Sopenharmony_ci                            nir_var_mem_global |
930bf215546Sopenharmony_ci                            nir_var_shader_temp | nir_var_function_temp |
931bf215546Sopenharmony_ci                            nir_var_mem_push_const | nir_var_mem_constant));
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   const unsigned num_comps = nir_address_format_num_components(addr_format);
934bf215546Sopenharmony_ci   const unsigned bit_size = nir_address_format_bit_size(addr_format);
935bf215546Sopenharmony_ci
936bf215546Sopenharmony_ci   switch (addr_format) {
937bf215546Sopenharmony_ci   case nir_address_format_2x32bit_global:
938bf215546Sopenharmony_ci   case nir_address_format_32bit_global:
939bf215546Sopenharmony_ci   case nir_address_format_64bit_global: {
940bf215546Sopenharmony_ci      nir_ssa_def *base_addr;
941bf215546Sopenharmony_ci      switch (var->data.mode) {
942bf215546Sopenharmony_ci      case nir_var_shader_temp:
943bf215546Sopenharmony_ci         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
944bf215546Sopenharmony_ci         break;
945bf215546Sopenharmony_ci
946bf215546Sopenharmony_ci      case nir_var_function_temp:
947bf215546Sopenharmony_ci         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
948bf215546Sopenharmony_ci         break;
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci      case nir_var_mem_constant:
951bf215546Sopenharmony_ci         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
952bf215546Sopenharmony_ci         break;
953bf215546Sopenharmony_ci
954bf215546Sopenharmony_ci      case nir_var_mem_shared:
955bf215546Sopenharmony_ci         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
956bf215546Sopenharmony_ci         break;
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci      case nir_var_mem_global:
959bf215546Sopenharmony_ci         base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
960bf215546Sopenharmony_ci         break;
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_ci      default:
963bf215546Sopenharmony_ci         unreachable("Unsupported variable mode");
964bf215546Sopenharmony_ci      }
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci      return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
967bf215546Sopenharmony_ci                                    var->data.driver_location);
968bf215546Sopenharmony_ci   }
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:
971bf215546Sopenharmony_ci      assert(var->data.driver_location <= UINT32_MAX);
972bf215546Sopenharmony_ci      return nir_imm_int(b, var->data.driver_location);
973bf215546Sopenharmony_ci
974bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:
975bf215546Sopenharmony_ci      assert(var->data.driver_location <= UINT32_MAX);
976bf215546Sopenharmony_ci      return nir_imm_int64(b, var->data.driver_location);
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:
979bf215546Sopenharmony_ci      switch (var->data.mode) {
980bf215546Sopenharmony_ci      case nir_var_shader_temp:
981bf215546Sopenharmony_ci      case nir_var_function_temp:
982bf215546Sopenharmony_ci         assert(var->data.driver_location <= UINT32_MAX);
983bf215546Sopenharmony_ci         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
984bf215546Sopenharmony_ci
985bf215546Sopenharmony_ci      case nir_var_mem_shared:
986bf215546Sopenharmony_ci         assert(var->data.driver_location <= UINT32_MAX);
987bf215546Sopenharmony_ci         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci      case nir_var_mem_global:
990bf215546Sopenharmony_ci         return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
991bf215546Sopenharmony_ci                                var->data.driver_location);
992bf215546Sopenharmony_ci
993bf215546Sopenharmony_ci      default:
994bf215546Sopenharmony_ci         unreachable("Unsupported variable mode");
995bf215546Sopenharmony_ci      }
996bf215546Sopenharmony_ci
997bf215546Sopenharmony_ci   default:
998bf215546Sopenharmony_ci      unreachable("Unsupported address format");
999bf215546Sopenharmony_ci   }
1000bf215546Sopenharmony_ci}
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_cistatic nir_ssa_def *
1003bf215546Sopenharmony_cibuild_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
1004bf215546Sopenharmony_ci                              nir_address_format addr_format,
1005bf215546Sopenharmony_ci                              nir_variable_mode mode)
1006bf215546Sopenharmony_ci{
1007bf215546Sopenharmony_ci   /* The compile-time check failed; do a run-time check */
1008bf215546Sopenharmony_ci   switch (addr_format) {
1009bf215546Sopenharmony_ci   case nir_address_format_62bit_generic: {
1010bf215546Sopenharmony_ci      assert(addr->num_components == 1);
1011bf215546Sopenharmony_ci      assert(addr->bit_size == 64);
1012bf215546Sopenharmony_ci      nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
1013bf215546Sopenharmony_ci      switch (mode) {
1014bf215546Sopenharmony_ci      case nir_var_function_temp:
1015bf215546Sopenharmony_ci      case nir_var_shader_temp:
1016bf215546Sopenharmony_ci         return nir_ieq_imm(b, mode_enum, 0x2);
1017bf215546Sopenharmony_ci
1018bf215546Sopenharmony_ci      case nir_var_mem_shared:
1019bf215546Sopenharmony_ci         return nir_ieq_imm(b, mode_enum, 0x1);
1020bf215546Sopenharmony_ci
1021bf215546Sopenharmony_ci      case nir_var_mem_global:
1022bf215546Sopenharmony_ci         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
1023bf215546Sopenharmony_ci                           nir_ieq_imm(b, mode_enum, 0x3));
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci      default:
1026bf215546Sopenharmony_ci         unreachable("Invalid mode check intrinsic");
1027bf215546Sopenharmony_ci      }
1028bf215546Sopenharmony_ci   }
1029bf215546Sopenharmony_ci
1030bf215546Sopenharmony_ci   default:
1031bf215546Sopenharmony_ci      unreachable("Unsupported address mode");
1032bf215546Sopenharmony_ci   }
1033bf215546Sopenharmony_ci}
1034bf215546Sopenharmony_ci
1035bf215546Sopenharmony_ciunsigned
1036bf215546Sopenharmony_cinir_address_format_bit_size(nir_address_format addr_format)
1037bf215546Sopenharmony_ci{
1038bf215546Sopenharmony_ci   switch (addr_format) {
1039bf215546Sopenharmony_ci   case nir_address_format_32bit_global:              return 32;
1040bf215546Sopenharmony_ci   case nir_address_format_2x32bit_global:            return 32;
1041bf215546Sopenharmony_ci   case nir_address_format_64bit_global:              return 64;
1042bf215546Sopenharmony_ci   case nir_address_format_64bit_global_32bit_offset: return 32;
1043bf215546Sopenharmony_ci   case nir_address_format_64bit_bounded_global:      return 32;
1044bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:        return 32;
1045bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64: return 64;
1046bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:   return 32;
1047bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:             return 64;
1048bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:              return 32;
1049bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:     return 64;
1050bf215546Sopenharmony_ci   case nir_address_format_logical:                   return 32;
1051bf215546Sopenharmony_ci   }
1052bf215546Sopenharmony_ci   unreachable("Invalid address format");
1053bf215546Sopenharmony_ci}
1054bf215546Sopenharmony_ci
1055bf215546Sopenharmony_ciunsigned
1056bf215546Sopenharmony_cinir_address_format_num_components(nir_address_format addr_format)
1057bf215546Sopenharmony_ci{
1058bf215546Sopenharmony_ci   switch (addr_format) {
1059bf215546Sopenharmony_ci   case nir_address_format_32bit_global:              return 1;
1060bf215546Sopenharmony_ci   case nir_address_format_2x32bit_global:            return 2;
1061bf215546Sopenharmony_ci   case nir_address_format_64bit_global:              return 1;
1062bf215546Sopenharmony_ci   case nir_address_format_64bit_global_32bit_offset: return 4;
1063bf215546Sopenharmony_ci   case nir_address_format_64bit_bounded_global:      return 4;
1064bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:        return 2;
1065bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64: return 1;
1066bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:   return 3;
1067bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:             return 1;
1068bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:              return 1;
1069bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:     return 1;
1070bf215546Sopenharmony_ci   case nir_address_format_logical:                   return 1;
1071bf215546Sopenharmony_ci   }
1072bf215546Sopenharmony_ci   unreachable("Invalid address format");
1073bf215546Sopenharmony_ci}
1074bf215546Sopenharmony_ci
1075bf215546Sopenharmony_cistatic nir_ssa_def *
1076bf215546Sopenharmony_ciaddr_to_index(nir_builder *b, nir_ssa_def *addr,
1077bf215546Sopenharmony_ci              nir_address_format addr_format)
1078bf215546Sopenharmony_ci{
1079bf215546Sopenharmony_ci   switch (addr_format) {
1080bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:
1081bf215546Sopenharmony_ci      assert(addr->num_components == 2);
1082bf215546Sopenharmony_ci      return nir_channel(b, addr, 0);
1083bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64:
1084bf215546Sopenharmony_ci      return nir_unpack_64_2x32_split_y(b, addr);
1085bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:
1086bf215546Sopenharmony_ci      assert(addr->num_components == 3);
1087bf215546Sopenharmony_ci      return nir_channels(b, addr, 0x3);
1088bf215546Sopenharmony_ci   default: unreachable("Invalid address format");
1089bf215546Sopenharmony_ci   }
1090bf215546Sopenharmony_ci}
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_cistatic nir_ssa_def *
1093bf215546Sopenharmony_ciaddr_to_offset(nir_builder *b, nir_ssa_def *addr,
1094bf215546Sopenharmony_ci               nir_address_format addr_format)
1095bf215546Sopenharmony_ci{
1096bf215546Sopenharmony_ci   switch (addr_format) {
1097bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:
1098bf215546Sopenharmony_ci      assert(addr->num_components == 2);
1099bf215546Sopenharmony_ci      return nir_channel(b, addr, 1);
1100bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64:
1101bf215546Sopenharmony_ci      return nir_unpack_64_2x32_split_x(b, addr);
1102bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:
1103bf215546Sopenharmony_ci      assert(addr->num_components == 3);
1104bf215546Sopenharmony_ci      return nir_channel(b, addr, 2);
1105bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:
1106bf215546Sopenharmony_ci      return addr;
1107bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:
1108bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:
1109bf215546Sopenharmony_ci      return nir_u2u32(b, addr);
1110bf215546Sopenharmony_ci   default:
1111bf215546Sopenharmony_ci      unreachable("Invalid address format");
1112bf215546Sopenharmony_ci   }
1113bf215546Sopenharmony_ci}
1114bf215546Sopenharmony_ci
1115bf215546Sopenharmony_ci/** Returns true if the given address format resolves to a global address */
1116bf215546Sopenharmony_cistatic bool
1117bf215546Sopenharmony_ciaddr_format_is_global(nir_address_format addr_format,
1118bf215546Sopenharmony_ci                      nir_variable_mode mode)
1119bf215546Sopenharmony_ci{
1120bf215546Sopenharmony_ci   if (addr_format == nir_address_format_62bit_generic)
1121bf215546Sopenharmony_ci      return mode == nir_var_mem_global;
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci   return addr_format == nir_address_format_32bit_global ||
1124bf215546Sopenharmony_ci          addr_format == nir_address_format_2x32bit_global ||
1125bf215546Sopenharmony_ci          addr_format == nir_address_format_64bit_global ||
1126bf215546Sopenharmony_ci          addr_format == nir_address_format_64bit_global_32bit_offset ||
1127bf215546Sopenharmony_ci          addr_format == nir_address_format_64bit_bounded_global;
1128bf215546Sopenharmony_ci}
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_cistatic bool
1131bf215546Sopenharmony_ciaddr_format_is_offset(nir_address_format addr_format,
1132bf215546Sopenharmony_ci                      nir_variable_mode mode)
1133bf215546Sopenharmony_ci{
1134bf215546Sopenharmony_ci   if (addr_format == nir_address_format_62bit_generic)
1135bf215546Sopenharmony_ci      return mode != nir_var_mem_global;
1136bf215546Sopenharmony_ci
1137bf215546Sopenharmony_ci   return addr_format == nir_address_format_32bit_offset ||
1138bf215546Sopenharmony_ci          addr_format == nir_address_format_32bit_offset_as_64bit;
1139bf215546Sopenharmony_ci}
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_cistatic nir_ssa_def *
1142bf215546Sopenharmony_ciaddr_to_global(nir_builder *b, nir_ssa_def *addr,
1143bf215546Sopenharmony_ci               nir_address_format addr_format)
1144bf215546Sopenharmony_ci{
1145bf215546Sopenharmony_ci   switch (addr_format) {
1146bf215546Sopenharmony_ci   case nir_address_format_32bit_global:
1147bf215546Sopenharmony_ci   case nir_address_format_64bit_global:
1148bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:
1149bf215546Sopenharmony_ci      assert(addr->num_components == 1);
1150bf215546Sopenharmony_ci      return addr;
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ci   case nir_address_format_2x32bit_global:
1153bf215546Sopenharmony_ci      assert(addr->num_components == 2);
1154bf215546Sopenharmony_ci      return addr;
1155bf215546Sopenharmony_ci
1156bf215546Sopenharmony_ci   case nir_address_format_64bit_global_32bit_offset:
1157bf215546Sopenharmony_ci   case nir_address_format_64bit_bounded_global:
1158bf215546Sopenharmony_ci      assert(addr->num_components == 4);
1159bf215546Sopenharmony_ci      return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
1160bf215546Sopenharmony_ci                         nir_u2u64(b, nir_channel(b, addr, 3)));
1161bf215546Sopenharmony_ci
1162bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:
1163bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64:
1164bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:
1165bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:
1166bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:
1167bf215546Sopenharmony_ci   case nir_address_format_logical:
1168bf215546Sopenharmony_ci      unreachable("Cannot get a 64-bit address with this address format");
1169bf215546Sopenharmony_ci   }
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_ci   unreachable("Invalid address format");
1172bf215546Sopenharmony_ci}
1173bf215546Sopenharmony_ci
1174bf215546Sopenharmony_cistatic bool
1175bf215546Sopenharmony_ciaddr_format_needs_bounds_check(nir_address_format addr_format)
1176bf215546Sopenharmony_ci{
1177bf215546Sopenharmony_ci   return addr_format == nir_address_format_64bit_bounded_global;
1178bf215546Sopenharmony_ci}
1179bf215546Sopenharmony_ci
1180bf215546Sopenharmony_cistatic nir_ssa_def *
1181bf215546Sopenharmony_ciaddr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
1182bf215546Sopenharmony_ci                  nir_address_format addr_format, unsigned size)
1183bf215546Sopenharmony_ci{
1184bf215546Sopenharmony_ci   assert(addr_format == nir_address_format_64bit_bounded_global);
1185bf215546Sopenharmony_ci   assert(addr->num_components == 4);
1186bf215546Sopenharmony_ci   return nir_ige(b, nir_channel(b, addr, 2),
1187bf215546Sopenharmony_ci                     nir_iadd_imm(b, nir_channel(b, addr, 3), size));
1188bf215546Sopenharmony_ci}
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_cistatic void
1191bf215546Sopenharmony_cinir_get_explicit_deref_range(nir_deref_instr *deref,
1192bf215546Sopenharmony_ci                             nir_address_format addr_format,
1193bf215546Sopenharmony_ci                             uint32_t *out_base,
1194bf215546Sopenharmony_ci                             uint32_t *out_range)
1195bf215546Sopenharmony_ci{
1196bf215546Sopenharmony_ci   uint32_t base = 0;
1197bf215546Sopenharmony_ci   uint32_t range = glsl_get_explicit_size(deref->type, false);
1198bf215546Sopenharmony_ci
1199bf215546Sopenharmony_ci   while (true) {
1200bf215546Sopenharmony_ci      nir_deref_instr *parent = nir_deref_instr_parent(deref);
1201bf215546Sopenharmony_ci
1202bf215546Sopenharmony_ci      switch (deref->deref_type) {
1203bf215546Sopenharmony_ci      case nir_deref_type_array:
1204bf215546Sopenharmony_ci      case nir_deref_type_array_wildcard:
1205bf215546Sopenharmony_ci      case nir_deref_type_ptr_as_array: {
1206bf215546Sopenharmony_ci         const unsigned stride = nir_deref_instr_array_stride(deref);
1207bf215546Sopenharmony_ci         if (stride == 0)
1208bf215546Sopenharmony_ci            goto fail;
1209bf215546Sopenharmony_ci
1210bf215546Sopenharmony_ci         if (!parent)
1211bf215546Sopenharmony_ci            goto fail;
1212bf215546Sopenharmony_ci
1213bf215546Sopenharmony_ci         if (deref->deref_type != nir_deref_type_array_wildcard &&
1214bf215546Sopenharmony_ci             nir_src_is_const(deref->arr.index)) {
1215bf215546Sopenharmony_ci            base += stride * nir_src_as_uint(deref->arr.index);
1216bf215546Sopenharmony_ci         } else {
1217bf215546Sopenharmony_ci            if (glsl_get_length(parent->type) == 0)
1218bf215546Sopenharmony_ci               goto fail;
1219bf215546Sopenharmony_ci            range += stride * (glsl_get_length(parent->type) - 1);
1220bf215546Sopenharmony_ci         }
1221bf215546Sopenharmony_ci         break;
1222bf215546Sopenharmony_ci      }
1223bf215546Sopenharmony_ci
1224bf215546Sopenharmony_ci      case nir_deref_type_struct: {
1225bf215546Sopenharmony_ci         if (!parent)
1226bf215546Sopenharmony_ci            goto fail;
1227bf215546Sopenharmony_ci
1228bf215546Sopenharmony_ci         base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1229bf215546Sopenharmony_ci         break;
1230bf215546Sopenharmony_ci      }
1231bf215546Sopenharmony_ci
1232bf215546Sopenharmony_ci      case nir_deref_type_cast: {
1233bf215546Sopenharmony_ci         nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1234bf215546Sopenharmony_ci
1235bf215546Sopenharmony_ci         switch (parent_instr->type) {
1236bf215546Sopenharmony_ci         case nir_instr_type_load_const: {
1237bf215546Sopenharmony_ci            nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1238bf215546Sopenharmony_ci
1239bf215546Sopenharmony_ci            switch (addr_format) {
1240bf215546Sopenharmony_ci            case nir_address_format_32bit_offset:
1241bf215546Sopenharmony_ci               base += load->value[1].u32;
1242bf215546Sopenharmony_ci               break;
1243bf215546Sopenharmony_ci            case nir_address_format_32bit_index_offset:
1244bf215546Sopenharmony_ci               base += load->value[1].u32;
1245bf215546Sopenharmony_ci               break;
1246bf215546Sopenharmony_ci            case nir_address_format_vec2_index_32bit_offset:
1247bf215546Sopenharmony_ci               base += load->value[2].u32;
1248bf215546Sopenharmony_ci               break;
1249bf215546Sopenharmony_ci            default:
1250bf215546Sopenharmony_ci               goto fail;
1251bf215546Sopenharmony_ci            }
1252bf215546Sopenharmony_ci
1253bf215546Sopenharmony_ci            *out_base = base;
1254bf215546Sopenharmony_ci            *out_range = range;
1255bf215546Sopenharmony_ci            return;
1256bf215546Sopenharmony_ci         }
1257bf215546Sopenharmony_ci
1258bf215546Sopenharmony_ci         case nir_instr_type_intrinsic: {
1259bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1260bf215546Sopenharmony_ci            switch (intr->intrinsic) {
1261bf215546Sopenharmony_ci            case nir_intrinsic_load_vulkan_descriptor:
1262bf215546Sopenharmony_ci               /* Assume that a load_vulkan_descriptor won't contribute to an
1263bf215546Sopenharmony_ci                * offset within the resource.
1264bf215546Sopenharmony_ci                */
1265bf215546Sopenharmony_ci               break;
1266bf215546Sopenharmony_ci            default:
1267bf215546Sopenharmony_ci               goto fail;
1268bf215546Sopenharmony_ci            }
1269bf215546Sopenharmony_ci
1270bf215546Sopenharmony_ci            *out_base = base;
1271bf215546Sopenharmony_ci            *out_range = range;
1272bf215546Sopenharmony_ci            return;
1273bf215546Sopenharmony_ci         }
1274bf215546Sopenharmony_ci
1275bf215546Sopenharmony_ci         default:
1276bf215546Sopenharmony_ci            goto fail;
1277bf215546Sopenharmony_ci         }
1278bf215546Sopenharmony_ci      }
1279bf215546Sopenharmony_ci
1280bf215546Sopenharmony_ci      default:
1281bf215546Sopenharmony_ci         goto fail;
1282bf215546Sopenharmony_ci      }
1283bf215546Sopenharmony_ci
1284bf215546Sopenharmony_ci      deref = parent;
1285bf215546Sopenharmony_ci   }
1286bf215546Sopenharmony_ci
1287bf215546Sopenharmony_cifail:
1288bf215546Sopenharmony_ci   *out_base = 0;
1289bf215546Sopenharmony_ci   *out_range = ~0;
1290bf215546Sopenharmony_ci}
1291bf215546Sopenharmony_ci
1292bf215546Sopenharmony_cistatic nir_variable_mode
1293bf215546Sopenharmony_cicanonicalize_generic_modes(nir_variable_mode modes)
1294bf215546Sopenharmony_ci{
1295bf215546Sopenharmony_ci   assert(modes != 0);
1296bf215546Sopenharmony_ci   if (util_bitcount(modes) == 1)
1297bf215546Sopenharmony_ci      return modes;
1298bf215546Sopenharmony_ci
1299bf215546Sopenharmony_ci   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1300bf215546Sopenharmony_ci                      nir_var_mem_shared | nir_var_mem_global)));
1301bf215546Sopenharmony_ci
1302bf215546Sopenharmony_ci   /* Canonicalize by converting shader_temp to function_temp */
1303bf215546Sopenharmony_ci   if (modes & nir_var_shader_temp) {
1304bf215546Sopenharmony_ci      modes &= ~nir_var_shader_temp;
1305bf215546Sopenharmony_ci      modes |= nir_var_function_temp;
1306bf215546Sopenharmony_ci   }
1307bf215546Sopenharmony_ci
1308bf215546Sopenharmony_ci   return modes;
1309bf215546Sopenharmony_ci}
1310bf215546Sopenharmony_ci
1311bf215546Sopenharmony_cistatic nir_intrinsic_op
1312bf215546Sopenharmony_ciget_store_global_op_from_addr_format(nir_address_format addr_format)
1313bf215546Sopenharmony_ci{
1314bf215546Sopenharmony_ci   if (addr_format != nir_address_format_2x32bit_global)
1315bf215546Sopenharmony_ci      return nir_intrinsic_store_global;
1316bf215546Sopenharmony_ci   else
1317bf215546Sopenharmony_ci      return nir_intrinsic_store_global_2x32;
1318bf215546Sopenharmony_ci}
1319bf215546Sopenharmony_ci
1320bf215546Sopenharmony_cistatic nir_intrinsic_op
1321bf215546Sopenharmony_ciget_load_global_op_from_addr_format(nir_address_format addr_format)
1322bf215546Sopenharmony_ci{
1323bf215546Sopenharmony_ci   if (addr_format != nir_address_format_2x32bit_global)
1324bf215546Sopenharmony_ci      return nir_intrinsic_load_global;
1325bf215546Sopenharmony_ci   else
1326bf215546Sopenharmony_ci      return nir_intrinsic_load_global_2x32;
1327bf215546Sopenharmony_ci}
1328bf215546Sopenharmony_ci
/**
 * Emits the explicit-address load that stands in for a load_deref or
 * load_deref_block_intel intrinsic and returns the loaded value.
 *
 * b              - builder used to emit the new instructions
 * intrin         - the deref-based load being lowered; src[0] is read as a
 *                  deref, and its access flags and destination bit size are
 *                  carried over to the new load
 * addr           - the address to load from, laid out as addr_format
 * addr_format    - layout of addr
 * modes          - variable mode(s) the address may refer to
 * align_mul,
 * align_offset   - alignment set on the new load if it has an align index
 * num_components - number of components of the new load
 *
 * If more than one mode remains after canonicalization, the function calls
 * itself again: directly with nir_var_mem_global when the format is global
 * for that mode set, otherwise once on each side of a runtime mode check,
 * merging both results with a phi.
 *
 * A 1-bit destination is loaded as 32 bits and converted back to a 1-bit
 * boolean before it is returned.
 */
1329bf215546Sopenharmony_cistatic nir_ssa_def *
1330bf215546Sopenharmony_cibuild_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1331bf215546Sopenharmony_ci                       nir_ssa_def *addr, nir_address_format addr_format,
1332bf215546Sopenharmony_ci                       nir_variable_mode modes,
1333bf215546Sopenharmony_ci                       uint32_t align_mul, uint32_t align_offset,
1334bf215546Sopenharmony_ci                       unsigned num_components)
1335bf215546Sopenharmony_ci{
1336bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1337bf215546Sopenharmony_ci   modes = canonicalize_generic_modes(modes);
1338bf215546Sopenharmony_ci
   /* Several candidate modes: reduce to single-mode loads via recursion. */
1339bf215546Sopenharmony_ci   if (util_bitcount(modes) > 1) {
1340bf215546Sopenharmony_ci      if (addr_format_is_global(addr_format, modes)) {
1341bf215546Sopenharmony_ci         return build_explicit_io_load(b, intrin, addr, addr_format,
1342bf215546Sopenharmony_ci                                       nir_var_mem_global,
1343bf215546Sopenharmony_ci                                       align_mul, align_offset,
1344bf215546Sopenharmony_ci                                       num_components);
1345bf215546Sopenharmony_ci      } else if (modes & nir_var_function_temp) {
         /* if (addr is function_temp) load as temp; else handle the
          * remaining modes (which may recurse once more).
          */
1346bf215546Sopenharmony_ci         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1347bf215546Sopenharmony_ci                                                      nir_var_function_temp));
1348bf215546Sopenharmony_ci         nir_ssa_def *res1 =
1349bf215546Sopenharmony_ci            build_explicit_io_load(b, intrin, addr, addr_format,
1350bf215546Sopenharmony_ci                                   nir_var_function_temp,
1351bf215546Sopenharmony_ci                                   align_mul, align_offset,
1352bf215546Sopenharmony_ci                                   num_components);
1353bf215546Sopenharmony_ci         nir_push_else(b, NULL);
1354bf215546Sopenharmony_ci         nir_ssa_def *res2 =
1355bf215546Sopenharmony_ci            build_explicit_io_load(b, intrin, addr, addr_format,
1356bf215546Sopenharmony_ci                                   modes & ~nir_var_function_temp,
1357bf215546Sopenharmony_ci                                   align_mul, align_offset,
1358bf215546Sopenharmony_ci                                   num_components);
1359bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1360bf215546Sopenharmony_ci         return nir_if_phi(b, res1, res2);
1361bf215546Sopenharmony_ci      } else {
         /* No temp mode left: the asserts below require the set to contain
          * both shared and global.
          */
1362bf215546Sopenharmony_ci         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1363bf215546Sopenharmony_ci                                                      nir_var_mem_shared));
1364bf215546Sopenharmony_ci         assert(modes & nir_var_mem_shared);
1365bf215546Sopenharmony_ci         nir_ssa_def *res1 =
1366bf215546Sopenharmony_ci            build_explicit_io_load(b, intrin, addr, addr_format,
1367bf215546Sopenharmony_ci                                   nir_var_mem_shared,
1368bf215546Sopenharmony_ci                                   align_mul, align_offset,
1369bf215546Sopenharmony_ci                                   num_components);
1370bf215546Sopenharmony_ci         nir_push_else(b, NULL);
1371bf215546Sopenharmony_ci         assert(modes & nir_var_mem_global);
1372bf215546Sopenharmony_ci         nir_ssa_def *res2 =
1373bf215546Sopenharmony_ci            build_explicit_io_load(b, intrin, addr, addr_format,
1374bf215546Sopenharmony_ci                                   nir_var_mem_global,
1375bf215546Sopenharmony_ci                                   align_mul, align_offset,
1376bf215546Sopenharmony_ci                                   num_components);
1377bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1378bf215546Sopenharmony_ci         return nir_if_phi(b, res1, res2);
1379bf215546Sopenharmony_ci      }
1380bf215546Sopenharmony_ci   }
1381bf215546Sopenharmony_ci
1382bf215546Sopenharmony_ci   assert(util_bitcount(modes) == 1);
1383bf215546Sopenharmony_ci   const nir_variable_mode mode = modes;
1384bf215546Sopenharmony_ci
   /* Choose the replacement intrinsic from the original intrinsic, the
    * (single) mode and the address format.
    */
1385bf215546Sopenharmony_ci   nir_intrinsic_op op;
1386bf215546Sopenharmony_ci   switch (intrin->intrinsic) {
1387bf215546Sopenharmony_ci   case nir_intrinsic_load_deref:
1388bf215546Sopenharmony_ci      switch (mode) {
1389bf215546Sopenharmony_ci      case nir_var_mem_ubo:
1390bf215546Sopenharmony_ci         if (addr_format == nir_address_format_64bit_global_32bit_offset)
1391bf215546Sopenharmony_ci            op = nir_intrinsic_load_global_constant_offset;
1392bf215546Sopenharmony_ci         else if (addr_format == nir_address_format_64bit_bounded_global)
1393bf215546Sopenharmony_ci            op = nir_intrinsic_load_global_constant_bounded;
1394bf215546Sopenharmony_ci         else if (addr_format_is_global(addr_format, mode))
1395bf215546Sopenharmony_ci            op = nir_intrinsic_load_global_constant;
1396bf215546Sopenharmony_ci         else
1397bf215546Sopenharmony_ci            op = nir_intrinsic_load_ubo;
1398bf215546Sopenharmony_ci         break;
1399bf215546Sopenharmony_ci      case nir_var_mem_ssbo:
1400bf215546Sopenharmony_ci         if (addr_format_is_global(addr_format, mode))
1401bf215546Sopenharmony_ci            op = nir_intrinsic_load_global;
1402bf215546Sopenharmony_ci         else
1403bf215546Sopenharmony_ci            op = nir_intrinsic_load_ssbo;
1404bf215546Sopenharmony_ci         break;
1405bf215546Sopenharmony_ci      case nir_var_mem_global:
1406bf215546Sopenharmony_ci         assert(addr_format_is_global(addr_format, mode));
1407bf215546Sopenharmony_ci         op = get_load_global_op_from_addr_format(addr_format);
1408bf215546Sopenharmony_ci         break;
1409bf215546Sopenharmony_ci      case nir_var_uniform:
1410bf215546Sopenharmony_ci         assert(addr_format_is_offset(addr_format, mode));
1411bf215546Sopenharmony_ci         assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1412bf215546Sopenharmony_ci         op = nir_intrinsic_load_kernel_input;
1413bf215546Sopenharmony_ci         break;
1414bf215546Sopenharmony_ci      case nir_var_mem_shared:
1415bf215546Sopenharmony_ci         assert(addr_format_is_offset(addr_format, mode));
1416bf215546Sopenharmony_ci         op = nir_intrinsic_load_shared;
1417bf215546Sopenharmony_ci         break;
1418bf215546Sopenharmony_ci      case nir_var_mem_task_payload:
1419bf215546Sopenharmony_ci         assert(addr_format_is_offset(addr_format, mode));
1420bf215546Sopenharmony_ci         op = nir_intrinsic_load_task_payload;
1421bf215546Sopenharmony_ci         break;
1422bf215546Sopenharmony_ci      case nir_var_shader_temp:
1423bf215546Sopenharmony_ci      case nir_var_function_temp:
1424bf215546Sopenharmony_ci         if (addr_format_is_offset(addr_format, mode)) {
1425bf215546Sopenharmony_ci            op = nir_intrinsic_load_scratch;
1426bf215546Sopenharmony_ci         } else {
1427bf215546Sopenharmony_ci            assert(addr_format_is_global(addr_format, mode));
1428bf215546Sopenharmony_ci            op = get_load_global_op_from_addr_format(addr_format);
1429bf215546Sopenharmony_ci         }
1430bf215546Sopenharmony_ci         break;
1431bf215546Sopenharmony_ci      case nir_var_mem_push_const:
1432bf215546Sopenharmony_ci         assert(addr_format == nir_address_format_32bit_offset);
1433bf215546Sopenharmony_ci         op = nir_intrinsic_load_push_constant;
1434bf215546Sopenharmony_ci         break;
1435bf215546Sopenharmony_ci      case nir_var_mem_constant:
1436bf215546Sopenharmony_ci         if (addr_format_is_offset(addr_format, mode)) {
1437bf215546Sopenharmony_ci            op = nir_intrinsic_load_constant;
1438bf215546Sopenharmony_ci         } else {
1439bf215546Sopenharmony_ci            assert(addr_format_is_global(addr_format, mode));
1440bf215546Sopenharmony_ci            op = get_load_global_op_from_addr_format(addr_format);
1441bf215546Sopenharmony_ci         }
1442bf215546Sopenharmony_ci         break;
1443bf215546Sopenharmony_ci      default:
1444bf215546Sopenharmony_ci         unreachable("Unsupported explicit IO variable mode");
1445bf215546Sopenharmony_ci      }
1446bf215546Sopenharmony_ci      break;
1447bf215546Sopenharmony_ci
1448bf215546Sopenharmony_ci   case nir_intrinsic_load_deref_block_intel:
1449bf215546Sopenharmony_ci      switch (mode) {
1450bf215546Sopenharmony_ci      case nir_var_mem_ssbo:
1451bf215546Sopenharmony_ci         if (addr_format_is_global(addr_format, mode))
1452bf215546Sopenharmony_ci            op = nir_intrinsic_load_global_block_intel;
1453bf215546Sopenharmony_ci         else
1454bf215546Sopenharmony_ci            op = nir_intrinsic_load_ssbo_block_intel;
1455bf215546Sopenharmony_ci         break;
1456bf215546Sopenharmony_ci      case nir_var_mem_global:
1457bf215546Sopenharmony_ci         op = nir_intrinsic_load_global_block_intel;
1458bf215546Sopenharmony_ci         break;
1459bf215546Sopenharmony_ci      case nir_var_mem_shared:
1460bf215546Sopenharmony_ci         op = nir_intrinsic_load_shared_block_intel;
1461bf215546Sopenharmony_ci         break;
1462bf215546Sopenharmony_ci      default:
1463bf215546Sopenharmony_ci         unreachable("Unsupported explicit IO variable mode");
1464bf215546Sopenharmony_ci      }
1465bf215546Sopenharmony_ci      break;
1466bf215546Sopenharmony_ci
1467bf215546Sopenharmony_ci   default:
1468bf215546Sopenharmony_ci      unreachable("Invalid intrinsic");
1469bf215546Sopenharmony_ci   }
1470bf215546Sopenharmony_ci
1471bf215546Sopenharmony_ci   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1472bf215546Sopenharmony_ci
   /* Fill in the address sources.  The offset/bounded constant loads take
    * the packed 64-bit base (components 0-1) and the offset (component 3)
    * separately, plus component 2 for the bounded variant; other global
    * loads take one flattened address, offset loads take just the offset,
    * and everything else takes (index, offset).
    */
1473bf215546Sopenharmony_ci   if (op == nir_intrinsic_load_global_constant_offset) {
1474bf215546Sopenharmony_ci      assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1475bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(
1476bf215546Sopenharmony_ci         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1477bf215546Sopenharmony_ci      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1478bf215546Sopenharmony_ci   } else if (op == nir_intrinsic_load_global_constant_bounded) {
1479bf215546Sopenharmony_ci      assert(addr_format == nir_address_format_64bit_bounded_global);
1480bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(
1481bf215546Sopenharmony_ci         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1482bf215546Sopenharmony_ci      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1483bf215546Sopenharmony_ci      load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1484bf215546Sopenharmony_ci   } else if (addr_format_is_global(addr_format, mode)) {
1485bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1486bf215546Sopenharmony_ci   } else if (addr_format_is_offset(addr_format, mode)) {
1487bf215546Sopenharmony_ci      assert(addr->num_components == 1);
1488bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1489bf215546Sopenharmony_ci   } else {
1490bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1491bf215546Sopenharmony_ci      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1492bf215546Sopenharmony_ci   }
1493bf215546Sopenharmony_ci
   /* Carry the access flags over when the new intrinsic has them. */
1494bf215546Sopenharmony_ci   if (nir_intrinsic_has_access(load))
1495bf215546Sopenharmony_ci      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci   if (op == nir_intrinsic_load_constant) {
      /* Constant data: the range covers the shader's whole constant data. */
1498bf215546Sopenharmony_ci      nir_intrinsic_set_base(load, 0);
1499bf215546Sopenharmony_ci      nir_intrinsic_set_range(load, b->shader->constant_data_size);
1500bf215546Sopenharmony_ci   } else if (mode == nir_var_mem_push_const) {
1501bf215546Sopenharmony_ci      /* Push constants are required to be able to be chased back to the
1502bf215546Sopenharmony_ci       * variable so we can provide a base/range.
1503bf215546Sopenharmony_ci       */
1504bf215546Sopenharmony_ci      nir_variable *var = nir_deref_instr_get_variable(deref);
1505bf215546Sopenharmony_ci      nir_intrinsic_set_base(load, 0);
1506bf215546Sopenharmony_ci      nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1507bf215546Sopenharmony_ci   }
1508bf215546Sopenharmony_ci
   /* 1-bit values are loaded as 32 bits and converted back at the end. */
1509bf215546Sopenharmony_ci   unsigned bit_size = intrin->dest.ssa.bit_size;
1510bf215546Sopenharmony_ci   if (bit_size == 1) {
1511bf215546Sopenharmony_ci      /* TODO: Make the native bool bit_size an option. */
1512bf215546Sopenharmony_ci      bit_size = 32;
1513bf215546Sopenharmony_ci   }
1514bf215546Sopenharmony_ci
1515bf215546Sopenharmony_ci   if (nir_intrinsic_has_align(load))
1516bf215546Sopenharmony_ci      nir_intrinsic_set_align(load, align_mul, align_offset);
1517bf215546Sopenharmony_ci
   /* For intrinsics with a range_base index, derive base/range from the
    * deref chain (base 0 / range ~0 when it cannot be determined).
    */
1518bf215546Sopenharmony_ci   if (nir_intrinsic_has_range_base(load)) {
1519bf215546Sopenharmony_ci      unsigned base, range;
1520bf215546Sopenharmony_ci      nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1521bf215546Sopenharmony_ci      nir_intrinsic_set_range_base(load, base);
1522bf215546Sopenharmony_ci      nir_intrinsic_set_range(load, range);
1523bf215546Sopenharmony_ci   }
1524bf215546Sopenharmony_ci
1525bf215546Sopenharmony_ci   assert(intrin->dest.is_ssa);
1526bf215546Sopenharmony_ci   load->num_components = num_components;
1527bf215546Sopenharmony_ci   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
1528bf215546Sopenharmony_ci                     bit_size, NULL);
1529bf215546Sopenharmony_ci
   /* The byte-size computation below needs a whole number of bytes. */
1530bf215546Sopenharmony_ci   assert(bit_size % 8 == 0);
1531bf215546Sopenharmony_ci
1532bf215546Sopenharmony_ci   nir_ssa_def *result;
1533bf215546Sopenharmony_ci   if (addr_format_needs_bounds_check(addr_format) &&
1534bf215546Sopenharmony_ci       op != nir_intrinsic_load_global_constant_bounded) {
1535bf215546Sopenharmony_ci      /* We don't need to bounds-check global_constant_bounded because bounds
1536bf215546Sopenharmony_ci       * checking is handled by the intrinsic itself.
1537bf215546Sopenharmony_ci       *
1538bf215546Sopenharmony_ci       * The Vulkan spec for robustBufferAccess gives us quite a few options
1539bf215546Sopenharmony_ci       * as to what we can do with an OOB read.  Unfortunately, returning
1540bf215546Sopenharmony_ci       * undefined values isn't one of them so we return an actual zero.
1541bf215546Sopenharmony_ci       */
1542bf215546Sopenharmony_ci      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1543bf215546Sopenharmony_ci
1544bf215546Sopenharmony_ci      /* TODO: Better handle block_intel. */
1545bf215546Sopenharmony_ci      const unsigned load_size = (bit_size / 8) * load->num_components;
1546bf215546Sopenharmony_ci      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1547bf215546Sopenharmony_ci
      /* The load itself is only emitted inside the in-bounds branch. */
1548bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &load->instr);
1549bf215546Sopenharmony_ci
1550bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
1551bf215546Sopenharmony_ci
      /* In bounds: the loaded value; out of bounds: zero. */
1552bf215546Sopenharmony_ci      result = nir_if_phi(b, &load->dest.ssa, zero);
1553bf215546Sopenharmony_ci   } else {
1554bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &load->instr);
1555bf215546Sopenharmony_ci      result = &load->dest.ssa;
1556bf215546Sopenharmony_ci   }
1557bf215546Sopenharmony_ci
   /* Convert the 32-bit value back to the 1-bit boolean the caller expects. */
1558bf215546Sopenharmony_ci   if (intrin->dest.ssa.bit_size == 1) {
1559bf215546Sopenharmony_ci      /* For shared, we can go ahead and use NIR's and/or the back-end's
1560bf215546Sopenharmony_ci       * standard encoding for booleans rather than forcing a 0/1 boolean.
1561bf215546Sopenharmony_ci       * This should save an instruction or two.
1562bf215546Sopenharmony_ci       */
1563bf215546Sopenharmony_ci      if (mode == nir_var_mem_shared ||
1564bf215546Sopenharmony_ci          mode == nir_var_shader_temp ||
1565bf215546Sopenharmony_ci          mode == nir_var_function_temp)
1566bf215546Sopenharmony_ci         result = nir_b2b1(b, result);
1567bf215546Sopenharmony_ci      else
1568bf215546Sopenharmony_ci         result = nir_i2b(b, result);
1569bf215546Sopenharmony_ci   }
1570bf215546Sopenharmony_ci
1571bf215546Sopenharmony_ci   return result;
1572bf215546Sopenharmony_ci}
1573bf215546Sopenharmony_ci
1574bf215546Sopenharmony_cistatic void
1575bf215546Sopenharmony_cibuild_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1576bf215546Sopenharmony_ci                        nir_ssa_def *addr, nir_address_format addr_format,
1577bf215546Sopenharmony_ci                        nir_variable_mode modes,
1578bf215546Sopenharmony_ci                        uint32_t align_mul, uint32_t align_offset,
1579bf215546Sopenharmony_ci                        nir_ssa_def *value, nir_component_mask_t write_mask)
1580bf215546Sopenharmony_ci{
1581bf215546Sopenharmony_ci   modes = canonicalize_generic_modes(modes);
1582bf215546Sopenharmony_ci
1583bf215546Sopenharmony_ci   if (util_bitcount(modes) > 1) {
1584bf215546Sopenharmony_ci      if (addr_format_is_global(addr_format, modes)) {
1585bf215546Sopenharmony_ci         build_explicit_io_store(b, intrin, addr, addr_format,
1586bf215546Sopenharmony_ci                                 nir_var_mem_global,
1587bf215546Sopenharmony_ci                                 align_mul, align_offset,
1588bf215546Sopenharmony_ci                                 value, write_mask);
1589bf215546Sopenharmony_ci      } else if (modes & nir_var_function_temp) {
1590bf215546Sopenharmony_ci         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1591bf215546Sopenharmony_ci                                                      nir_var_function_temp));
1592bf215546Sopenharmony_ci         build_explicit_io_store(b, intrin, addr, addr_format,
1593bf215546Sopenharmony_ci                                 nir_var_function_temp,
1594bf215546Sopenharmony_ci                                 align_mul, align_offset,
1595bf215546Sopenharmony_ci                                 value, write_mask);
1596bf215546Sopenharmony_ci         nir_push_else(b, NULL);
1597bf215546Sopenharmony_ci         build_explicit_io_store(b, intrin, addr, addr_format,
1598bf215546Sopenharmony_ci                                 modes & ~nir_var_function_temp,
1599bf215546Sopenharmony_ci                                 align_mul, align_offset,
1600bf215546Sopenharmony_ci                                 value, write_mask);
1601bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1602bf215546Sopenharmony_ci      } else {
1603bf215546Sopenharmony_ci         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1604bf215546Sopenharmony_ci                                                      nir_var_mem_shared));
1605bf215546Sopenharmony_ci         assert(modes & nir_var_mem_shared);
1606bf215546Sopenharmony_ci         build_explicit_io_store(b, intrin, addr, addr_format,
1607bf215546Sopenharmony_ci                                 nir_var_mem_shared,
1608bf215546Sopenharmony_ci                                 align_mul, align_offset,
1609bf215546Sopenharmony_ci                                 value, write_mask);
1610bf215546Sopenharmony_ci         nir_push_else(b, NULL);
1611bf215546Sopenharmony_ci         assert(modes & nir_var_mem_global);
1612bf215546Sopenharmony_ci         build_explicit_io_store(b, intrin, addr, addr_format,
1613bf215546Sopenharmony_ci                                 nir_var_mem_global,
1614bf215546Sopenharmony_ci                                 align_mul, align_offset,
1615bf215546Sopenharmony_ci                                 value, write_mask);
1616bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1617bf215546Sopenharmony_ci      }
1618bf215546Sopenharmony_ci      return;
1619bf215546Sopenharmony_ci   }
1620bf215546Sopenharmony_ci
1621bf215546Sopenharmony_ci   assert(util_bitcount(modes) == 1);
1622bf215546Sopenharmony_ci   const nir_variable_mode mode = modes;
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_ci   nir_intrinsic_op op;
1625bf215546Sopenharmony_ci   switch (intrin->intrinsic) {
1626bf215546Sopenharmony_ci   case nir_intrinsic_store_deref:
1627bf215546Sopenharmony_ci      assert(write_mask != 0);
1628bf215546Sopenharmony_ci
1629bf215546Sopenharmony_ci      switch (mode) {
1630bf215546Sopenharmony_ci      case nir_var_mem_ssbo:
1631bf215546Sopenharmony_ci         if (addr_format_is_global(addr_format, mode))
1632bf215546Sopenharmony_ci            op = get_store_global_op_from_addr_format(addr_format);
1633bf215546Sopenharmony_ci         else
1634bf215546Sopenharmony_ci            op = nir_intrinsic_store_ssbo;
1635bf215546Sopenharmony_ci         break;
1636bf215546Sopenharmony_ci      case nir_var_mem_global:
1637bf215546Sopenharmony_ci         assert(addr_format_is_global(addr_format, mode));
1638bf215546Sopenharmony_ci         op = get_store_global_op_from_addr_format(addr_format);
1639bf215546Sopenharmony_ci         break;
1640bf215546Sopenharmony_ci      case nir_var_mem_shared:
1641bf215546Sopenharmony_ci         assert(addr_format_is_offset(addr_format, mode));
1642bf215546Sopenharmony_ci         op = nir_intrinsic_store_shared;
1643bf215546Sopenharmony_ci         break;
1644bf215546Sopenharmony_ci      case nir_var_mem_task_payload:
1645bf215546Sopenharmony_ci         assert(addr_format_is_offset(addr_format, mode));
1646bf215546Sopenharmony_ci         op = nir_intrinsic_store_task_payload;
1647bf215546Sopenharmony_ci         break;
1648bf215546Sopenharmony_ci      case nir_var_shader_temp:
1649bf215546Sopenharmony_ci      case nir_var_function_temp:
1650bf215546Sopenharmony_ci         if (addr_format_is_offset(addr_format, mode)) {
1651bf215546Sopenharmony_ci            op = nir_intrinsic_store_scratch;
1652bf215546Sopenharmony_ci         } else {
1653bf215546Sopenharmony_ci            assert(addr_format_is_global(addr_format, mode));
1654bf215546Sopenharmony_ci            op = get_store_global_op_from_addr_format(addr_format);
1655bf215546Sopenharmony_ci         }
1656bf215546Sopenharmony_ci         break;
1657bf215546Sopenharmony_ci      default:
1658bf215546Sopenharmony_ci         unreachable("Unsupported explicit IO variable mode");
1659bf215546Sopenharmony_ci      }
1660bf215546Sopenharmony_ci      break;
1661bf215546Sopenharmony_ci
1662bf215546Sopenharmony_ci   case nir_intrinsic_store_deref_block_intel:
1663bf215546Sopenharmony_ci      assert(write_mask == 0);
1664bf215546Sopenharmony_ci
1665bf215546Sopenharmony_ci      switch (mode) {
1666bf215546Sopenharmony_ci      case nir_var_mem_ssbo:
1667bf215546Sopenharmony_ci         if (addr_format_is_global(addr_format, mode))
1668bf215546Sopenharmony_ci            op = nir_intrinsic_store_global_block_intel;
1669bf215546Sopenharmony_ci         else
1670bf215546Sopenharmony_ci            op = nir_intrinsic_store_ssbo_block_intel;
1671bf215546Sopenharmony_ci         break;
1672bf215546Sopenharmony_ci      case nir_var_mem_global:
1673bf215546Sopenharmony_ci         op = nir_intrinsic_store_global_block_intel;
1674bf215546Sopenharmony_ci         break;
1675bf215546Sopenharmony_ci      case nir_var_mem_shared:
1676bf215546Sopenharmony_ci         op = nir_intrinsic_store_shared_block_intel;
1677bf215546Sopenharmony_ci         break;
1678bf215546Sopenharmony_ci      default:
1679bf215546Sopenharmony_ci         unreachable("Unsupported explicit IO variable mode");
1680bf215546Sopenharmony_ci      }
1681bf215546Sopenharmony_ci      break;
1682bf215546Sopenharmony_ci
1683bf215546Sopenharmony_ci   default:
1684bf215546Sopenharmony_ci      unreachable("Invalid intrinsic");
1685bf215546Sopenharmony_ci   }
1686bf215546Sopenharmony_ci
1687bf215546Sopenharmony_ci   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_ci   if (value->bit_size == 1) {
1690bf215546Sopenharmony_ci      /* For shared, we can go ahead and use NIR's and/or the back-end's
1691bf215546Sopenharmony_ci       * standard encoding for booleans rather than forcing a 0/1 boolean.
1692bf215546Sopenharmony_ci       * This should save an instruction or two.
1693bf215546Sopenharmony_ci       *
1694bf215546Sopenharmony_ci       * TODO: Make the native bool bit_size an option.
1695bf215546Sopenharmony_ci       */
1696bf215546Sopenharmony_ci      if (mode == nir_var_mem_shared ||
1697bf215546Sopenharmony_ci          mode == nir_var_shader_temp ||
1698bf215546Sopenharmony_ci          mode == nir_var_function_temp)
1699bf215546Sopenharmony_ci         value = nir_b2b32(b, value);
1700bf215546Sopenharmony_ci      else
1701bf215546Sopenharmony_ci         value = nir_b2i(b, value, 32);
1702bf215546Sopenharmony_ci   }
1703bf215546Sopenharmony_ci
1704bf215546Sopenharmony_ci   store->src[0] = nir_src_for_ssa(value);
1705bf215546Sopenharmony_ci   if (addr_format_is_global(addr_format, mode)) {
1706bf215546Sopenharmony_ci      store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1707bf215546Sopenharmony_ci   } else if (addr_format_is_offset(addr_format, mode)) {
1708bf215546Sopenharmony_ci      assert(addr->num_components == 1);
1709bf215546Sopenharmony_ci      store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1710bf215546Sopenharmony_ci   } else {
1711bf215546Sopenharmony_ci      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1712bf215546Sopenharmony_ci      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1713bf215546Sopenharmony_ci   }
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_ci   nir_intrinsic_set_write_mask(store, write_mask);
1716bf215546Sopenharmony_ci
1717bf215546Sopenharmony_ci   if (nir_intrinsic_has_access(store))
1718bf215546Sopenharmony_ci      nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1719bf215546Sopenharmony_ci
1720bf215546Sopenharmony_ci   nir_intrinsic_set_align(store, align_mul, align_offset);
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci   assert(value->num_components == 1 ||
1723bf215546Sopenharmony_ci          value->num_components == intrin->num_components);
1724bf215546Sopenharmony_ci   store->num_components = value->num_components;
1725bf215546Sopenharmony_ci
1726bf215546Sopenharmony_ci   assert(value->bit_size % 8 == 0);
1727bf215546Sopenharmony_ci
1728bf215546Sopenharmony_ci   if (addr_format_needs_bounds_check(addr_format)) {
1729bf215546Sopenharmony_ci      /* TODO: Better handle block_intel. */
1730bf215546Sopenharmony_ci      const unsigned store_size = (value->bit_size / 8) * store->num_components;
1731bf215546Sopenharmony_ci      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1732bf215546Sopenharmony_ci
1733bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &store->instr);
1734bf215546Sopenharmony_ci
1735bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
1736bf215546Sopenharmony_ci   } else {
1737bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &store->instr);
1738bf215546Sopenharmony_ci   }
1739bf215546Sopenharmony_ci}
1740bf215546Sopenharmony_ci
1741bf215546Sopenharmony_cistatic nir_ssa_def *
1742bf215546Sopenharmony_cibuild_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1743bf215546Sopenharmony_ci                         nir_ssa_def *addr, nir_address_format addr_format,
1744bf215546Sopenharmony_ci                         nir_variable_mode modes)
1745bf215546Sopenharmony_ci{
1746bf215546Sopenharmony_ci   modes = canonicalize_generic_modes(modes);
1747bf215546Sopenharmony_ci
1748bf215546Sopenharmony_ci   if (util_bitcount(modes) > 1) {
1749bf215546Sopenharmony_ci      if (addr_format_is_global(addr_format, modes)) {
1750bf215546Sopenharmony_ci         return build_explicit_io_atomic(b, intrin, addr, addr_format,
1751bf215546Sopenharmony_ci                                         nir_var_mem_global);
1752bf215546Sopenharmony_ci      } else if (modes & nir_var_function_temp) {
1753bf215546Sopenharmony_ci         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1754bf215546Sopenharmony_ci                                                      nir_var_function_temp));
1755bf215546Sopenharmony_ci         nir_ssa_def *res1 =
1756bf215546Sopenharmony_ci            build_explicit_io_atomic(b, intrin, addr, addr_format,
1757bf215546Sopenharmony_ci                                     nir_var_function_temp);
1758bf215546Sopenharmony_ci         nir_push_else(b, NULL);
1759bf215546Sopenharmony_ci         nir_ssa_def *res2 =
1760bf215546Sopenharmony_ci            build_explicit_io_atomic(b, intrin, addr, addr_format,
1761bf215546Sopenharmony_ci                                     modes & ~nir_var_function_temp);
1762bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1763bf215546Sopenharmony_ci         return nir_if_phi(b, res1, res2);
1764bf215546Sopenharmony_ci      } else {
1765bf215546Sopenharmony_ci         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1766bf215546Sopenharmony_ci                                                      nir_var_mem_shared));
1767bf215546Sopenharmony_ci         assert(modes & nir_var_mem_shared);
1768bf215546Sopenharmony_ci         nir_ssa_def *res1 =
1769bf215546Sopenharmony_ci            build_explicit_io_atomic(b, intrin, addr, addr_format,
1770bf215546Sopenharmony_ci                                     nir_var_mem_shared);
1771bf215546Sopenharmony_ci         nir_push_else(b, NULL);
1772bf215546Sopenharmony_ci         assert(modes & nir_var_mem_global);
1773bf215546Sopenharmony_ci         nir_ssa_def *res2 =
1774bf215546Sopenharmony_ci            build_explicit_io_atomic(b, intrin, addr, addr_format,
1775bf215546Sopenharmony_ci                                     nir_var_mem_global);
1776bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1777bf215546Sopenharmony_ci         return nir_if_phi(b, res1, res2);
1778bf215546Sopenharmony_ci      }
1779bf215546Sopenharmony_ci   }
1780bf215546Sopenharmony_ci
1781bf215546Sopenharmony_ci   assert(util_bitcount(modes) == 1);
1782bf215546Sopenharmony_ci   const nir_variable_mode mode = modes;
1783bf215546Sopenharmony_ci
1784bf215546Sopenharmony_ci   const unsigned num_data_srcs =
1785bf215546Sopenharmony_ci      nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1786bf215546Sopenharmony_ci
1787bf215546Sopenharmony_ci   nir_intrinsic_op op;
1788bf215546Sopenharmony_ci   switch (mode) {
1789bf215546Sopenharmony_ci   case nir_var_mem_ssbo:
1790bf215546Sopenharmony_ci      if (addr_format_is_global(addr_format, mode))
1791bf215546Sopenharmony_ci         op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1792bf215546Sopenharmony_ci      else
1793bf215546Sopenharmony_ci         op = ssbo_atomic_for_deref(intrin->intrinsic);
1794bf215546Sopenharmony_ci      break;
1795bf215546Sopenharmony_ci   case nir_var_mem_global:
1796bf215546Sopenharmony_ci      assert(addr_format_is_global(addr_format, mode));
1797bf215546Sopenharmony_ci      op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1798bf215546Sopenharmony_ci      break;
1799bf215546Sopenharmony_ci   case nir_var_mem_shared:
1800bf215546Sopenharmony_ci      assert(addr_format_is_offset(addr_format, mode));
1801bf215546Sopenharmony_ci      op = shared_atomic_for_deref(intrin->intrinsic);
1802bf215546Sopenharmony_ci      break;
1803bf215546Sopenharmony_ci   case nir_var_mem_task_payload:
1804bf215546Sopenharmony_ci      assert(addr_format_is_offset(addr_format, mode));
1805bf215546Sopenharmony_ci      op = task_payload_atomic_for_deref(intrin->intrinsic);
1806bf215546Sopenharmony_ci      break;
1807bf215546Sopenharmony_ci   default:
1808bf215546Sopenharmony_ci      unreachable("Unsupported explicit IO variable mode");
1809bf215546Sopenharmony_ci   }
1810bf215546Sopenharmony_ci
1811bf215546Sopenharmony_ci   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1812bf215546Sopenharmony_ci
1813bf215546Sopenharmony_ci   unsigned src = 0;
1814bf215546Sopenharmony_ci   if (addr_format_is_global(addr_format, mode)) {
1815bf215546Sopenharmony_ci      atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1816bf215546Sopenharmony_ci   } else if (addr_format_is_offset(addr_format, mode)) {
1817bf215546Sopenharmony_ci      assert(addr->num_components == 1);
1818bf215546Sopenharmony_ci      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1819bf215546Sopenharmony_ci   } else {
1820bf215546Sopenharmony_ci      atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1821bf215546Sopenharmony_ci      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1822bf215546Sopenharmony_ci   }
1823bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_data_srcs; i++) {
1824bf215546Sopenharmony_ci      atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1825bf215546Sopenharmony_ci   }
1826bf215546Sopenharmony_ci
1827bf215546Sopenharmony_ci   /* Global atomics don't have access flags because they assume that the
1828bf215546Sopenharmony_ci    * address may be non-uniform.
1829bf215546Sopenharmony_ci    */
1830bf215546Sopenharmony_ci   if (nir_intrinsic_has_access(atomic))
1831bf215546Sopenharmony_ci      nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1832bf215546Sopenharmony_ci
1833bf215546Sopenharmony_ci   assert(intrin->dest.ssa.num_components == 1);
1834bf215546Sopenharmony_ci   nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1835bf215546Sopenharmony_ci                     1, intrin->dest.ssa.bit_size, NULL);
1836bf215546Sopenharmony_ci
1837bf215546Sopenharmony_ci   assert(atomic->dest.ssa.bit_size % 8 == 0);
1838bf215546Sopenharmony_ci
1839bf215546Sopenharmony_ci   if (addr_format_needs_bounds_check(addr_format)) {
1840bf215546Sopenharmony_ci      const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1841bf215546Sopenharmony_ci      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1842bf215546Sopenharmony_ci
1843bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &atomic->instr);
1844bf215546Sopenharmony_ci
1845bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
1846bf215546Sopenharmony_ci      return nir_if_phi(b, &atomic->dest.ssa,
1847bf215546Sopenharmony_ci                           nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1848bf215546Sopenharmony_ci   } else {
1849bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &atomic->instr);
1850bf215546Sopenharmony_ci      return &atomic->dest.ssa;
1851bf215546Sopenharmony_ci   }
1852bf215546Sopenharmony_ci}
1853bf215546Sopenharmony_ci
1854bf215546Sopenharmony_cinir_ssa_def *
1855bf215546Sopenharmony_cinir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1856bf215546Sopenharmony_ci                                   nir_ssa_def *base_addr,
1857bf215546Sopenharmony_ci                                   nir_address_format addr_format)
1858bf215546Sopenharmony_ci{
1859bf215546Sopenharmony_ci   assert(deref->dest.is_ssa);
1860bf215546Sopenharmony_ci   switch (deref->deref_type) {
1861bf215546Sopenharmony_ci   case nir_deref_type_var:
1862bf215546Sopenharmony_ci      return build_addr_for_var(b, deref->var, addr_format);
1863bf215546Sopenharmony_ci
1864bf215546Sopenharmony_ci   case nir_deref_type_ptr_as_array:
1865bf215546Sopenharmony_ci   case nir_deref_type_array: {
1866bf215546Sopenharmony_ci      unsigned stride = nir_deref_instr_array_stride(deref);
1867bf215546Sopenharmony_ci      assert(stride > 0);
1868bf215546Sopenharmony_ci
1869bf215546Sopenharmony_ci      unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
1870bf215546Sopenharmony_ci      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1871bf215546Sopenharmony_ci      nir_ssa_def *offset;
1872bf215546Sopenharmony_ci
1873bf215546Sopenharmony_ci      /* If the access chain has been declared in-bounds, then we know it doesn't
1874bf215546Sopenharmony_ci       * overflow the type.  For nir_deref_type_array, this implies it cannot be
1875bf215546Sopenharmony_ci       * negative. Also, since types in NIR have a maximum 32-bit size, we know the
1876bf215546Sopenharmony_ci       * final result will fit in a 32-bit value so we can convert the index to
1877bf215546Sopenharmony_ci       * 32-bit before multiplying and save ourselves from a 64-bit multiply.
1878bf215546Sopenharmony_ci       */
1879bf215546Sopenharmony_ci      if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
1880bf215546Sopenharmony_ci         index = nir_u2u32(b, index);
1881bf215546Sopenharmony_ci         offset = nir_u2u(b, nir_amul_imm(b, index, stride), offset_bit_size);
1882bf215546Sopenharmony_ci      } else {
1883bf215546Sopenharmony_ci         index = nir_i2i(b, index, offset_bit_size);
1884bf215546Sopenharmony_ci         offset = nir_amul_imm(b, index, stride);
1885bf215546Sopenharmony_ci      }
1886bf215546Sopenharmony_ci
1887bf215546Sopenharmony_ci      return build_addr_iadd(b, base_addr, addr_format, deref->modes, offset);
1888bf215546Sopenharmony_ci   }
1889bf215546Sopenharmony_ci
1890bf215546Sopenharmony_ci   case nir_deref_type_array_wildcard:
1891bf215546Sopenharmony_ci      unreachable("Wildcards should be lowered by now");
1892bf215546Sopenharmony_ci      break;
1893bf215546Sopenharmony_ci
1894bf215546Sopenharmony_ci   case nir_deref_type_struct: {
1895bf215546Sopenharmony_ci      nir_deref_instr *parent = nir_deref_instr_parent(deref);
1896bf215546Sopenharmony_ci      int offset = glsl_get_struct_field_offset(parent->type,
1897bf215546Sopenharmony_ci                                                deref->strct.index);
1898bf215546Sopenharmony_ci      assert(offset >= 0);
1899bf215546Sopenharmony_ci      return build_addr_iadd_imm(b, base_addr, addr_format,
1900bf215546Sopenharmony_ci                                 deref->modes, offset);
1901bf215546Sopenharmony_ci   }
1902bf215546Sopenharmony_ci
1903bf215546Sopenharmony_ci   case nir_deref_type_cast:
1904bf215546Sopenharmony_ci      /* Nothing to do here */
1905bf215546Sopenharmony_ci      return base_addr;
1906bf215546Sopenharmony_ci   }
1907bf215546Sopenharmony_ci
1908bf215546Sopenharmony_ci   unreachable("Invalid NIR deref type");
1909bf215546Sopenharmony_ci}
1910bf215546Sopenharmony_ci
1911bf215546Sopenharmony_civoid
1912bf215546Sopenharmony_cinir_lower_explicit_io_instr(nir_builder *b,
1913bf215546Sopenharmony_ci                            nir_intrinsic_instr *intrin,
1914bf215546Sopenharmony_ci                            nir_ssa_def *addr,
1915bf215546Sopenharmony_ci                            nir_address_format addr_format)
1916bf215546Sopenharmony_ci{
1917bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&intrin->instr);
1918bf215546Sopenharmony_ci
1919bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1920bf215546Sopenharmony_ci   unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1921bf215546Sopenharmony_ci   unsigned scalar_size = type_scalar_size_bytes(deref->type);
1922bf215546Sopenharmony_ci   assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1923bf215546Sopenharmony_ci   assert(vec_stride == 0 || vec_stride >= scalar_size);
1924bf215546Sopenharmony_ci
1925bf215546Sopenharmony_ci   uint32_t align_mul, align_offset;
1926bf215546Sopenharmony_ci   if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1927bf215546Sopenharmony_ci      /* If we don't have an alignment from the deref, assume scalar */
1928bf215546Sopenharmony_ci      align_mul = scalar_size;
1929bf215546Sopenharmony_ci      align_offset = 0;
1930bf215546Sopenharmony_ci   }
1931bf215546Sopenharmony_ci
1932bf215546Sopenharmony_ci   switch (intrin->intrinsic) {
1933bf215546Sopenharmony_ci   case nir_intrinsic_load_deref: {
1934bf215546Sopenharmony_ci      nir_ssa_def *value;
1935bf215546Sopenharmony_ci      if (vec_stride > scalar_size) {
1936bf215546Sopenharmony_ci         nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
1937bf215546Sopenharmony_ci         for (unsigned i = 0; i < intrin->num_components; i++) {
1938bf215546Sopenharmony_ci            unsigned comp_offset = i * vec_stride;
1939bf215546Sopenharmony_ci            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1940bf215546Sopenharmony_ci                                                         deref->modes,
1941bf215546Sopenharmony_ci                                                         comp_offset);
1942bf215546Sopenharmony_ci            comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1943bf215546Sopenharmony_ci                                              addr_format, deref->modes,
1944bf215546Sopenharmony_ci                                              align_mul,
1945bf215546Sopenharmony_ci                                              (align_offset + comp_offset) %
1946bf215546Sopenharmony_ci                                                 align_mul,
1947bf215546Sopenharmony_ci                                              1);
1948bf215546Sopenharmony_ci         }
1949bf215546Sopenharmony_ci         value = nir_vec(b, comps, intrin->num_components);
1950bf215546Sopenharmony_ci      } else {
1951bf215546Sopenharmony_ci         value = build_explicit_io_load(b, intrin, addr, addr_format,
1952bf215546Sopenharmony_ci                                        deref->modes, align_mul, align_offset,
1953bf215546Sopenharmony_ci                                        intrin->num_components);
1954bf215546Sopenharmony_ci      }
1955bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1956bf215546Sopenharmony_ci      break;
1957bf215546Sopenharmony_ci   }
1958bf215546Sopenharmony_ci
1959bf215546Sopenharmony_ci   case nir_intrinsic_store_deref: {
1960bf215546Sopenharmony_ci      assert(intrin->src[1].is_ssa);
1961bf215546Sopenharmony_ci      nir_ssa_def *value = intrin->src[1].ssa;
1962bf215546Sopenharmony_ci      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1963bf215546Sopenharmony_ci      if (vec_stride > scalar_size) {
1964bf215546Sopenharmony_ci         for (unsigned i = 0; i < intrin->num_components; i++) {
1965bf215546Sopenharmony_ci            if (!(write_mask & (1 << i)))
1966bf215546Sopenharmony_ci               continue;
1967bf215546Sopenharmony_ci
1968bf215546Sopenharmony_ci            unsigned comp_offset = i * vec_stride;
1969bf215546Sopenharmony_ci            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1970bf215546Sopenharmony_ci                                                         deref->modes,
1971bf215546Sopenharmony_ci                                                         comp_offset);
1972bf215546Sopenharmony_ci            build_explicit_io_store(b, intrin, comp_addr, addr_format,
1973bf215546Sopenharmony_ci                                    deref->modes, align_mul,
1974bf215546Sopenharmony_ci                                    (align_offset + comp_offset) % align_mul,
1975bf215546Sopenharmony_ci                                    nir_channel(b, value, i), 1);
1976bf215546Sopenharmony_ci         }
1977bf215546Sopenharmony_ci      } else {
1978bf215546Sopenharmony_ci         build_explicit_io_store(b, intrin, addr, addr_format,
1979bf215546Sopenharmony_ci                                 deref->modes, align_mul, align_offset,
1980bf215546Sopenharmony_ci                                 value, write_mask);
1981bf215546Sopenharmony_ci      }
1982bf215546Sopenharmony_ci      break;
1983bf215546Sopenharmony_ci   }
1984bf215546Sopenharmony_ci
1985bf215546Sopenharmony_ci   case nir_intrinsic_load_deref_block_intel: {
1986bf215546Sopenharmony_ci      nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
1987bf215546Sopenharmony_ci                                                  deref->modes,
1988bf215546Sopenharmony_ci                                                  align_mul, align_offset,
1989bf215546Sopenharmony_ci                                                  intrin->num_components);
1990bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1991bf215546Sopenharmony_ci      break;
1992bf215546Sopenharmony_ci   }
1993bf215546Sopenharmony_ci
1994bf215546Sopenharmony_ci   case nir_intrinsic_store_deref_block_intel: {
1995bf215546Sopenharmony_ci      assert(intrin->src[1].is_ssa);
1996bf215546Sopenharmony_ci      nir_ssa_def *value = intrin->src[1].ssa;
1997bf215546Sopenharmony_ci      const nir_component_mask_t write_mask = 0;
1998bf215546Sopenharmony_ci      build_explicit_io_store(b, intrin, addr, addr_format,
1999bf215546Sopenharmony_ci                              deref->modes, align_mul, align_offset,
2000bf215546Sopenharmony_ci                              value, write_mask);
2001bf215546Sopenharmony_ci      break;
2002bf215546Sopenharmony_ci   }
2003bf215546Sopenharmony_ci
2004bf215546Sopenharmony_ci   default: {
2005bf215546Sopenharmony_ci      nir_ssa_def *value =
2006bf215546Sopenharmony_ci         build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
2007bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
2008bf215546Sopenharmony_ci      break;
2009bf215546Sopenharmony_ci   }
2010bf215546Sopenharmony_ci   }
2011bf215546Sopenharmony_ci
2012bf215546Sopenharmony_ci   nir_instr_remove(&intrin->instr);
2013bf215546Sopenharmony_ci}
2014bf215546Sopenharmony_ci
2015bf215546Sopenharmony_cibool
2016bf215546Sopenharmony_cinir_get_explicit_deref_align(nir_deref_instr *deref,
2017bf215546Sopenharmony_ci                             bool default_to_type_align,
2018bf215546Sopenharmony_ci                             uint32_t *align_mul,
2019bf215546Sopenharmony_ci                             uint32_t *align_offset)
2020bf215546Sopenharmony_ci{
2021bf215546Sopenharmony_ci   if (deref->deref_type == nir_deref_type_var) {
2022bf215546Sopenharmony_ci      /* If we see a variable, align_mul is effectively infinite because we
2023bf215546Sopenharmony_ci       * know the offset exactly (up to the offset of the base pointer for the
2024bf215546Sopenharmony_ci       * given variable mode).   We have to pick something so we choose 256B
2025bf215546Sopenharmony_ci       * as an arbitrary alignment which seems high enough for any reasonable
2026bf215546Sopenharmony_ci       * wide-load use-case.  Back-ends should clamp alignments down if 256B
2027bf215546Sopenharmony_ci       * is too large for some reason.
2028bf215546Sopenharmony_ci       */
2029bf215546Sopenharmony_ci      *align_mul = 256;
2030bf215546Sopenharmony_ci      *align_offset = deref->var->data.driver_location % 256;
2031bf215546Sopenharmony_ci      return true;
2032bf215546Sopenharmony_ci   }
2033bf215546Sopenharmony_ci
2034bf215546Sopenharmony_ci   /* If we're a cast deref that has an alignment, use that. */
2035bf215546Sopenharmony_ci   if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
2036bf215546Sopenharmony_ci      *align_mul = deref->cast.align_mul;
2037bf215546Sopenharmony_ci      *align_offset = deref->cast.align_offset;
2038bf215546Sopenharmony_ci      return true;
2039bf215546Sopenharmony_ci   }
2040bf215546Sopenharmony_ci
2041bf215546Sopenharmony_ci   /* Otherwise, we need to compute the alignment based on the parent */
2042bf215546Sopenharmony_ci   nir_deref_instr *parent = nir_deref_instr_parent(deref);
2043bf215546Sopenharmony_ci   if (parent == NULL) {
2044bf215546Sopenharmony_ci      assert(deref->deref_type == nir_deref_type_cast);
2045bf215546Sopenharmony_ci      if (default_to_type_align) {
2046bf215546Sopenharmony_ci         /* If we don't have a parent, assume the type's alignment, if any. */
2047bf215546Sopenharmony_ci         unsigned type_align = glsl_get_explicit_alignment(deref->type);
2048bf215546Sopenharmony_ci         if (type_align == 0)
2049bf215546Sopenharmony_ci            return false;
2050bf215546Sopenharmony_ci
2051bf215546Sopenharmony_ci         *align_mul = type_align;
2052bf215546Sopenharmony_ci         *align_offset = 0;
2053bf215546Sopenharmony_ci         return true;
2054bf215546Sopenharmony_ci      } else {
2055bf215546Sopenharmony_ci         return false;
2056bf215546Sopenharmony_ci      }
2057bf215546Sopenharmony_ci   }
2058bf215546Sopenharmony_ci
2059bf215546Sopenharmony_ci   uint32_t parent_mul, parent_offset;
2060bf215546Sopenharmony_ci   if (!nir_get_explicit_deref_align(parent, default_to_type_align,
2061bf215546Sopenharmony_ci                                     &parent_mul, &parent_offset))
2062bf215546Sopenharmony_ci      return false;
2063bf215546Sopenharmony_ci
2064bf215546Sopenharmony_ci   switch (deref->deref_type) {
2065bf215546Sopenharmony_ci   case nir_deref_type_var:
2066bf215546Sopenharmony_ci      unreachable("Handled above");
2067bf215546Sopenharmony_ci
2068bf215546Sopenharmony_ci   case nir_deref_type_array:
2069bf215546Sopenharmony_ci   case nir_deref_type_array_wildcard:
2070bf215546Sopenharmony_ci   case nir_deref_type_ptr_as_array: {
2071bf215546Sopenharmony_ci      const unsigned stride = nir_deref_instr_array_stride(deref);
2072bf215546Sopenharmony_ci      if (stride == 0)
2073bf215546Sopenharmony_ci         return false;
2074bf215546Sopenharmony_ci
2075bf215546Sopenharmony_ci      if (deref->deref_type != nir_deref_type_array_wildcard &&
2076bf215546Sopenharmony_ci          nir_src_is_const(deref->arr.index)) {
2077bf215546Sopenharmony_ci         unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2078bf215546Sopenharmony_ci         *align_mul = parent_mul;
2079bf215546Sopenharmony_ci         *align_offset = (parent_offset + offset) % parent_mul;
2080bf215546Sopenharmony_ci      } else {
2081bf215546Sopenharmony_ci         /* If this is a wildcard or an indirect deref, we have to go with the
2082bf215546Sopenharmony_ci          * power-of-two gcd.
2083bf215546Sopenharmony_ci          */
2084bf215546Sopenharmony_ci         *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2085bf215546Sopenharmony_ci         *align_offset = parent_offset % *align_mul;
2086bf215546Sopenharmony_ci      }
2087bf215546Sopenharmony_ci      return true;
2088bf215546Sopenharmony_ci   }
2089bf215546Sopenharmony_ci
2090bf215546Sopenharmony_ci   case nir_deref_type_struct: {
2091bf215546Sopenharmony_ci      const int offset = glsl_get_struct_field_offset(parent->type,
2092bf215546Sopenharmony_ci                                                      deref->strct.index);
2093bf215546Sopenharmony_ci      if (offset < 0)
2094bf215546Sopenharmony_ci         return false;
2095bf215546Sopenharmony_ci
2096bf215546Sopenharmony_ci      *align_mul = parent_mul;
2097bf215546Sopenharmony_ci      *align_offset = (parent_offset + offset) % parent_mul;
2098bf215546Sopenharmony_ci      return true;
2099bf215546Sopenharmony_ci   }
2100bf215546Sopenharmony_ci
2101bf215546Sopenharmony_ci   case nir_deref_type_cast:
2102bf215546Sopenharmony_ci      /* We handled the explicit alignment case above. */
2103bf215546Sopenharmony_ci      assert(deref->cast.align_mul == 0);
2104bf215546Sopenharmony_ci      *align_mul = parent_mul;
2105bf215546Sopenharmony_ci      *align_offset = parent_offset;
2106bf215546Sopenharmony_ci      return true;
2107bf215546Sopenharmony_ci   }
2108bf215546Sopenharmony_ci
2109bf215546Sopenharmony_ci   unreachable("Invalid deref_instr_type");
2110bf215546Sopenharmony_ci}
2111bf215546Sopenharmony_ci
2112bf215546Sopenharmony_cistatic void
2113bf215546Sopenharmony_cilower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2114bf215546Sopenharmony_ci                        nir_address_format addr_format)
2115bf215546Sopenharmony_ci{
2116bf215546Sopenharmony_ci   /* Just delete the deref if it's not used.  We can't use
2117bf215546Sopenharmony_ci    * nir_deref_instr_remove_if_unused here because it may remove more than
2118bf215546Sopenharmony_ci    * one deref which could break our list walking since we walk the list
2119bf215546Sopenharmony_ci    * backwards.
2120bf215546Sopenharmony_ci    */
2121bf215546Sopenharmony_ci   assert(list_is_empty(&deref->dest.ssa.if_uses));
2122bf215546Sopenharmony_ci   if (list_is_empty(&deref->dest.ssa.uses)) {
2123bf215546Sopenharmony_ci      nir_instr_remove(&deref->instr);
2124bf215546Sopenharmony_ci      return;
2125bf215546Sopenharmony_ci   }
2126bf215546Sopenharmony_ci
2127bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&deref->instr);
2128bf215546Sopenharmony_ci
2129bf215546Sopenharmony_ci   nir_ssa_def *base_addr = NULL;
2130bf215546Sopenharmony_ci   if (deref->deref_type != nir_deref_type_var) {
2131bf215546Sopenharmony_ci      assert(deref->parent.is_ssa);
2132bf215546Sopenharmony_ci      base_addr = deref->parent.ssa;
2133bf215546Sopenharmony_ci   }
2134bf215546Sopenharmony_ci
2135bf215546Sopenharmony_ci   nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2136bf215546Sopenharmony_ci                                                          addr_format);
2137bf215546Sopenharmony_ci   assert(addr->bit_size == deref->dest.ssa.bit_size);
2138bf215546Sopenharmony_ci   assert(addr->num_components == deref->dest.ssa.num_components);
2139bf215546Sopenharmony_ci
2140bf215546Sopenharmony_ci   nir_instr_remove(&deref->instr);
2141bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr);
2142bf215546Sopenharmony_ci}
2143bf215546Sopenharmony_ci
2144bf215546Sopenharmony_cistatic void
2145bf215546Sopenharmony_cilower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2146bf215546Sopenharmony_ci                         nir_address_format addr_format)
2147bf215546Sopenharmony_ci{
2148bf215546Sopenharmony_ci   assert(intrin->src[0].is_ssa);
2149bf215546Sopenharmony_ci   nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2150bf215546Sopenharmony_ci}
2151bf215546Sopenharmony_ci
2152bf215546Sopenharmony_cistatic void
2153bf215546Sopenharmony_cilower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2154bf215546Sopenharmony_ci                               nir_address_format addr_format)
2155bf215546Sopenharmony_ci{
2156bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&intrin->instr);
2157bf215546Sopenharmony_ci
2158bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2159bf215546Sopenharmony_ci
2160bf215546Sopenharmony_ci   assert(glsl_type_is_array(deref->type));
2161bf215546Sopenharmony_ci   assert(glsl_get_length(deref->type) == 0);
2162bf215546Sopenharmony_ci   assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2163bf215546Sopenharmony_ci   unsigned stride = glsl_get_explicit_stride(deref->type);
2164bf215546Sopenharmony_ci   assert(stride > 0);
2165bf215546Sopenharmony_ci
2166bf215546Sopenharmony_ci   nir_ssa_def *addr = &deref->dest.ssa;
2167bf215546Sopenharmony_ci   nir_ssa_def *index = addr_to_index(b, addr, addr_format);
2168bf215546Sopenharmony_ci   nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
2169bf215546Sopenharmony_ci   unsigned access = nir_intrinsic_access(intrin);
2170bf215546Sopenharmony_ci
2171bf215546Sopenharmony_ci   nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access);
2172bf215546Sopenharmony_ci   arr_size = nir_usub_sat(b, arr_size, offset);
2173bf215546Sopenharmony_ci   arr_size = nir_udiv_imm(b, arr_size, stride);
2174bf215546Sopenharmony_ci
2175bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size);
2176bf215546Sopenharmony_ci   nir_instr_remove(&intrin->instr);
2177bf215546Sopenharmony_ci}
2178bf215546Sopenharmony_ci
2179bf215546Sopenharmony_cistatic void
2180bf215546Sopenharmony_cilower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2181bf215546Sopenharmony_ci                             nir_address_format addr_format)
2182bf215546Sopenharmony_ci{
2183bf215546Sopenharmony_ci   if (addr_format_is_global(addr_format, 0)) {
2184bf215546Sopenharmony_ci      /* If the address format is always global, then the driver can use
2185bf215546Sopenharmony_ci       * global addresses regardless of the mode.  In that case, don't create
2186bf215546Sopenharmony_ci       * a check, just whack the intrinsic to addr_mode_is and delegate to the
2187bf215546Sopenharmony_ci       * driver lowering.
2188bf215546Sopenharmony_ci       */
2189bf215546Sopenharmony_ci      intrin->intrinsic = nir_intrinsic_addr_mode_is;
2190bf215546Sopenharmony_ci      return;
2191bf215546Sopenharmony_ci   }
2192bf215546Sopenharmony_ci
2193bf215546Sopenharmony_ci   assert(intrin->src[0].is_ssa);
2194bf215546Sopenharmony_ci   nir_ssa_def *addr = intrin->src[0].ssa;
2195bf215546Sopenharmony_ci
2196bf215546Sopenharmony_ci   b->cursor = nir_instr_remove(&intrin->instr);
2197bf215546Sopenharmony_ci
2198bf215546Sopenharmony_ci   nir_ssa_def *is_mode =
2199bf215546Sopenharmony_ci      build_runtime_addr_mode_check(b, addr, addr_format,
2200bf215546Sopenharmony_ci                                    nir_intrinsic_memory_modes(intrin));
2201bf215546Sopenharmony_ci
2202bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode);
2203bf215546Sopenharmony_ci}
2204bf215546Sopenharmony_ci
2205bf215546Sopenharmony_cistatic bool
2206bf215546Sopenharmony_cinir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2207bf215546Sopenharmony_ci                           nir_address_format addr_format)
2208bf215546Sopenharmony_ci{
2209bf215546Sopenharmony_ci   bool progress = false;
2210bf215546Sopenharmony_ci
2211bf215546Sopenharmony_ci   nir_builder b;
2212bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
2213bf215546Sopenharmony_ci
2214bf215546Sopenharmony_ci   /* Walk in reverse order so that we can see the full deref chain when we
2215bf215546Sopenharmony_ci    * lower the access operations.  We lower them assuming that the derefs
2216bf215546Sopenharmony_ci    * will be turned into address calculations later.
2217bf215546Sopenharmony_ci    */
2218bf215546Sopenharmony_ci   nir_foreach_block_reverse(block, impl) {
2219bf215546Sopenharmony_ci      nir_foreach_instr_reverse_safe(instr, block) {
2220bf215546Sopenharmony_ci         switch (instr->type) {
2221bf215546Sopenharmony_ci         case nir_instr_type_deref: {
2222bf215546Sopenharmony_ci            nir_deref_instr *deref = nir_instr_as_deref(instr);
2223bf215546Sopenharmony_ci            if (nir_deref_mode_is_in_set(deref, modes)) {
2224bf215546Sopenharmony_ci               lower_explicit_io_deref(&b, deref, addr_format);
2225bf215546Sopenharmony_ci               progress = true;
2226bf215546Sopenharmony_ci            }
2227bf215546Sopenharmony_ci            break;
2228bf215546Sopenharmony_ci         }
2229bf215546Sopenharmony_ci
2230bf215546Sopenharmony_ci         case nir_instr_type_intrinsic: {
2231bf215546Sopenharmony_ci            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2232bf215546Sopenharmony_ci            switch (intrin->intrinsic) {
2233bf215546Sopenharmony_ci            case nir_intrinsic_load_deref:
2234bf215546Sopenharmony_ci            case nir_intrinsic_store_deref:
2235bf215546Sopenharmony_ci            case nir_intrinsic_load_deref_block_intel:
2236bf215546Sopenharmony_ci            case nir_intrinsic_store_deref_block_intel:
2237bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_add:
2238bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_imin:
2239bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_umin:
2240bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_imax:
2241bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_umax:
2242bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_and:
2243bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_or:
2244bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_xor:
2245bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_exchange:
2246bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_comp_swap:
2247bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_fadd:
2248bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_fmin:
2249bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_fmax:
2250bf215546Sopenharmony_ci            case nir_intrinsic_deref_atomic_fcomp_swap: {
2251bf215546Sopenharmony_ci               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2252bf215546Sopenharmony_ci               if (nir_deref_mode_is_in_set(deref, modes)) {
2253bf215546Sopenharmony_ci                  lower_explicit_io_access(&b, intrin, addr_format);
2254bf215546Sopenharmony_ci                  progress = true;
2255bf215546Sopenharmony_ci               }
2256bf215546Sopenharmony_ci               break;
2257bf215546Sopenharmony_ci            }
2258bf215546Sopenharmony_ci
2259bf215546Sopenharmony_ci            case nir_intrinsic_deref_buffer_array_length: {
2260bf215546Sopenharmony_ci               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2261bf215546Sopenharmony_ci               if (nir_deref_mode_is_in_set(deref, modes)) {
2262bf215546Sopenharmony_ci                  lower_explicit_io_array_length(&b, intrin, addr_format);
2263bf215546Sopenharmony_ci                  progress = true;
2264bf215546Sopenharmony_ci               }
2265bf215546Sopenharmony_ci               break;
2266bf215546Sopenharmony_ci            }
2267bf215546Sopenharmony_ci
2268bf215546Sopenharmony_ci            case nir_intrinsic_deref_mode_is: {
2269bf215546Sopenharmony_ci               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2270bf215546Sopenharmony_ci               if (nir_deref_mode_is_in_set(deref, modes)) {
2271bf215546Sopenharmony_ci                  lower_explicit_io_mode_check(&b, intrin, addr_format);
2272bf215546Sopenharmony_ci                  progress = true;
2273bf215546Sopenharmony_ci               }
2274bf215546Sopenharmony_ci               break;
2275bf215546Sopenharmony_ci            }
2276bf215546Sopenharmony_ci
2277bf215546Sopenharmony_ci            default:
2278bf215546Sopenharmony_ci               break;
2279bf215546Sopenharmony_ci            }
2280bf215546Sopenharmony_ci            break;
2281bf215546Sopenharmony_ci         }
2282bf215546Sopenharmony_ci
2283bf215546Sopenharmony_ci         default:
2284bf215546Sopenharmony_ci            /* Nothing to do */
2285bf215546Sopenharmony_ci            break;
2286bf215546Sopenharmony_ci         }
2287bf215546Sopenharmony_ci      }
2288bf215546Sopenharmony_ci   }
2289bf215546Sopenharmony_ci
2290bf215546Sopenharmony_ci   if (progress) {
2291bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index |
2292bf215546Sopenharmony_ci                                  nir_metadata_dominance);
2293bf215546Sopenharmony_ci   } else {
2294bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
2295bf215546Sopenharmony_ci   }
2296bf215546Sopenharmony_ci
2297bf215546Sopenharmony_ci   return progress;
2298bf215546Sopenharmony_ci}
2299bf215546Sopenharmony_ci
2300bf215546Sopenharmony_ci/** Lower explicitly laid out I/O access to byte offset/address intrinsics
2301bf215546Sopenharmony_ci *
2302bf215546Sopenharmony_ci * This pass is intended to be used for any I/O which touches memory external
2303bf215546Sopenharmony_ci * to the shader or which is directly visible to the client.  It requires that
2304bf215546Sopenharmony_ci * all data types in the given modes have a explicit stride/offset decorations
2305bf215546Sopenharmony_ci * to tell it exactly how to calculate the offset/address for the given load,
2306bf215546Sopenharmony_ci * store, or atomic operation.  If the offset/stride information does not come
2307bf215546Sopenharmony_ci * from the client explicitly (as with shared variables in GL or Vulkan),
2308bf215546Sopenharmony_ci * nir_lower_vars_to_explicit_types() can be used to add them.
2309bf215546Sopenharmony_ci *
2310bf215546Sopenharmony_ci * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2311bf215546Sopenharmony_ci * pointer chains which may contain cast derefs.  It does so by walking the
2312bf215546Sopenharmony_ci * deref chain backwards and simply replacing each deref, one at a time, with
2313bf215546Sopenharmony_ci * the appropriate address calculation.  The pass takes a nir_address_format
2314bf215546Sopenharmony_ci * parameter which describes how the offset or address is to be represented
2315bf215546Sopenharmony_ci * during calculations.  By ensuring that the address is always in a
2316bf215546Sopenharmony_ci * consistent format, pointers can safely be conjured from thin air by the
2317bf215546Sopenharmony_ci * driver, stored to variables, passed through phis, etc.
2318bf215546Sopenharmony_ci *
2319bf215546Sopenharmony_ci * The one exception to the simple algorithm described above is for handling
2320bf215546Sopenharmony_ci * row-major matrices in which case we may look down one additional level of
2321bf215546Sopenharmony_ci * the deref chain.
2322bf215546Sopenharmony_ci *
2323bf215546Sopenharmony_ci * This pass is also capable of handling OpenCL generic pointers.  If the
2324bf215546Sopenharmony_ci * address mode is global, it will lower any ambiguous (more than one mode)
2325bf215546Sopenharmony_ci * access to global and pass through the deref_mode_is run-time checks as
2326bf215546Sopenharmony_ci * addr_mode_is.  This assumes the driver has somehow mapped shared and
2327bf215546Sopenharmony_ci * scratch memory to the global address space.  For other modes such as
2328bf215546Sopenharmony_ci * 62bit_generic, there is an enum embedded in the address and we lower
2329bf215546Sopenharmony_ci * ambiguous access to an if-ladder and deref_mode_is to a check against the
2330bf215546Sopenharmony_ci * embedded enum.  If nir_lower_explicit_io is called on any shader that
2331bf215546Sopenharmony_ci * contains generic pointers, it must either be used on all of the generic
2332bf215546Sopenharmony_ci * modes or none.
2333bf215546Sopenharmony_ci */
2334bf215546Sopenharmony_cibool
2335bf215546Sopenharmony_cinir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2336bf215546Sopenharmony_ci                      nir_address_format addr_format)
2337bf215546Sopenharmony_ci{
2338bf215546Sopenharmony_ci   bool progress = false;
2339bf215546Sopenharmony_ci
2340bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
2341bf215546Sopenharmony_ci      if (function->impl &&
2342bf215546Sopenharmony_ci          nir_lower_explicit_io_impl(function->impl, modes, addr_format))
2343bf215546Sopenharmony_ci         progress = true;
2344bf215546Sopenharmony_ci   }
2345bf215546Sopenharmony_ci
2346bf215546Sopenharmony_ci   return progress;
2347bf215546Sopenharmony_ci}
2348bf215546Sopenharmony_ci
2349bf215546Sopenharmony_cistatic bool
2350bf215546Sopenharmony_cinir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2351bf215546Sopenharmony_ci                                      nir_variable_mode modes,
2352bf215546Sopenharmony_ci                                      glsl_type_size_align_func type_info)
2353bf215546Sopenharmony_ci{
2354bf215546Sopenharmony_ci   bool progress = false;
2355bf215546Sopenharmony_ci
2356bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
2357bf215546Sopenharmony_ci      nir_foreach_instr(instr, block) {
2358bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_deref)
2359bf215546Sopenharmony_ci            continue;
2360bf215546Sopenharmony_ci
2361bf215546Sopenharmony_ci         nir_deref_instr *deref = nir_instr_as_deref(instr);
2362bf215546Sopenharmony_ci         if (!nir_deref_mode_is_in_set(deref, modes))
2363bf215546Sopenharmony_ci            continue;
2364bf215546Sopenharmony_ci
2365bf215546Sopenharmony_ci         unsigned size, alignment;
2366bf215546Sopenharmony_ci         const struct glsl_type *new_type =
2367bf215546Sopenharmony_ci            glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2368bf215546Sopenharmony_ci         if (new_type != deref->type) {
2369bf215546Sopenharmony_ci            progress = true;
2370bf215546Sopenharmony_ci            deref->type = new_type;
2371bf215546Sopenharmony_ci         }
2372bf215546Sopenharmony_ci         if (deref->deref_type == nir_deref_type_cast) {
2373bf215546Sopenharmony_ci            /* See also glsl_type::get_explicit_type_for_size_align() */
2374bf215546Sopenharmony_ci            unsigned new_stride = align(size, alignment);
2375bf215546Sopenharmony_ci            if (new_stride != deref->cast.ptr_stride) {
2376bf215546Sopenharmony_ci               deref->cast.ptr_stride = new_stride;
2377bf215546Sopenharmony_ci               progress = true;
2378bf215546Sopenharmony_ci            }
2379bf215546Sopenharmony_ci         }
2380bf215546Sopenharmony_ci      }
2381bf215546Sopenharmony_ci   }
2382bf215546Sopenharmony_ci
2383bf215546Sopenharmony_ci   if (progress) {
2384bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index |
2385bf215546Sopenharmony_ci                                  nir_metadata_dominance |
2386bf215546Sopenharmony_ci                                  nir_metadata_live_ssa_defs |
2387bf215546Sopenharmony_ci                                  nir_metadata_loop_analysis);
2388bf215546Sopenharmony_ci   } else {
2389bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
2390bf215546Sopenharmony_ci   }
2391bf215546Sopenharmony_ci
2392bf215546Sopenharmony_ci   return progress;
2393bf215546Sopenharmony_ci}
2394bf215546Sopenharmony_ci
2395bf215546Sopenharmony_cistatic bool
2396bf215546Sopenharmony_cilower_vars_to_explicit(nir_shader *shader,
2397bf215546Sopenharmony_ci                       struct exec_list *vars, nir_variable_mode mode,
2398bf215546Sopenharmony_ci                       glsl_type_size_align_func type_info)
2399bf215546Sopenharmony_ci{
2400bf215546Sopenharmony_ci   bool progress = false;
2401bf215546Sopenharmony_ci   unsigned offset;
2402bf215546Sopenharmony_ci   switch (mode) {
2403bf215546Sopenharmony_ci   case nir_var_uniform:
2404bf215546Sopenharmony_ci      assert(shader->info.stage == MESA_SHADER_KERNEL);
2405bf215546Sopenharmony_ci      offset = 0;
2406bf215546Sopenharmony_ci      break;
2407bf215546Sopenharmony_ci   case nir_var_function_temp:
2408bf215546Sopenharmony_ci   case nir_var_shader_temp:
2409bf215546Sopenharmony_ci      offset = shader->scratch_size;
2410bf215546Sopenharmony_ci      break;
2411bf215546Sopenharmony_ci   case nir_var_mem_shared:
2412bf215546Sopenharmony_ci      offset = shader->info.shared_size;
2413bf215546Sopenharmony_ci      break;
2414bf215546Sopenharmony_ci   case nir_var_mem_task_payload:
2415bf215546Sopenharmony_ci      offset = shader->info.task_payload_size;
2416bf215546Sopenharmony_ci      break;
2417bf215546Sopenharmony_ci   case nir_var_mem_global:
2418bf215546Sopenharmony_ci      offset = shader->global_mem_size;
2419bf215546Sopenharmony_ci      break;
2420bf215546Sopenharmony_ci   case nir_var_mem_constant:
2421bf215546Sopenharmony_ci      offset = shader->constant_data_size;
2422bf215546Sopenharmony_ci      break;
2423bf215546Sopenharmony_ci   case nir_var_shader_call_data:
2424bf215546Sopenharmony_ci   case nir_var_ray_hit_attrib:
2425bf215546Sopenharmony_ci      offset = 0;
2426bf215546Sopenharmony_ci      break;
2427bf215546Sopenharmony_ci   default:
2428bf215546Sopenharmony_ci      unreachable("Unsupported mode");
2429bf215546Sopenharmony_ci   }
2430bf215546Sopenharmony_ci   nir_foreach_variable_in_list(var, vars) {
2431bf215546Sopenharmony_ci      if (var->data.mode != mode)
2432bf215546Sopenharmony_ci         continue;
2433bf215546Sopenharmony_ci
2434bf215546Sopenharmony_ci      unsigned size, align;
2435bf215546Sopenharmony_ci      const struct glsl_type *explicit_type =
2436bf215546Sopenharmony_ci         glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
2437bf215546Sopenharmony_ci
2438bf215546Sopenharmony_ci      if (explicit_type != var->type)
2439bf215546Sopenharmony_ci         var->type = explicit_type;
2440bf215546Sopenharmony_ci
2441bf215546Sopenharmony_ci      UNUSED bool is_empty_struct =
2442bf215546Sopenharmony_ci         glsl_type_is_struct_or_ifc(explicit_type) &&
2443bf215546Sopenharmony_ci         glsl_get_length(explicit_type) == 0;
2444bf215546Sopenharmony_ci
2445bf215546Sopenharmony_ci      assert(util_is_power_of_two_nonzero(align) || is_empty_struct);
2446bf215546Sopenharmony_ci      var->data.driver_location = ALIGN_POT(offset, align);
2447bf215546Sopenharmony_ci      offset = var->data.driver_location + size;
2448bf215546Sopenharmony_ci      progress = true;
2449bf215546Sopenharmony_ci   }
2450bf215546Sopenharmony_ci
2451bf215546Sopenharmony_ci   switch (mode) {
2452bf215546Sopenharmony_ci   case nir_var_uniform:
2453bf215546Sopenharmony_ci      assert(shader->info.stage == MESA_SHADER_KERNEL);
2454bf215546Sopenharmony_ci      shader->num_uniforms = offset;
2455bf215546Sopenharmony_ci      break;
2456bf215546Sopenharmony_ci   case nir_var_shader_temp:
2457bf215546Sopenharmony_ci   case nir_var_function_temp:
2458bf215546Sopenharmony_ci      shader->scratch_size = offset;
2459bf215546Sopenharmony_ci      break;
2460bf215546Sopenharmony_ci   case nir_var_mem_shared:
2461bf215546Sopenharmony_ci      shader->info.shared_size = offset;
2462bf215546Sopenharmony_ci      break;
2463bf215546Sopenharmony_ci   case nir_var_mem_task_payload:
2464bf215546Sopenharmony_ci      shader->info.task_payload_size = offset;
2465bf215546Sopenharmony_ci      break;
2466bf215546Sopenharmony_ci   case nir_var_mem_global:
2467bf215546Sopenharmony_ci      shader->global_mem_size = offset;
2468bf215546Sopenharmony_ci      break;
2469bf215546Sopenharmony_ci   case nir_var_mem_constant:
2470bf215546Sopenharmony_ci      shader->constant_data_size = offset;
2471bf215546Sopenharmony_ci      break;
2472bf215546Sopenharmony_ci   case nir_var_shader_call_data:
2473bf215546Sopenharmony_ci   case nir_var_ray_hit_attrib:
2474bf215546Sopenharmony_ci      break;
2475bf215546Sopenharmony_ci   default:
2476bf215546Sopenharmony_ci      unreachable("Unsupported mode");
2477bf215546Sopenharmony_ci   }
2478bf215546Sopenharmony_ci
2479bf215546Sopenharmony_ci   return progress;
2480bf215546Sopenharmony_ci}
2481bf215546Sopenharmony_ci
2482bf215546Sopenharmony_ci/* If nir_lower_vars_to_explicit_types is called on any shader that contains
2483bf215546Sopenharmony_ci * generic pointers, it must either be used on all of the generic modes or
2484bf215546Sopenharmony_ci * none.
2485bf215546Sopenharmony_ci */
2486bf215546Sopenharmony_cibool
2487bf215546Sopenharmony_cinir_lower_vars_to_explicit_types(nir_shader *shader,
2488bf215546Sopenharmony_ci                                 nir_variable_mode modes,
2489bf215546Sopenharmony_ci                                 glsl_type_size_align_func type_info)
2490bf215546Sopenharmony_ci{
2491bf215546Sopenharmony_ci   /* TODO: Situations which need to be handled to support more modes:
2492bf215546Sopenharmony_ci    * - row-major matrices
2493bf215546Sopenharmony_ci    * - compact shader inputs/outputs
2494bf215546Sopenharmony_ci    * - interface types
2495bf215546Sopenharmony_ci    */
2496bf215546Sopenharmony_ci   ASSERTED nir_variable_mode supported =
2497bf215546Sopenharmony_ci      nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2498bf215546Sopenharmony_ci      nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2499bf215546Sopenharmony_ci      nir_var_shader_call_data | nir_var_ray_hit_attrib |
2500bf215546Sopenharmony_ci      nir_var_mem_task_payload;
2501bf215546Sopenharmony_ci   assert(!(modes & ~supported) && "unsupported");
2502bf215546Sopenharmony_ci
2503bf215546Sopenharmony_ci   bool progress = false;
2504bf215546Sopenharmony_ci
2505bf215546Sopenharmony_ci   if (modes & nir_var_uniform)
2506bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2507bf215546Sopenharmony_ci   if (modes & nir_var_mem_global)
2508bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2509bf215546Sopenharmony_ci
2510bf215546Sopenharmony_ci   if (modes & nir_var_mem_shared) {
2511bf215546Sopenharmony_ci      assert(!shader->info.shared_memory_explicit_layout);
2512bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2513bf215546Sopenharmony_ci   }
2514bf215546Sopenharmony_ci
2515bf215546Sopenharmony_ci   if (modes & nir_var_shader_temp)
2516bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2517bf215546Sopenharmony_ci   if (modes & nir_var_mem_constant)
2518bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2519bf215546Sopenharmony_ci   if (modes & nir_var_shader_call_data)
2520bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2521bf215546Sopenharmony_ci   if (modes & nir_var_ray_hit_attrib)
2522bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2523bf215546Sopenharmony_ci   if (modes & nir_var_mem_task_payload)
2524bf215546Sopenharmony_ci      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2525bf215546Sopenharmony_ci
2526bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
2527bf215546Sopenharmony_ci      if (function->impl) {
2528bf215546Sopenharmony_ci         if (modes & nir_var_function_temp)
2529bf215546Sopenharmony_ci            progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
2530bf215546Sopenharmony_ci
2531bf215546Sopenharmony_ci         progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
2532bf215546Sopenharmony_ci      }
2533bf215546Sopenharmony_ci   }
2534bf215546Sopenharmony_ci
2535bf215546Sopenharmony_ci   return progress;
2536bf215546Sopenharmony_ci}
2537bf215546Sopenharmony_ci
2538bf215546Sopenharmony_cistatic void
2539bf215546Sopenharmony_ciwrite_constant(void *dst, size_t dst_size,
2540bf215546Sopenharmony_ci               const nir_constant *c, const struct glsl_type *type)
2541bf215546Sopenharmony_ci{
2542bf215546Sopenharmony_ci   if (glsl_type_is_vector_or_scalar(type)) {
2543bf215546Sopenharmony_ci      const unsigned num_components = glsl_get_vector_elements(type);
2544bf215546Sopenharmony_ci      const unsigned bit_size = glsl_get_bit_size(type);
2545bf215546Sopenharmony_ci      if (bit_size == 1) {
2546bf215546Sopenharmony_ci         /* Booleans are special-cased to be 32-bit
2547bf215546Sopenharmony_ci          *
2548bf215546Sopenharmony_ci          * TODO: Make the native bool bit_size an option.
2549bf215546Sopenharmony_ci          */
2550bf215546Sopenharmony_ci         assert(num_components * 4 <= dst_size);
2551bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_components; i++) {
2552bf215546Sopenharmony_ci            int32_t b32 = -(int)c->values[i].b;
2553bf215546Sopenharmony_ci            memcpy((char *)dst + i * 4, &b32, 4);
2554bf215546Sopenharmony_ci         }
2555bf215546Sopenharmony_ci      } else {
2556bf215546Sopenharmony_ci         assert(bit_size >= 8 && bit_size % 8 == 0);
2557bf215546Sopenharmony_ci         const unsigned byte_size = bit_size / 8;
2558bf215546Sopenharmony_ci         assert(num_components * byte_size <= dst_size);
2559bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_components; i++) {
2560bf215546Sopenharmony_ci            /* Annoyingly, thanks to packed structs, we can't make any
2561bf215546Sopenharmony_ci             * assumptions about the alignment of dst.  To avoid any strange
2562bf215546Sopenharmony_ci             * issues with unaligned writes, we always use memcpy.
2563bf215546Sopenharmony_ci             */
2564bf215546Sopenharmony_ci            memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2565bf215546Sopenharmony_ci         }
2566bf215546Sopenharmony_ci      }
2567bf215546Sopenharmony_ci   } else if (glsl_type_is_array_or_matrix(type)) {
2568bf215546Sopenharmony_ci      const unsigned array_len = glsl_get_length(type);
2569bf215546Sopenharmony_ci      const unsigned stride = glsl_get_explicit_stride(type);
2570bf215546Sopenharmony_ci      assert(stride > 0);
2571bf215546Sopenharmony_ci      const struct glsl_type *elem_type = glsl_get_array_element(type);
2572bf215546Sopenharmony_ci      for (unsigned i = 0; i < array_len; i++) {
2573bf215546Sopenharmony_ci         unsigned elem_offset = i * stride;
2574bf215546Sopenharmony_ci         assert(elem_offset < dst_size);
2575bf215546Sopenharmony_ci         write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2576bf215546Sopenharmony_ci                        c->elements[i], elem_type);
2577bf215546Sopenharmony_ci      }
2578bf215546Sopenharmony_ci   } else {
2579bf215546Sopenharmony_ci      assert(glsl_type_is_struct_or_ifc(type));
2580bf215546Sopenharmony_ci      const unsigned num_fields = glsl_get_length(type);
2581bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_fields; i++) {
2582bf215546Sopenharmony_ci         const int field_offset = glsl_get_struct_field_offset(type, i);
2583bf215546Sopenharmony_ci         assert(field_offset >= 0 && field_offset < dst_size);
2584bf215546Sopenharmony_ci         const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2585bf215546Sopenharmony_ci         write_constant((char *)dst + field_offset, dst_size - field_offset,
2586bf215546Sopenharmony_ci                        c->elements[i], field_type);
2587bf215546Sopenharmony_ci      }
2588bf215546Sopenharmony_ci   }
2589bf215546Sopenharmony_ci}
2590bf215546Sopenharmony_ci
2591bf215546Sopenharmony_civoid
2592bf215546Sopenharmony_cinir_gather_explicit_io_initializers(nir_shader *shader,
2593bf215546Sopenharmony_ci                                    void *dst, size_t dst_size,
2594bf215546Sopenharmony_ci                                    nir_variable_mode mode)
2595bf215546Sopenharmony_ci{
2596bf215546Sopenharmony_ci   /* It doesn't really make sense to gather initializers for more than one
2597bf215546Sopenharmony_ci    * mode at a time.  If this ever becomes well-defined, we can drop the
2598bf215546Sopenharmony_ci    * assert then.
2599bf215546Sopenharmony_ci    */
2600bf215546Sopenharmony_ci   assert(util_bitcount(mode) == 1);
2601bf215546Sopenharmony_ci
2602bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, mode) {
2603bf215546Sopenharmony_ci      assert(var->data.driver_location < dst_size);
2604bf215546Sopenharmony_ci      write_constant((char *)dst + var->data.driver_location,
2605bf215546Sopenharmony_ci                     dst_size - var->data.driver_location,
2606bf215546Sopenharmony_ci                     var->constant_initializer, var->type);
2607bf215546Sopenharmony_ci   }
2608bf215546Sopenharmony_ci}
2609bf215546Sopenharmony_ci
2610bf215546Sopenharmony_ci/**
2611bf215546Sopenharmony_ci * Return the offset source for a load/store intrinsic.
2612bf215546Sopenharmony_ci */
2613bf215546Sopenharmony_cinir_src *
2614bf215546Sopenharmony_cinir_get_io_offset_src(nir_intrinsic_instr *instr)
2615bf215546Sopenharmony_ci{
2616bf215546Sopenharmony_ci   switch (instr->intrinsic) {
2617bf215546Sopenharmony_ci   case nir_intrinsic_load_input:
2618bf215546Sopenharmony_ci   case nir_intrinsic_load_output:
2619bf215546Sopenharmony_ci   case nir_intrinsic_load_shared:
2620bf215546Sopenharmony_ci   case nir_intrinsic_load_task_payload:
2621bf215546Sopenharmony_ci   case nir_intrinsic_load_uniform:
2622bf215546Sopenharmony_ci   case nir_intrinsic_load_kernel_input:
2623bf215546Sopenharmony_ci   case nir_intrinsic_load_global:
2624bf215546Sopenharmony_ci   case nir_intrinsic_load_global_2x32:
2625bf215546Sopenharmony_ci   case nir_intrinsic_load_global_constant:
2626bf215546Sopenharmony_ci   case nir_intrinsic_load_scratch:
2627bf215546Sopenharmony_ci   case nir_intrinsic_load_fs_input_interp_deltas:
2628bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_add:
2629bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_and:
2630bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_comp_swap:
2631bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_exchange:
2632bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fadd:
2633bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fcomp_swap:
2634bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fmax:
2635bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fmin:
2636bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_imax:
2637bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_imin:
2638bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_or:
2639bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_umax:
2640bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_umin:
2641bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_xor:
2642bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_add:
2643bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_imin:
2644bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_umin:
2645bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_imax:
2646bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_umax:
2647bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_and:
2648bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_or:
2649bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_xor:
2650bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_exchange:
2651bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_comp_swap:
2652bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fadd:
2653bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fmin:
2654bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fmax:
2655bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fcomp_swap:
2656bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_add:
2657bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_and:
2658bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_comp_swap:
2659bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_exchange:
2660bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fadd:
2661bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fcomp_swap:
2662bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmax:
2663bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmin:
2664bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imax:
2665bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imin:
2666bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_or:
2667bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umax:
2668bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umin:
2669bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_xor:
2670bf215546Sopenharmony_ci      return &instr->src[0];
2671bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo:
2672bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo:
2673bf215546Sopenharmony_ci   case nir_intrinsic_load_input_vertex:
2674bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_input:
2675bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_output:
2676bf215546Sopenharmony_ci   case nir_intrinsic_load_per_primitive_output:
2677bf215546Sopenharmony_ci   case nir_intrinsic_load_interpolated_input:
2678bf215546Sopenharmony_ci   case nir_intrinsic_store_output:
2679bf215546Sopenharmony_ci   case nir_intrinsic_store_shared:
2680bf215546Sopenharmony_ci   case nir_intrinsic_store_task_payload:
2681bf215546Sopenharmony_ci   case nir_intrinsic_store_global:
2682bf215546Sopenharmony_ci   case nir_intrinsic_store_global_2x32:
2683bf215546Sopenharmony_ci   case nir_intrinsic_store_scratch:
2684bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_add:
2685bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imin:
2686bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umin:
2687bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imax:
2688bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umax:
2689bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_and:
2690bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_or:
2691bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_xor:
2692bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_exchange:
2693bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_comp_swap:
2694bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fadd:
2695bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fmin:
2696bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fmax:
2697bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fcomp_swap:
2698bf215546Sopenharmony_ci      return &instr->src[1];
2699bf215546Sopenharmony_ci   case nir_intrinsic_store_ssbo:
2700bf215546Sopenharmony_ci   case nir_intrinsic_store_per_vertex_output:
2701bf215546Sopenharmony_ci   case nir_intrinsic_store_per_primitive_output:
2702bf215546Sopenharmony_ci      return &instr->src[2];
2703bf215546Sopenharmony_ci   default:
2704bf215546Sopenharmony_ci      return NULL;
2705bf215546Sopenharmony_ci   }
2706bf215546Sopenharmony_ci}
2707bf215546Sopenharmony_ci
2708bf215546Sopenharmony_ci/**
2709bf215546Sopenharmony_ci * Return the vertex index source for a load/store per_vertex intrinsic.
2710bf215546Sopenharmony_ci */
2711bf215546Sopenharmony_cinir_src *
2712bf215546Sopenharmony_cinir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2713bf215546Sopenharmony_ci{
2714bf215546Sopenharmony_ci   switch (instr->intrinsic) {
2715bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_input:
2716bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_output:
2717bf215546Sopenharmony_ci   case nir_intrinsic_load_per_primitive_output:
2718bf215546Sopenharmony_ci      return &instr->src[0];
2719bf215546Sopenharmony_ci   case nir_intrinsic_store_per_vertex_output:
2720bf215546Sopenharmony_ci   case nir_intrinsic_store_per_primitive_output:
2721bf215546Sopenharmony_ci      return &instr->src[1];
2722bf215546Sopenharmony_ci   default:
2723bf215546Sopenharmony_ci      return NULL;
2724bf215546Sopenharmony_ci   }
2725bf215546Sopenharmony_ci}
2726bf215546Sopenharmony_ci
2727bf215546Sopenharmony_ci/**
2728bf215546Sopenharmony_ci * Return the numeric constant that identify a NULL pointer for each address
2729bf215546Sopenharmony_ci * format.
2730bf215546Sopenharmony_ci */
2731bf215546Sopenharmony_ciconst nir_const_value *
2732bf215546Sopenharmony_cinir_address_format_null_value(nir_address_format addr_format)
2733bf215546Sopenharmony_ci{
2734bf215546Sopenharmony_ci   const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2735bf215546Sopenharmony_ci      [nir_address_format_32bit_global] = {{0}},
2736bf215546Sopenharmony_ci      [nir_address_format_2x32bit_global] = {{0}},
2737bf215546Sopenharmony_ci      [nir_address_format_64bit_global] = {{0}},
2738bf215546Sopenharmony_ci      [nir_address_format_64bit_global_32bit_offset] = {{0}},
2739bf215546Sopenharmony_ci      [nir_address_format_64bit_bounded_global] = {{0}},
2740bf215546Sopenharmony_ci      [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
2741bf215546Sopenharmony_ci      [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
2742bf215546Sopenharmony_ci      [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
2743bf215546Sopenharmony_ci      [nir_address_format_32bit_offset] = {{.u32 = ~0}},
2744bf215546Sopenharmony_ci      [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
2745bf215546Sopenharmony_ci      [nir_address_format_62bit_generic] = {{.u64 = 0}},
2746bf215546Sopenharmony_ci      [nir_address_format_logical] = {{.u32 = ~0}},
2747bf215546Sopenharmony_ci   };
2748bf215546Sopenharmony_ci
2749bf215546Sopenharmony_ci   assert(addr_format < ARRAY_SIZE(null_values));
2750bf215546Sopenharmony_ci   return null_values[addr_format];
2751bf215546Sopenharmony_ci}
2752bf215546Sopenharmony_ci
2753bf215546Sopenharmony_cinir_ssa_def *
2754bf215546Sopenharmony_cinir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2755bf215546Sopenharmony_ci                   nir_address_format addr_format)
2756bf215546Sopenharmony_ci{
2757bf215546Sopenharmony_ci   switch (addr_format) {
2758bf215546Sopenharmony_ci   case nir_address_format_32bit_global:
2759bf215546Sopenharmony_ci   case nir_address_format_2x32bit_global:
2760bf215546Sopenharmony_ci   case nir_address_format_64bit_global:
2761bf215546Sopenharmony_ci   case nir_address_format_64bit_bounded_global:
2762bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:
2763bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:
2764bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:
2765bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:
2766bf215546Sopenharmony_ci      return nir_ball_iequal(b, addr0, addr1);
2767bf215546Sopenharmony_ci
2768bf215546Sopenharmony_ci   case nir_address_format_64bit_global_32bit_offset:
2769bf215546Sopenharmony_ci      return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2770bf215546Sopenharmony_ci                                nir_channels(b, addr1, 0xb));
2771bf215546Sopenharmony_ci
2772bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:
2773bf215546Sopenharmony_ci      assert(addr0->num_components == 1 && addr1->num_components == 1);
2774bf215546Sopenharmony_ci      return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2775bf215546Sopenharmony_ci
2776bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64:
2777bf215546Sopenharmony_ci      assert(addr0->num_components == 1 && addr1->num_components == 1);
2778bf215546Sopenharmony_ci      return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2779bf215546Sopenharmony_ci
2780bf215546Sopenharmony_ci   case nir_address_format_logical:
2781bf215546Sopenharmony_ci      unreachable("Unsupported address format");
2782bf215546Sopenharmony_ci   }
2783bf215546Sopenharmony_ci
2784bf215546Sopenharmony_ci   unreachable("Invalid address format");
2785bf215546Sopenharmony_ci}
2786bf215546Sopenharmony_ci
2787bf215546Sopenharmony_cinir_ssa_def *
2788bf215546Sopenharmony_cinir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2789bf215546Sopenharmony_ci                    nir_address_format addr_format)
2790bf215546Sopenharmony_ci{
2791bf215546Sopenharmony_ci   switch (addr_format) {
2792bf215546Sopenharmony_ci   case nir_address_format_32bit_global:
2793bf215546Sopenharmony_ci   case nir_address_format_64bit_global:
2794bf215546Sopenharmony_ci   case nir_address_format_32bit_offset:
2795bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset_pack64:
2796bf215546Sopenharmony_ci   case nir_address_format_62bit_generic:
2797bf215546Sopenharmony_ci      assert(addr0->num_components == 1);
2798bf215546Sopenharmony_ci      assert(addr1->num_components == 1);
2799bf215546Sopenharmony_ci      return nir_isub(b, addr0, addr1);
2800bf215546Sopenharmony_ci
2801bf215546Sopenharmony_ci   case nir_address_format_2x32bit_global:
2802bf215546Sopenharmony_ci      return nir_isub(b, addr_to_global(b, addr0, addr_format),
2803bf215546Sopenharmony_ci                         addr_to_global(b, addr1, addr_format));
2804bf215546Sopenharmony_ci
2805bf215546Sopenharmony_ci   case nir_address_format_32bit_offset_as_64bit:
2806bf215546Sopenharmony_ci      assert(addr0->num_components == 1);
2807bf215546Sopenharmony_ci      assert(addr1->num_components == 1);
2808bf215546Sopenharmony_ci      return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2809bf215546Sopenharmony_ci
2810bf215546Sopenharmony_ci   case nir_address_format_64bit_global_32bit_offset:
2811bf215546Sopenharmony_ci   case nir_address_format_64bit_bounded_global:
2812bf215546Sopenharmony_ci      return nir_isub(b, addr_to_global(b, addr0, addr_format),
2813bf215546Sopenharmony_ci                         addr_to_global(b, addr1, addr_format));
2814bf215546Sopenharmony_ci
2815bf215546Sopenharmony_ci   case nir_address_format_32bit_index_offset:
2816bf215546Sopenharmony_ci      assert(addr0->num_components == 2);
2817bf215546Sopenharmony_ci      assert(addr1->num_components == 2);
2818bf215546Sopenharmony_ci      /* Assume the same buffer index. */
2819bf215546Sopenharmony_ci      return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2820bf215546Sopenharmony_ci
2821bf215546Sopenharmony_ci   case nir_address_format_vec2_index_32bit_offset:
2822bf215546Sopenharmony_ci      assert(addr0->num_components == 3);
2823bf215546Sopenharmony_ci      assert(addr1->num_components == 3);
2824bf215546Sopenharmony_ci      /* Assume the same buffer index. */
2825bf215546Sopenharmony_ci      return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2826bf215546Sopenharmony_ci
2827bf215546Sopenharmony_ci   case nir_address_format_logical:
2828bf215546Sopenharmony_ci      unreachable("Unsupported address format");
2829bf215546Sopenharmony_ci   }
2830bf215546Sopenharmony_ci
2831bf215546Sopenharmony_ci   unreachable("Invalid address format");
2832bf215546Sopenharmony_ci}
2833bf215546Sopenharmony_ci
2834bf215546Sopenharmony_cistatic bool
2835bf215546Sopenharmony_ciis_input(nir_intrinsic_instr *intrin)
2836bf215546Sopenharmony_ci{
2837bf215546Sopenharmony_ci   return intrin->intrinsic == nir_intrinsic_load_input ||
2838bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2839bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2840bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2841bf215546Sopenharmony_ci}
2842bf215546Sopenharmony_ci
2843bf215546Sopenharmony_cistatic bool
2844bf215546Sopenharmony_ciis_output(nir_intrinsic_instr *intrin)
2845bf215546Sopenharmony_ci{
2846bf215546Sopenharmony_ci   return intrin->intrinsic == nir_intrinsic_load_output ||
2847bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2848bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2849bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_store_output ||
2850bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2851bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2852bf215546Sopenharmony_ci}
2853bf215546Sopenharmony_ci
2854bf215546Sopenharmony_cistatic bool is_dual_slot(nir_intrinsic_instr *intrin)
2855bf215546Sopenharmony_ci{
2856bf215546Sopenharmony_ci   if (intrin->intrinsic == nir_intrinsic_store_output ||
2857bf215546Sopenharmony_ci       intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2858bf215546Sopenharmony_ci       intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2859bf215546Sopenharmony_ci      return nir_src_bit_size(intrin->src[0]) == 64 &&
2860bf215546Sopenharmony_ci             nir_src_num_components(intrin->src[0]) >= 3;
2861bf215546Sopenharmony_ci   }
2862bf215546Sopenharmony_ci
2863bf215546Sopenharmony_ci   return nir_dest_bit_size(intrin->dest) == 64 &&
2864bf215546Sopenharmony_ci          nir_dest_num_components(intrin->dest) >= 3;
2865bf215546Sopenharmony_ci}
2866bf215546Sopenharmony_ci
2867bf215546Sopenharmony_ci/**
2868bf215546Sopenharmony_ci * This pass adds constant offsets to instr->const_index[0] for input/output
2869bf215546Sopenharmony_ci * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
2870bf215546Sopenharmony_ci * unchanged - since we don't know what part of a compound variable is
2871bf215546Sopenharmony_ci * accessed, we allocate storage for the entire thing. For drivers that use
2872bf215546Sopenharmony_ci * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2873bf215546Sopenharmony_ci * the offset source will be 0, so that they don't have to add it in manually.
2874bf215546Sopenharmony_ci */
2875bf215546Sopenharmony_ci
2876bf215546Sopenharmony_cistatic bool
2877bf215546Sopenharmony_ciadd_const_offset_to_base_block(nir_block *block, nir_builder *b,
2878bf215546Sopenharmony_ci                               nir_variable_mode modes)
2879bf215546Sopenharmony_ci{
2880bf215546Sopenharmony_ci   bool progress = false;
2881bf215546Sopenharmony_ci   nir_foreach_instr_safe(instr, block) {
2882bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_intrinsic)
2883bf215546Sopenharmony_ci         continue;
2884bf215546Sopenharmony_ci
2885bf215546Sopenharmony_ci      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2886bf215546Sopenharmony_ci
2887bf215546Sopenharmony_ci      if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2888bf215546Sopenharmony_ci          ((modes & nir_var_shader_out) && is_output(intrin))) {
2889bf215546Sopenharmony_ci         nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2890bf215546Sopenharmony_ci
2891bf215546Sopenharmony_ci         /* NV_mesh_shader: ignore MS primitive indices. */
2892bf215546Sopenharmony_ci         if (b->shader->info.stage == MESA_SHADER_MESH &&
2893bf215546Sopenharmony_ci             sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
2894bf215546Sopenharmony_ci             !(b->shader->info.per_primitive_outputs &
2895bf215546Sopenharmony_ci               BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
2896bf215546Sopenharmony_ci            continue;
2897bf215546Sopenharmony_ci
2898bf215546Sopenharmony_ci         nir_src *offset = nir_get_io_offset_src(intrin);
2899bf215546Sopenharmony_ci
2900bf215546Sopenharmony_ci         /* TODO: Better handling of per-view variables here */
2901bf215546Sopenharmony_ci         if (nir_src_is_const(*offset) &&
2902bf215546Sopenharmony_ci             !nir_intrinsic_io_semantics(intrin).per_view) {
2903bf215546Sopenharmony_ci            unsigned off = nir_src_as_uint(*offset);
2904bf215546Sopenharmony_ci
2905bf215546Sopenharmony_ci            nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2906bf215546Sopenharmony_ci
2907bf215546Sopenharmony_ci            sem.location += off;
2908bf215546Sopenharmony_ci            /* non-indirect indexing should reduce num_slots */
2909bf215546Sopenharmony_ci            sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2910bf215546Sopenharmony_ci            nir_intrinsic_set_io_semantics(intrin, sem);
2911bf215546Sopenharmony_ci
2912bf215546Sopenharmony_ci            b->cursor = nir_before_instr(&intrin->instr);
2913bf215546Sopenharmony_ci            nir_instr_rewrite_src(&intrin->instr, offset,
2914bf215546Sopenharmony_ci                                  nir_src_for_ssa(nir_imm_int(b, 0)));
2915bf215546Sopenharmony_ci            progress = true;
2916bf215546Sopenharmony_ci         }
2917bf215546Sopenharmony_ci      }
2918bf215546Sopenharmony_ci   }
2919bf215546Sopenharmony_ci
2920bf215546Sopenharmony_ci   return progress;
2921bf215546Sopenharmony_ci}
2922bf215546Sopenharmony_ci
2923bf215546Sopenharmony_cibool
2924bf215546Sopenharmony_cinir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2925bf215546Sopenharmony_ci{
2926bf215546Sopenharmony_ci   bool progress = false;
2927bf215546Sopenharmony_ci
2928bf215546Sopenharmony_ci   nir_foreach_function(f, nir) {
2929bf215546Sopenharmony_ci      if (f->impl) {
2930bf215546Sopenharmony_ci         bool impl_progress = false;
2931bf215546Sopenharmony_ci         nir_builder b;
2932bf215546Sopenharmony_ci         nir_builder_init(&b, f->impl);
2933bf215546Sopenharmony_ci         nir_foreach_block(block, f->impl) {
2934bf215546Sopenharmony_ci            impl_progress |= add_const_offset_to_base_block(block, &b, modes);
2935bf215546Sopenharmony_ci         }
2936bf215546Sopenharmony_ci         progress |= impl_progress;
2937bf215546Sopenharmony_ci         if (impl_progress)
2938bf215546Sopenharmony_ci            nir_metadata_preserve(f->impl, nir_metadata_block_index | nir_metadata_dominance);
2939bf215546Sopenharmony_ci         else
2940bf215546Sopenharmony_ci            nir_metadata_preserve(f->impl, nir_metadata_all);
2941bf215546Sopenharmony_ci      }
2942bf215546Sopenharmony_ci   }
2943bf215546Sopenharmony_ci
2944bf215546Sopenharmony_ci   return progress;
2945bf215546Sopenharmony_ci}
2946bf215546Sopenharmony_ci
2947bf215546Sopenharmony_cistatic bool
2948bf215546Sopenharmony_cinir_lower_color_inputs(nir_shader *nir)
2949bf215546Sopenharmony_ci{
2950bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2951bf215546Sopenharmony_ci   bool progress = false;
2952bf215546Sopenharmony_ci
2953bf215546Sopenharmony_ci   nir_builder b;
2954bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
2955bf215546Sopenharmony_ci
2956bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
2957bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
2958bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
2959bf215546Sopenharmony_ci            continue;
2960bf215546Sopenharmony_ci
2961bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2962bf215546Sopenharmony_ci
2963bf215546Sopenharmony_ci         if (intrin->intrinsic != nir_intrinsic_load_deref)
2964bf215546Sopenharmony_ci            continue;
2965bf215546Sopenharmony_ci
2966bf215546Sopenharmony_ci         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2967bf215546Sopenharmony_ci         if (!nir_deref_mode_is(deref, nir_var_shader_in))
2968bf215546Sopenharmony_ci            continue;
2969bf215546Sopenharmony_ci
2970bf215546Sopenharmony_ci         b.cursor = nir_before_instr(instr);
2971bf215546Sopenharmony_ci         nir_variable *var = nir_deref_instr_get_variable(deref);
2972bf215546Sopenharmony_ci         nir_ssa_def *def;
2973bf215546Sopenharmony_ci
2974bf215546Sopenharmony_ci         if (var->data.location == VARYING_SLOT_COL0) {
2975bf215546Sopenharmony_ci            def = nir_load_color0(&b);
2976bf215546Sopenharmony_ci            nir->info.fs.color0_interp = var->data.interpolation;
2977bf215546Sopenharmony_ci            nir->info.fs.color0_sample = var->data.sample;
2978bf215546Sopenharmony_ci            nir->info.fs.color0_centroid = var->data.centroid;
2979bf215546Sopenharmony_ci         } else if (var->data.location == VARYING_SLOT_COL1) {
2980bf215546Sopenharmony_ci            def = nir_load_color1(&b);
2981bf215546Sopenharmony_ci            nir->info.fs.color1_interp = var->data.interpolation;
2982bf215546Sopenharmony_ci            nir->info.fs.color1_sample = var->data.sample;
2983bf215546Sopenharmony_ci            nir->info.fs.color1_centroid = var->data.centroid;
2984bf215546Sopenharmony_ci         } else {
2985bf215546Sopenharmony_ci            continue;
2986bf215546Sopenharmony_ci         }
2987bf215546Sopenharmony_ci
2988bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
2989bf215546Sopenharmony_ci         nir_instr_remove(instr);
2990bf215546Sopenharmony_ci         progress = true;
2991bf215546Sopenharmony_ci      }
2992bf215546Sopenharmony_ci   }
2993bf215546Sopenharmony_ci
2994bf215546Sopenharmony_ci   if (progress) {
2995bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_dominance |
2996bf215546Sopenharmony_ci                                  nir_metadata_block_index);
2997bf215546Sopenharmony_ci   } else {
2998bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
2999bf215546Sopenharmony_ci   }
3000bf215546Sopenharmony_ci   return progress;
3001bf215546Sopenharmony_ci}
3002bf215546Sopenharmony_ci
3003bf215546Sopenharmony_cibool
3004bf215546Sopenharmony_cinir_io_add_intrinsic_xfb_info(nir_shader *nir)
3005bf215546Sopenharmony_ci{
3006bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3007bf215546Sopenharmony_ci   bool progress = false;
3008bf215546Sopenharmony_ci
3009bf215546Sopenharmony_ci   for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
3010bf215546Sopenharmony_ci      nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;
3011bf215546Sopenharmony_ci
3012bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
3013bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
3014bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
3015bf215546Sopenharmony_ci            continue;
3016bf215546Sopenharmony_ci
3017bf215546Sopenharmony_ci         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3018bf215546Sopenharmony_ci
3019bf215546Sopenharmony_ci         if (!nir_intrinsic_has_io_xfb(intr))
3020bf215546Sopenharmony_ci            continue;
3021bf215546Sopenharmony_ci
3022bf215546Sopenharmony_ci         /* No indirect indexing allowed. The index is implied to be 0. */
3023bf215546Sopenharmony_ci         ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
3024bf215546Sopenharmony_ci         assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
3025bf215546Sopenharmony_ci
3026bf215546Sopenharmony_ci         /* Calling this pass for the second time shouldn't do anything. */
3027bf215546Sopenharmony_ci         if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
3028bf215546Sopenharmony_ci             nir_intrinsic_io_xfb(intr).out[1].num_components ||
3029bf215546Sopenharmony_ci             nir_intrinsic_io_xfb2(intr).out[0].num_components ||
3030bf215546Sopenharmony_ci             nir_intrinsic_io_xfb2(intr).out[1].num_components)
3031bf215546Sopenharmony_ci            continue;
3032bf215546Sopenharmony_ci
3033bf215546Sopenharmony_ci         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3034bf215546Sopenharmony_ci         unsigned writemask = nir_intrinsic_write_mask(intr) <<
3035bf215546Sopenharmony_ci                            nir_intrinsic_component(intr);
3036bf215546Sopenharmony_ci
3037bf215546Sopenharmony_ci         nir_io_xfb xfb[2];
3038bf215546Sopenharmony_ci         memset(xfb, 0, sizeof(xfb));
3039bf215546Sopenharmony_ci
3040bf215546Sopenharmony_ci         for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
3041bf215546Sopenharmony_ci            nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
3042bf215546Sopenharmony_ci            if (out->location == sem.location) {
3043bf215546Sopenharmony_ci               unsigned xfb_mask = writemask & out->component_mask;
3044bf215546Sopenharmony_ci
3045bf215546Sopenharmony_ci               /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
3046bf215546Sopenharmony_ci                           "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
3047bf215546Sopenharmony_ci                       i, out->buffer,
3048bf215546Sopenharmony_ci                       out->offset,
3049bf215546Sopenharmony_ci                       out->location,
3050bf215546Sopenharmony_ci                       out->component_offset,
3051bf215546Sopenharmony_ci                       out->component_mask,
3052bf215546Sopenharmony_ci                       xfb_mask, sem.num_slots);*/
3053bf215546Sopenharmony_ci
3054bf215546Sopenharmony_ci               while (xfb_mask) {
3055bf215546Sopenharmony_ci                  int start, count;
3056bf215546Sopenharmony_ci                  u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
3057bf215546Sopenharmony_ci
3058bf215546Sopenharmony_ci                  xfb[start / 2].out[start % 2].num_components = count;
3059bf215546Sopenharmony_ci                  xfb[start / 2].out[start % 2].buffer = out->buffer;
3060bf215546Sopenharmony_ci                  /* out->offset is relative to the first stored xfb component */
3061bf215546Sopenharmony_ci                  /* start is relative to component 0 */
3062bf215546Sopenharmony_ci                  xfb[start / 2].out[start % 2].offset =
3063bf215546Sopenharmony_ci                     out->offset / 4 - out->component_offset + start;
3064bf215546Sopenharmony_ci
3065bf215546Sopenharmony_ci                  progress = true;
3066bf215546Sopenharmony_ci               }
3067bf215546Sopenharmony_ci            }
3068bf215546Sopenharmony_ci         }
3069bf215546Sopenharmony_ci
3070bf215546Sopenharmony_ci         nir_intrinsic_set_io_xfb(intr, xfb[0]);
3071bf215546Sopenharmony_ci         nir_intrinsic_set_io_xfb2(intr, xfb[1]);
3072bf215546Sopenharmony_ci      }
3073bf215546Sopenharmony_ci   }
3074bf215546Sopenharmony_ci
3075bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_all);
3076bf215546Sopenharmony_ci   return progress;
3077bf215546Sopenharmony_ci}
3078bf215546Sopenharmony_ci
/**
 * Type-size callback handed to nir_lower_io by nir_lower_io_passes.
 *
 * \param type      GLSL type to measure.
 * \param bindless  Unused; present only to match the callback signature.
 * \return the value of glsl_count_attribute_slots(type, false).
 */
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   (void)bindless;

   const int slot_count = glsl_count_attribute_slots(type, false);
   return slot_count;
}
3084bf215546Sopenharmony_ci
3085bf215546Sopenharmony_civoid
3086bf215546Sopenharmony_cinir_lower_io_passes(nir_shader *nir)
3087bf215546Sopenharmony_ci{
3088bf215546Sopenharmony_ci   if (!nir->options->lower_io_variables)
3089bf215546Sopenharmony_ci      return;
3090bf215546Sopenharmony_ci
3091bf215546Sopenharmony_ci   bool has_indirect_inputs =
3092bf215546Sopenharmony_ci      (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
3093bf215546Sopenharmony_ci
3094bf215546Sopenharmony_ci   /* Transform feedback requires that indirect outputs are lowered. */
3095bf215546Sopenharmony_ci   bool has_indirect_outputs =
3096bf215546Sopenharmony_ci      (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
3097bf215546Sopenharmony_ci      nir->xfb_info == NULL;
3098bf215546Sopenharmony_ci
3099bf215546Sopenharmony_ci   if (!has_indirect_inputs || !has_indirect_outputs) {
3100bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3101bf215546Sopenharmony_ci                 nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3102bf215546Sopenharmony_ci                 !has_indirect_inputs);
3103bf215546Sopenharmony_ci
3104bf215546Sopenharmony_ci      /* We need to lower all the copy_deref's introduced by lower_io_to-
3105bf215546Sopenharmony_ci       * _temporaries before calling nir_lower_io.
3106bf215546Sopenharmony_ci       */
3107bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_split_var_copies);
3108bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_var_copies);
3109bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3110bf215546Sopenharmony_ci   }
3111bf215546Sopenharmony_ci
3112bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT &&
3113bf215546Sopenharmony_ci       nir->options->lower_fs_color_inputs)
3114bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_color_inputs);
3115bf215546Sopenharmony_ci
3116bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3117bf215546Sopenharmony_ci              type_size_vec4, nir_lower_io_lower_64bit_to_32);
3118bf215546Sopenharmony_ci
3119bf215546Sopenharmony_ci   /* nir_io_add_const_offset_to_base needs actual constants. */
3120bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_constant_folding);
3121bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3122bf215546Sopenharmony_ci                                                    nir_var_shader_out);
3123bf215546Sopenharmony_ci
3124bf215546Sopenharmony_ci   /* Lower and remove dead derefs and variables to clean up the IR. */
3125bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3126bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_dce);
3127bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp |
3128bf215546Sopenharmony_ci              nir_var_shader_in | nir_var_shader_out, NULL);
3129bf215546Sopenharmony_ci
3130bf215546Sopenharmony_ci   if (nir->xfb_info)
3131bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
3132bf215546Sopenharmony_ci
3133bf215546Sopenharmony_ci   nir->info.io_lowered = true;
3134bf215546Sopenharmony_ci}
3135