1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © Microsoft Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "dxil_nir.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "nir_builder.h"
27bf215546Sopenharmony_ci#include "nir_deref.h"
28bf215546Sopenharmony_ci#include "nir_to_dxil.h"
29bf215546Sopenharmony_ci#include "util/u_math.h"
30bf215546Sopenharmony_ci#include "vulkan/vulkan_core.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_cistatic void
33bf215546Sopenharmony_cicl_type_size_align(const struct glsl_type *type, unsigned *size,
34bf215546Sopenharmony_ci                   unsigned *align)
35bf215546Sopenharmony_ci{
36bf215546Sopenharmony_ci   *size = glsl_get_cl_size(type);
37bf215546Sopenharmony_ci   *align = glsl_get_cl_alignment(type);
38bf215546Sopenharmony_ci}
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic void
41bf215546Sopenharmony_ciextract_comps_from_vec32(nir_builder *b, nir_ssa_def *vec32,
42bf215546Sopenharmony_ci                         unsigned dst_bit_size,
43bf215546Sopenharmony_ci                         nir_ssa_def **dst_comps,
44bf215546Sopenharmony_ci                         unsigned num_dst_comps)
45bf215546Sopenharmony_ci{
46bf215546Sopenharmony_ci   unsigned step = DIV_ROUND_UP(dst_bit_size, 32);
47bf215546Sopenharmony_ci   unsigned comps_per32b = 32 / dst_bit_size;
48bf215546Sopenharmony_ci   nir_ssa_def *tmp;
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci   for (unsigned i = 0; i < vec32->num_components; i += step) {
51bf215546Sopenharmony_ci      switch (dst_bit_size) {
52bf215546Sopenharmony_ci      case 64:
53bf215546Sopenharmony_ci         tmp = nir_pack_64_2x32_split(b, nir_channel(b, vec32, i),
54bf215546Sopenharmony_ci                                         nir_channel(b, vec32, i + 1));
55bf215546Sopenharmony_ci         dst_comps[i / 2] = tmp;
56bf215546Sopenharmony_ci         break;
57bf215546Sopenharmony_ci      case 32:
58bf215546Sopenharmony_ci         dst_comps[i] = nir_channel(b, vec32, i);
59bf215546Sopenharmony_ci         break;
60bf215546Sopenharmony_ci      case 16:
61bf215546Sopenharmony_ci      case 8: {
62bf215546Sopenharmony_ci         unsigned dst_offs = i * comps_per32b;
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci         tmp = nir_unpack_bits(b, nir_channel(b, vec32, i), dst_bit_size);
65bf215546Sopenharmony_ci         for (unsigned j = 0; j < comps_per32b && dst_offs + j < num_dst_comps; j++)
66bf215546Sopenharmony_ci            dst_comps[dst_offs + j] = nir_channel(b, tmp, j);
67bf215546Sopenharmony_ci         }
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci         break;
70bf215546Sopenharmony_ci      }
71bf215546Sopenharmony_ci   }
72bf215546Sopenharmony_ci}
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_cistatic nir_ssa_def *
75bf215546Sopenharmony_ciload_comps_to_vec32(nir_builder *b, unsigned src_bit_size,
76bf215546Sopenharmony_ci                    nir_ssa_def **src_comps, unsigned num_src_comps)
77bf215546Sopenharmony_ci{
78bf215546Sopenharmony_ci   unsigned num_vec32comps = DIV_ROUND_UP(num_src_comps * src_bit_size, 32);
79bf215546Sopenharmony_ci   unsigned step = DIV_ROUND_UP(src_bit_size, 32);
80bf215546Sopenharmony_ci   unsigned comps_per32b = 32 / src_bit_size;
81bf215546Sopenharmony_ci   nir_ssa_def *vec32comps[4];
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_vec32comps; i += step) {
84bf215546Sopenharmony_ci      switch (src_bit_size) {
85bf215546Sopenharmony_ci      case 64:
86bf215546Sopenharmony_ci         vec32comps[i] = nir_unpack_64_2x32_split_x(b, src_comps[i / 2]);
87bf215546Sopenharmony_ci         vec32comps[i + 1] = nir_unpack_64_2x32_split_y(b, src_comps[i / 2]);
88bf215546Sopenharmony_ci         break;
89bf215546Sopenharmony_ci      case 32:
90bf215546Sopenharmony_ci         vec32comps[i] = src_comps[i];
91bf215546Sopenharmony_ci         break;
92bf215546Sopenharmony_ci      case 16:
93bf215546Sopenharmony_ci      case 8: {
94bf215546Sopenharmony_ci         unsigned src_offs = i * comps_per32b;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci         vec32comps[i] = nir_u2u32(b, src_comps[src_offs]);
97bf215546Sopenharmony_ci         for (unsigned j = 1; j < comps_per32b && src_offs + j < num_src_comps; j++) {
98bf215546Sopenharmony_ci            nir_ssa_def *tmp = nir_ishl(b, nir_u2u32(b, src_comps[src_offs + j]),
99bf215546Sopenharmony_ci                                           nir_imm_int(b, j * src_bit_size));
100bf215546Sopenharmony_ci            vec32comps[i] = nir_ior(b, vec32comps[i], tmp);
101bf215546Sopenharmony_ci         }
102bf215546Sopenharmony_ci         break;
103bf215546Sopenharmony_ci      }
104bf215546Sopenharmony_ci      }
105bf215546Sopenharmony_ci   }
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci   return nir_vec(b, vec32comps, num_vec32comps);
108bf215546Sopenharmony_ci}
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_cistatic nir_ssa_def *
111bf215546Sopenharmony_cibuild_load_ptr_dxil(nir_builder *b, nir_deref_instr *deref, nir_ssa_def *idx)
112bf215546Sopenharmony_ci{
113bf215546Sopenharmony_ci   return nir_load_ptr_dxil(b, 1, 32, &deref->dest.ssa, idx);
114bf215546Sopenharmony_ci}
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_cistatic bool
117bf215546Sopenharmony_cilower_load_deref(nir_builder *b, nir_intrinsic_instr *intr)
118bf215546Sopenharmony_ci{
119bf215546Sopenharmony_ci   assert(intr->dest.is_ssa);
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
124bf215546Sopenharmony_ci   if (!nir_deref_mode_is(deref, nir_var_shader_temp))
125bf215546Sopenharmony_ci      return false;
126bf215546Sopenharmony_ci   nir_ssa_def *ptr = nir_u2u32(b, nir_build_deref_offset(b, deref, cl_type_size_align));
127bf215546Sopenharmony_ci   nir_ssa_def *offset = nir_iand(b, ptr, nir_inot(b, nir_imm_int(b, 3)));
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci   assert(intr->dest.is_ssa);
130bf215546Sopenharmony_ci   unsigned num_components = nir_dest_num_components(intr->dest);
131bf215546Sopenharmony_ci   unsigned bit_size = nir_dest_bit_size(intr->dest);
132bf215546Sopenharmony_ci   unsigned load_size = MAX2(32, bit_size);
133bf215546Sopenharmony_ci   unsigned num_bits = num_components * bit_size;
134bf215546Sopenharmony_ci   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
135bf215546Sopenharmony_ci   unsigned comp_idx = 0;
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   nir_deref_path path;
138bf215546Sopenharmony_ci   nir_deref_path_init(&path, deref, NULL);
139bf215546Sopenharmony_ci   nir_ssa_def *base_idx = nir_ishr(b, offset, nir_imm_int(b, 2 /* log2(32 / 8) */));
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   /* Split loads into 32-bit chunks */
142bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_bits; i += load_size) {
143bf215546Sopenharmony_ci      unsigned subload_num_bits = MIN2(num_bits - i, load_size);
144bf215546Sopenharmony_ci      nir_ssa_def *idx = nir_iadd(b, base_idx, nir_imm_int(b, i / 32));
145bf215546Sopenharmony_ci      nir_ssa_def *vec32 = build_load_ptr_dxil(b, path.path[0], idx);
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci      if (load_size == 64) {
148bf215546Sopenharmony_ci         idx = nir_iadd(b, idx, nir_imm_int(b, 1));
149bf215546Sopenharmony_ci         vec32 = nir_vec2(b, vec32,
150bf215546Sopenharmony_ci                             build_load_ptr_dxil(b, path.path[0], idx));
151bf215546Sopenharmony_ci      }
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci      /* If we have 2 bytes or less to load we need to adjust the u32 value so
154bf215546Sopenharmony_ci       * we can always extract the LSB.
155bf215546Sopenharmony_ci       */
156bf215546Sopenharmony_ci      if (subload_num_bits <= 16) {
157bf215546Sopenharmony_ci         nir_ssa_def *shift = nir_imul(b, nir_iand(b, ptr, nir_imm_int(b, 3)),
158bf215546Sopenharmony_ci                                          nir_imm_int(b, 8));
159bf215546Sopenharmony_ci         vec32 = nir_ushr(b, vec32, shift);
160bf215546Sopenharmony_ci      }
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci      /* And now comes the pack/unpack step to match the original type. */
163bf215546Sopenharmony_ci      extract_comps_from_vec32(b, vec32, bit_size, &comps[comp_idx],
164bf215546Sopenharmony_ci                               subload_num_bits / bit_size);
165bf215546Sopenharmony_ci      comp_idx += subload_num_bits / bit_size;
166bf215546Sopenharmony_ci   }
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   nir_deref_path_finish(&path);
169bf215546Sopenharmony_ci   assert(comp_idx == num_components);
170bf215546Sopenharmony_ci   nir_ssa_def *result = nir_vec(b, comps, num_components);
171bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
172bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
173bf215546Sopenharmony_ci   return true;
174bf215546Sopenharmony_ci}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_cistatic nir_ssa_def *
177bf215546Sopenharmony_ciubo_load_select_32b_comps(nir_builder *b, nir_ssa_def *vec32,
178bf215546Sopenharmony_ci                          nir_ssa_def *offset, unsigned num_bytes)
179bf215546Sopenharmony_ci{
180bf215546Sopenharmony_ci   assert(num_bytes == 16 || num_bytes == 12 || num_bytes == 8 ||
181bf215546Sopenharmony_ci          num_bytes == 4 || num_bytes == 3 || num_bytes == 2 ||
182bf215546Sopenharmony_ci          num_bytes == 1);
183bf215546Sopenharmony_ci   assert(vec32->num_components == 4);
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   /* 16 and 12 byte types are always aligned on 16 bytes. */
186bf215546Sopenharmony_ci   if (num_bytes > 8)
187bf215546Sopenharmony_ci      return vec32;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   nir_ssa_def *comps[4];
190bf215546Sopenharmony_ci   nir_ssa_def *cond;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   for (unsigned i = 0; i < 4; i++)
193bf215546Sopenharmony_ci      comps[i] = nir_channel(b, vec32, i);
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   /* If we have 8bytes or less to load, select which half the vec4 should
196bf215546Sopenharmony_ci    * be used.
197bf215546Sopenharmony_ci    */
198bf215546Sopenharmony_ci   cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x8)),
199bf215546Sopenharmony_ci                                 nir_imm_int(b, 0));
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   comps[0] = nir_bcsel(b, cond, comps[2], comps[0]);
202bf215546Sopenharmony_ci   comps[1] = nir_bcsel(b, cond, comps[3], comps[1]);
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci   /* Thanks to the CL alignment constraints, if we want 8 bytes we're done. */
205bf215546Sopenharmony_ci   if (num_bytes == 8)
206bf215546Sopenharmony_ci      return nir_vec(b, comps, 2);
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   /* 4 bytes or less needed, select which of the 32bit component should be
209bf215546Sopenharmony_ci    * used and return it. The sub-32bit split is handled in
210bf215546Sopenharmony_ci    * extract_comps_from_vec32().
211bf215546Sopenharmony_ci    */
212bf215546Sopenharmony_ci   cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x4)),
213bf215546Sopenharmony_ci                                 nir_imm_int(b, 0));
214bf215546Sopenharmony_ci   return nir_bcsel(b, cond, comps[1], comps[0]);
215bf215546Sopenharmony_ci}
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_cinir_ssa_def *
218bf215546Sopenharmony_cibuild_load_ubo_dxil(nir_builder *b, nir_ssa_def *buffer,
219bf215546Sopenharmony_ci                    nir_ssa_def *offset, unsigned num_components,
220bf215546Sopenharmony_ci                    unsigned bit_size)
221bf215546Sopenharmony_ci{
222bf215546Sopenharmony_ci   nir_ssa_def *idx = nir_ushr(b, offset, nir_imm_int(b, 4));
223bf215546Sopenharmony_ci   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
224bf215546Sopenharmony_ci   unsigned num_bits = num_components * bit_size;
225bf215546Sopenharmony_ci   unsigned comp_idx = 0;
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   /* We need to split loads in 16byte chunks because that's the
228bf215546Sopenharmony_ci    * granularity of cBufferLoadLegacy().
229bf215546Sopenharmony_ci    */
230bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_bits; i += (16 * 8)) {
231bf215546Sopenharmony_ci      /* For each 16byte chunk (or smaller) we generate a 32bit ubo vec
232bf215546Sopenharmony_ci       * load.
233bf215546Sopenharmony_ci       */
234bf215546Sopenharmony_ci      unsigned subload_num_bits = MIN2(num_bits - i, 16 * 8);
235bf215546Sopenharmony_ci      nir_ssa_def *vec32 =
236bf215546Sopenharmony_ci         nir_load_ubo_dxil(b, 4, 32, buffer, nir_iadd(b, idx, nir_imm_int(b, i / (16 * 8))));
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci      /* First re-arrange the vec32 to account for intra 16-byte offset. */
239bf215546Sopenharmony_ci      vec32 = ubo_load_select_32b_comps(b, vec32, offset, subload_num_bits / 8);
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci      /* If we have 2 bytes or less to load we need to adjust the u32 value so
242bf215546Sopenharmony_ci       * we can always extract the LSB.
243bf215546Sopenharmony_ci       */
244bf215546Sopenharmony_ci      if (subload_num_bits <= 16) {
245bf215546Sopenharmony_ci         nir_ssa_def *shift = nir_imul(b, nir_iand(b, offset,
246bf215546Sopenharmony_ci                                                      nir_imm_int(b, 3)),
247bf215546Sopenharmony_ci                                          nir_imm_int(b, 8));
248bf215546Sopenharmony_ci         vec32 = nir_ushr(b, vec32, shift);
249bf215546Sopenharmony_ci      }
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci      /* And now comes the pack/unpack step to match the original type. */
252bf215546Sopenharmony_ci      extract_comps_from_vec32(b, vec32, bit_size, &comps[comp_idx],
253bf215546Sopenharmony_ci                               subload_num_bits / bit_size);
254bf215546Sopenharmony_ci      comp_idx += subload_num_bits / bit_size;
255bf215546Sopenharmony_ci   }
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci   assert(comp_idx == num_components);
258bf215546Sopenharmony_ci   return nir_vec(b, comps, num_components);
259bf215546Sopenharmony_ci}
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_cistatic bool
262bf215546Sopenharmony_cilower_load_ssbo(nir_builder *b, nir_intrinsic_instr *intr)
263bf215546Sopenharmony_ci{
264bf215546Sopenharmony_ci   assert(intr->dest.is_ssa);
265bf215546Sopenharmony_ci   assert(intr->src[0].is_ssa);
266bf215546Sopenharmony_ci   assert(intr->src[1].is_ssa);
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci   nir_ssa_def *buffer = intr->src[0].ssa;
271bf215546Sopenharmony_ci   nir_ssa_def *offset = nir_iand(b, intr->src[1].ssa, nir_imm_int(b, ~3));
272bf215546Sopenharmony_ci   enum gl_access_qualifier access = nir_intrinsic_access(intr);
273bf215546Sopenharmony_ci   unsigned bit_size = nir_dest_bit_size(intr->dest);
274bf215546Sopenharmony_ci   unsigned num_components = nir_dest_num_components(intr->dest);
275bf215546Sopenharmony_ci   unsigned num_bits = num_components * bit_size;
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
278bf215546Sopenharmony_ci   unsigned comp_idx = 0;
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci   /* We need to split loads in 16byte chunks because that's the optimal
281bf215546Sopenharmony_ci    * granularity of bufferLoad(). Minimum alignment is 4byte, which saves
282bf215546Sopenharmony_ci    * from us from extra complexity to extract >= 32 bit components.
283bf215546Sopenharmony_ci    */
284bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_bits; i += 4 * 32) {
285bf215546Sopenharmony_ci      /* For each 16byte chunk (or smaller) we generate a 32bit ssbo vec
286bf215546Sopenharmony_ci       * load.
287bf215546Sopenharmony_ci       */
288bf215546Sopenharmony_ci      unsigned subload_num_bits = MIN2(num_bits - i, 4 * 32);
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci      /* The number of components to store depends on the number of bytes. */
291bf215546Sopenharmony_ci      nir_ssa_def *vec32 =
292bf215546Sopenharmony_ci         nir_load_ssbo(b, DIV_ROUND_UP(subload_num_bits, 32), 32,
293bf215546Sopenharmony_ci                       buffer, nir_iadd(b, offset, nir_imm_int(b, i / 8)),
294bf215546Sopenharmony_ci                       .align_mul = 4,
295bf215546Sopenharmony_ci                       .align_offset = 0,
296bf215546Sopenharmony_ci                       .access = access);
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci      /* If we have 2 bytes or less to load we need to adjust the u32 value so
299bf215546Sopenharmony_ci       * we can always extract the LSB.
300bf215546Sopenharmony_ci       */
301bf215546Sopenharmony_ci      if (subload_num_bits <= 16) {
302bf215546Sopenharmony_ci         nir_ssa_def *shift = nir_imul(b, nir_iand(b, intr->src[1].ssa, nir_imm_int(b, 3)),
303bf215546Sopenharmony_ci                                          nir_imm_int(b, 8));
304bf215546Sopenharmony_ci         vec32 = nir_ushr(b, vec32, shift);
305bf215546Sopenharmony_ci      }
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci      /* And now comes the pack/unpack step to match the original type. */
308bf215546Sopenharmony_ci      extract_comps_from_vec32(b, vec32, bit_size, &comps[comp_idx],
309bf215546Sopenharmony_ci                               subload_num_bits / bit_size);
310bf215546Sopenharmony_ci      comp_idx += subload_num_bits / bit_size;
311bf215546Sopenharmony_ci   }
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   assert(comp_idx == num_components);
314bf215546Sopenharmony_ci   nir_ssa_def *result = nir_vec(b, comps, num_components);
315bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
316bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
317bf215546Sopenharmony_ci   return true;
318bf215546Sopenharmony_ci}
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_cistatic bool
321bf215546Sopenharmony_cilower_store_ssbo(nir_builder *b, nir_intrinsic_instr *intr)
322bf215546Sopenharmony_ci{
323bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci   assert(intr->src[0].is_ssa);
326bf215546Sopenharmony_ci   assert(intr->src[1].is_ssa);
327bf215546Sopenharmony_ci   assert(intr->src[2].is_ssa);
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci   nir_ssa_def *val = intr->src[0].ssa;
330bf215546Sopenharmony_ci   nir_ssa_def *buffer = intr->src[1].ssa;
331bf215546Sopenharmony_ci   nir_ssa_def *offset = nir_iand(b, intr->src[2].ssa, nir_imm_int(b, ~3));
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci   unsigned bit_size = val->bit_size;
334bf215546Sopenharmony_ci   unsigned num_components = val->num_components;
335bf215546Sopenharmony_ci   unsigned num_bits = num_components * bit_size;
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
338bf215546Sopenharmony_ci   unsigned comp_idx = 0;
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   unsigned write_mask = nir_intrinsic_write_mask(intr);
341bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_components; i++)
342bf215546Sopenharmony_ci      if (write_mask & (1 << i))
343bf215546Sopenharmony_ci         comps[i] = nir_channel(b, val, i);
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   /* We split stores in 16byte chunks because that's the optimal granularity
346bf215546Sopenharmony_ci    * of bufferStore(). Minimum alignment is 4byte, which saves from us from
347bf215546Sopenharmony_ci    * extra complexity to store >= 32 bit components.
348bf215546Sopenharmony_ci    */
349bf215546Sopenharmony_ci   unsigned bit_offset = 0;
350bf215546Sopenharmony_ci   while (true) {
351bf215546Sopenharmony_ci      /* Skip over holes in the write mask */
352bf215546Sopenharmony_ci      while (comp_idx < num_components && comps[comp_idx] == NULL) {
353bf215546Sopenharmony_ci         comp_idx++;
354bf215546Sopenharmony_ci         bit_offset += bit_size;
355bf215546Sopenharmony_ci      }
356bf215546Sopenharmony_ci      if (comp_idx >= num_components)
357bf215546Sopenharmony_ci         break;
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci      /* For each 16byte chunk (or smaller) we generate a 32bit ssbo vec
360bf215546Sopenharmony_ci       * store. If a component is skipped by the write mask, do a smaller
361bf215546Sopenharmony_ci       * sub-store
362bf215546Sopenharmony_ci       */
363bf215546Sopenharmony_ci      unsigned num_src_comps_stored = 0, substore_num_bits = 0;
364bf215546Sopenharmony_ci      while(num_src_comps_stored + comp_idx < num_components &&
365bf215546Sopenharmony_ci            substore_num_bits + bit_offset < num_bits &&
366bf215546Sopenharmony_ci            substore_num_bits < 4 * 32 &&
367bf215546Sopenharmony_ci            comps[comp_idx + num_src_comps_stored]) {
368bf215546Sopenharmony_ci         ++num_src_comps_stored;
369bf215546Sopenharmony_ci         substore_num_bits += bit_size;
370bf215546Sopenharmony_ci      }
371bf215546Sopenharmony_ci      nir_ssa_def *local_offset = nir_iadd(b, offset, nir_imm_int(b, bit_offset / 8));
372bf215546Sopenharmony_ci      nir_ssa_def *vec32 = load_comps_to_vec32(b, bit_size, &comps[comp_idx],
373bf215546Sopenharmony_ci                                               num_src_comps_stored);
374bf215546Sopenharmony_ci      nir_intrinsic_instr *store;
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci      if (substore_num_bits < 32) {
377bf215546Sopenharmony_ci         nir_ssa_def *mask = nir_imm_int(b, (1 << substore_num_bits) - 1);
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci        /* If we have 16 bits or less to store we need to place them
380bf215546Sopenharmony_ci         * correctly in the u32 component. Anything greater than 16 bits
381bf215546Sopenharmony_ci         * (including uchar3) is naturally aligned on 32bits.
382bf215546Sopenharmony_ci         */
383bf215546Sopenharmony_ci         if (substore_num_bits <= 16) {
384bf215546Sopenharmony_ci            nir_ssa_def *pos = nir_iand(b, intr->src[2].ssa, nir_imm_int(b, 3));
385bf215546Sopenharmony_ci            nir_ssa_def *shift = nir_imul_imm(b, pos, 8);
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci            vec32 = nir_ishl(b, vec32, shift);
388bf215546Sopenharmony_ci            mask = nir_ishl(b, mask, shift);
389bf215546Sopenharmony_ci         }
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci         store = nir_intrinsic_instr_create(b->shader,
392bf215546Sopenharmony_ci                                            nir_intrinsic_store_ssbo_masked_dxil);
393bf215546Sopenharmony_ci         store->src[0] = nir_src_for_ssa(vec32);
394bf215546Sopenharmony_ci         store->src[1] = nir_src_for_ssa(nir_inot(b, mask));
395bf215546Sopenharmony_ci         store->src[2] = nir_src_for_ssa(buffer);
396bf215546Sopenharmony_ci         store->src[3] = nir_src_for_ssa(local_offset);
397bf215546Sopenharmony_ci      } else {
398bf215546Sopenharmony_ci         store = nir_intrinsic_instr_create(b->shader,
399bf215546Sopenharmony_ci                                            nir_intrinsic_store_ssbo);
400bf215546Sopenharmony_ci         store->src[0] = nir_src_for_ssa(vec32);
401bf215546Sopenharmony_ci         store->src[1] = nir_src_for_ssa(buffer);
402bf215546Sopenharmony_ci         store->src[2] = nir_src_for_ssa(local_offset);
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci         nir_intrinsic_set_align(store, 4, 0);
405bf215546Sopenharmony_ci      }
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci      /* The number of components to store depends on the number of bits. */
408bf215546Sopenharmony_ci      store->num_components = DIV_ROUND_UP(substore_num_bits, 32);
409bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &store->instr);
410bf215546Sopenharmony_ci      comp_idx += num_src_comps_stored;
411bf215546Sopenharmony_ci      bit_offset += substore_num_bits;
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci      if (nir_intrinsic_has_write_mask(store))
414bf215546Sopenharmony_ci         nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
415bf215546Sopenharmony_ci   }
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
418bf215546Sopenharmony_ci   return true;
419bf215546Sopenharmony_ci}
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_cistatic void
422bf215546Sopenharmony_cilower_load_vec32(nir_builder *b, nir_ssa_def *index, unsigned num_comps, nir_ssa_def **comps, nir_intrinsic_op op)
423bf215546Sopenharmony_ci{
424bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_comps; i++) {
425bf215546Sopenharmony_ci      nir_intrinsic_instr *load =
426bf215546Sopenharmony_ci         nir_intrinsic_instr_create(b->shader, op);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci      load->num_components = 1;
429bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(nir_iadd(b, index, nir_imm_int(b, i)));
430bf215546Sopenharmony_ci      nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
431bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &load->instr);
432bf215546Sopenharmony_ci      comps[i] = &load->dest.ssa;
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_cistatic bool
437bf215546Sopenharmony_cilower_32b_offset_load(nir_builder *b, nir_intrinsic_instr *intr)
438bf215546Sopenharmony_ci{
439bf215546Sopenharmony_ci   assert(intr->dest.is_ssa);
440bf215546Sopenharmony_ci   unsigned bit_size = nir_dest_bit_size(intr->dest);
441bf215546Sopenharmony_ci   unsigned num_components = nir_dest_num_components(intr->dest);
442bf215546Sopenharmony_ci   unsigned num_bits = num_components * bit_size;
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
445bf215546Sopenharmony_ci   nir_intrinsic_op op = intr->intrinsic;
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci   assert(intr->src[0].is_ssa);
448bf215546Sopenharmony_ci   nir_ssa_def *offset = intr->src[0].ssa;
449bf215546Sopenharmony_ci   if (op == nir_intrinsic_load_shared) {
450bf215546Sopenharmony_ci      offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_base(intr)));
451bf215546Sopenharmony_ci      op = nir_intrinsic_load_shared_dxil;
452bf215546Sopenharmony_ci   } else {
453bf215546Sopenharmony_ci      offset = nir_u2u32(b, offset);
454bf215546Sopenharmony_ci      op = nir_intrinsic_load_scratch_dxil;
455bf215546Sopenharmony_ci   }
456bf215546Sopenharmony_ci   nir_ssa_def *index = nir_ushr(b, offset, nir_imm_int(b, 2));
457bf215546Sopenharmony_ci   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
458bf215546Sopenharmony_ci   nir_ssa_def *comps_32bit[NIR_MAX_VEC_COMPONENTS * 2];
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   /* We need to split loads in 32-bit accesses because the buffer
461bf215546Sopenharmony_ci    * is an i32 array and DXIL does not support type casts.
462bf215546Sopenharmony_ci    */
463bf215546Sopenharmony_ci   unsigned num_32bit_comps = DIV_ROUND_UP(num_bits, 32);
464bf215546Sopenharmony_ci   lower_load_vec32(b, index, num_32bit_comps, comps_32bit, op);
465bf215546Sopenharmony_ci   unsigned num_comps_per_pass = MIN2(num_32bit_comps, 4);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_32bit_comps; i += num_comps_per_pass) {
468bf215546Sopenharmony_ci      unsigned num_vec32_comps = MIN2(num_32bit_comps - i, 4);
469bf215546Sopenharmony_ci      unsigned num_dest_comps = num_vec32_comps * 32 / bit_size;
470bf215546Sopenharmony_ci      nir_ssa_def *vec32 = nir_vec(b, &comps_32bit[i], num_vec32_comps);
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci      /* If we have 16 bits or less to load we need to adjust the u32 value so
473bf215546Sopenharmony_ci       * we can always extract the LSB.
474bf215546Sopenharmony_ci       */
475bf215546Sopenharmony_ci      if (num_bits <= 16) {
476bf215546Sopenharmony_ci         nir_ssa_def *shift =
477bf215546Sopenharmony_ci            nir_imul(b, nir_iand(b, offset, nir_imm_int(b, 3)),
478bf215546Sopenharmony_ci                        nir_imm_int(b, 8));
479bf215546Sopenharmony_ci         vec32 = nir_ushr(b, vec32, shift);
480bf215546Sopenharmony_ci      }
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_ci      /* And now comes the pack/unpack step to match the original type. */
483bf215546Sopenharmony_ci      unsigned dest_index = i * 32 / bit_size;
484bf215546Sopenharmony_ci      extract_comps_from_vec32(b, vec32, bit_size, &comps[dest_index], num_dest_comps);
485bf215546Sopenharmony_ci   }
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   nir_ssa_def *result = nir_vec(b, comps, num_components);
488bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
489bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci   return true;
492bf215546Sopenharmony_ci}
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_cistatic void
495bf215546Sopenharmony_cilower_store_vec32(nir_builder *b, nir_ssa_def *index, nir_ssa_def *vec32, nir_intrinsic_op op)
496bf215546Sopenharmony_ci{
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   for (unsigned i = 0; i < vec32->num_components; i++) {
499bf215546Sopenharmony_ci      nir_intrinsic_instr *store =
500bf215546Sopenharmony_ci         nir_intrinsic_instr_create(b->shader, op);
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci      store->src[0] = nir_src_for_ssa(nir_channel(b, vec32, i));
503bf215546Sopenharmony_ci      store->src[1] = nir_src_for_ssa(nir_iadd(b, index, nir_imm_int(b, i)));
504bf215546Sopenharmony_ci      store->num_components = 1;
505bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &store->instr);
506bf215546Sopenharmony_ci   }
507bf215546Sopenharmony_ci}
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_cistatic void
510bf215546Sopenharmony_cilower_masked_store_vec32(nir_builder *b, nir_ssa_def *offset, nir_ssa_def *index,
511bf215546Sopenharmony_ci                         nir_ssa_def *vec32, unsigned num_bits, nir_intrinsic_op op)
512bf215546Sopenharmony_ci{
513bf215546Sopenharmony_ci   nir_ssa_def *mask = nir_imm_int(b, (1 << num_bits) - 1);
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   /* If we have 16 bits or less to store we need to place them correctly in
516bf215546Sopenharmony_ci    * the u32 component. Anything greater than 16 bits (including uchar3) is
517bf215546Sopenharmony_ci    * naturally aligned on 32bits.
518bf215546Sopenharmony_ci    */
519bf215546Sopenharmony_ci   if (num_bits <= 16) {
520bf215546Sopenharmony_ci      nir_ssa_def *shift =
521bf215546Sopenharmony_ci         nir_imul_imm(b, nir_iand(b, offset, nir_imm_int(b, 3)), 8);
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci      vec32 = nir_ishl(b, vec32, shift);
524bf215546Sopenharmony_ci      mask = nir_ishl(b, mask, shift);
525bf215546Sopenharmony_ci   }
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci   if (op == nir_intrinsic_store_shared_dxil) {
528bf215546Sopenharmony_ci      /* Use the dedicated masked intrinsic */
529bf215546Sopenharmony_ci      nir_store_shared_masked_dxil(b, vec32, nir_inot(b, mask), index);
530bf215546Sopenharmony_ci   } else {
531bf215546Sopenharmony_ci      /* For scratch, since we don't need atomics, just generate the read-modify-write in NIR */
532bf215546Sopenharmony_ci      nir_ssa_def *load = nir_load_scratch_dxil(b, 1, 32, index);
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci      nir_ssa_def *new_val = nir_ior(b, vec32,
535bf215546Sopenharmony_ci                                     nir_iand(b,
536bf215546Sopenharmony_ci                                              nir_inot(b, mask),
537bf215546Sopenharmony_ci                                              load));
538bf215546Sopenharmony_ci
539bf215546Sopenharmony_ci      lower_store_vec32(b, index, new_val, op);
540bf215546Sopenharmony_ci   }
541bf215546Sopenharmony_ci}
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_cistatic bool
544bf215546Sopenharmony_cilower_32b_offset_store(nir_builder *b, nir_intrinsic_instr *intr)
545bf215546Sopenharmony_ci{
546bf215546Sopenharmony_ci   assert(intr->src[0].is_ssa);
547bf215546Sopenharmony_ci   unsigned num_components = nir_src_num_components(intr->src[0]);
548bf215546Sopenharmony_ci   unsigned bit_size = nir_src_bit_size(intr->src[0]);
549bf215546Sopenharmony_ci   unsigned num_bits = num_components * bit_size;
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
552bf215546Sopenharmony_ci   nir_intrinsic_op op = intr->intrinsic;
553bf215546Sopenharmony_ci
554bf215546Sopenharmony_ci   nir_ssa_def *offset = intr->src[1].ssa;
555bf215546Sopenharmony_ci   if (op == nir_intrinsic_store_shared) {
556bf215546Sopenharmony_ci      offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_base(intr)));
557bf215546Sopenharmony_ci      op = nir_intrinsic_store_shared_dxil;
558bf215546Sopenharmony_ci   } else {
559bf215546Sopenharmony_ci      offset = nir_u2u32(b, offset);
560bf215546Sopenharmony_ci      op = nir_intrinsic_store_scratch_dxil;
561bf215546Sopenharmony_ci   }
562bf215546Sopenharmony_ci   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci   unsigned comp_idx = 0;
565bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_components; i++)
566bf215546Sopenharmony_ci      comps[i] = nir_channel(b, intr->src[0].ssa, i);
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_bits; i += 4 * 32) {
569bf215546Sopenharmony_ci      /* For each 4byte chunk (or smaller) we generate a 32bit scalar store.
570bf215546Sopenharmony_ci       */
571bf215546Sopenharmony_ci      unsigned substore_num_bits = MIN2(num_bits - i, 4 * 32);
572bf215546Sopenharmony_ci      nir_ssa_def *local_offset = nir_iadd(b, offset, nir_imm_int(b, i / 8));
573bf215546Sopenharmony_ci      nir_ssa_def *vec32 = load_comps_to_vec32(b, bit_size, &comps[comp_idx],
574bf215546Sopenharmony_ci                                               substore_num_bits / bit_size);
575bf215546Sopenharmony_ci      nir_ssa_def *index = nir_ushr(b, local_offset, nir_imm_int(b, 2));
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci      /* For anything less than 32bits we need to use the masked version of the
578bf215546Sopenharmony_ci       * intrinsic to preserve data living in the same 32bit slot.
579bf215546Sopenharmony_ci       */
580bf215546Sopenharmony_ci      if (num_bits < 32) {
581bf215546Sopenharmony_ci         lower_masked_store_vec32(b, local_offset, index, vec32, num_bits, op);
582bf215546Sopenharmony_ci      } else {
583bf215546Sopenharmony_ci         lower_store_vec32(b, index, vec32, op);
584bf215546Sopenharmony_ci      }
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci      comp_idx += substore_num_bits / bit_size;
587bf215546Sopenharmony_ci   }
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci   return true;
592bf215546Sopenharmony_ci}
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_cistatic void
595bf215546Sopenharmony_ciubo_to_temp_patch_deref_mode(nir_deref_instr *deref)
596bf215546Sopenharmony_ci{
597bf215546Sopenharmony_ci   deref->modes = nir_var_shader_temp;
598bf215546Sopenharmony_ci   nir_foreach_use(use_src, &deref->dest.ssa) {
599bf215546Sopenharmony_ci      if (use_src->parent_instr->type != nir_instr_type_deref)
600bf215546Sopenharmony_ci         continue;
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_ci      nir_deref_instr *parent = nir_instr_as_deref(use_src->parent_instr);
603bf215546Sopenharmony_ci      ubo_to_temp_patch_deref_mode(parent);
604bf215546Sopenharmony_ci   }
605bf215546Sopenharmony_ci}
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_cistatic void
608bf215546Sopenharmony_ciubo_to_temp_update_entry(nir_deref_instr *deref, struct hash_entry *he)
609bf215546Sopenharmony_ci{
610bf215546Sopenharmony_ci   assert(nir_deref_mode_is(deref, nir_var_mem_constant));
611bf215546Sopenharmony_ci   assert(deref->dest.is_ssa);
612bf215546Sopenharmony_ci   assert(he->data);
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   nir_foreach_use(use_src, &deref->dest.ssa) {
615bf215546Sopenharmony_ci      if (use_src->parent_instr->type == nir_instr_type_deref) {
616bf215546Sopenharmony_ci         ubo_to_temp_update_entry(nir_instr_as_deref(use_src->parent_instr), he);
617bf215546Sopenharmony_ci      } else if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
618bf215546Sopenharmony_ci         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_src->parent_instr);
619bf215546Sopenharmony_ci         if (intr->intrinsic != nir_intrinsic_load_deref)
620bf215546Sopenharmony_ci            he->data = NULL;
621bf215546Sopenharmony_ci      } else {
622bf215546Sopenharmony_ci         he->data = NULL;
623bf215546Sopenharmony_ci      }
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci      if (!he->data)
626bf215546Sopenharmony_ci         break;
627bf215546Sopenharmony_ci   }
628bf215546Sopenharmony_ci}
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_cibool
631bf215546Sopenharmony_cidxil_nir_lower_ubo_to_temp(nir_shader *nir)
632bf215546Sopenharmony_ci{
633bf215546Sopenharmony_ci   struct hash_table *ubo_to_temp = _mesa_pointer_hash_table_create(NULL);
634bf215546Sopenharmony_ci   bool progress = false;
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   /* First pass: collect all UBO accesses that could be turned into
637bf215546Sopenharmony_ci    * shader temp accesses.
638bf215546Sopenharmony_ci    */
639bf215546Sopenharmony_ci   foreach_list_typed(nir_function, func, node, &nir->functions) {
640bf215546Sopenharmony_ci      if (!func->is_entrypoint)
641bf215546Sopenharmony_ci         continue;
642bf215546Sopenharmony_ci      assert(func->impl);
643bf215546Sopenharmony_ci
644bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
645bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
646bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_deref)
647bf215546Sopenharmony_ci               continue;
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci            nir_deref_instr *deref = nir_instr_as_deref(instr);
650bf215546Sopenharmony_ci            if (!nir_deref_mode_is(deref, nir_var_mem_constant) ||
651bf215546Sopenharmony_ci                deref->deref_type != nir_deref_type_var)
652bf215546Sopenharmony_ci                  continue;
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci            struct hash_entry *he =
655bf215546Sopenharmony_ci               _mesa_hash_table_search(ubo_to_temp, deref->var);
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci            if (!he)
658bf215546Sopenharmony_ci               he = _mesa_hash_table_insert(ubo_to_temp, deref->var, deref->var);
659bf215546Sopenharmony_ci
660bf215546Sopenharmony_ci            if (!he->data)
661bf215546Sopenharmony_ci               continue;
662bf215546Sopenharmony_ci
663bf215546Sopenharmony_ci            ubo_to_temp_update_entry(deref, he);
664bf215546Sopenharmony_ci         }
665bf215546Sopenharmony_ci      }
666bf215546Sopenharmony_ci   }
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci   hash_table_foreach(ubo_to_temp, he) {
669bf215546Sopenharmony_ci      nir_variable *var = he->data;
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_ci      if (!var)
672bf215546Sopenharmony_ci         continue;
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_ci      /* Change the variable mode. */
675bf215546Sopenharmony_ci      var->data.mode = nir_var_shader_temp;
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci      /* Make sure the variable has a name.
678bf215546Sopenharmony_ci       * DXIL variables must have names.
679bf215546Sopenharmony_ci       */
680bf215546Sopenharmony_ci      if (!var->name)
681bf215546Sopenharmony_ci         var->name = ralloc_asprintf(nir, "global_%d", exec_list_length(&nir->variables));
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci      progress = true;
684bf215546Sopenharmony_ci   }
685bf215546Sopenharmony_ci   _mesa_hash_table_destroy(ubo_to_temp, NULL);
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci   /* Second pass: patch all derefs that were accessing the converted UBOs
688bf215546Sopenharmony_ci    * variables.
689bf215546Sopenharmony_ci    */
690bf215546Sopenharmony_ci   foreach_list_typed(nir_function, func, node, &nir->functions) {
691bf215546Sopenharmony_ci      if (!func->is_entrypoint)
692bf215546Sopenharmony_ci         continue;
693bf215546Sopenharmony_ci      assert(func->impl);
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
696bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
697bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_deref)
698bf215546Sopenharmony_ci               continue;
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci            nir_deref_instr *deref = nir_instr_as_deref(instr);
701bf215546Sopenharmony_ci            if (nir_deref_mode_is(deref, nir_var_mem_constant) &&
702bf215546Sopenharmony_ci                deref->deref_type == nir_deref_type_var &&
703bf215546Sopenharmony_ci                deref->var->data.mode == nir_var_shader_temp)
704bf215546Sopenharmony_ci               ubo_to_temp_patch_deref_mode(deref);
705bf215546Sopenharmony_ci         }
706bf215546Sopenharmony_ci      }
707bf215546Sopenharmony_ci   }
708bf215546Sopenharmony_ci
709bf215546Sopenharmony_ci   return progress;
710bf215546Sopenharmony_ci}
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_cistatic bool
713bf215546Sopenharmony_cilower_load_ubo(nir_builder *b, nir_intrinsic_instr *intr)
714bf215546Sopenharmony_ci{
715bf215546Sopenharmony_ci   assert(intr->dest.is_ssa);
716bf215546Sopenharmony_ci   assert(intr->src[0].is_ssa);
717bf215546Sopenharmony_ci   assert(intr->src[1].is_ssa);
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   nir_ssa_def *result =
722bf215546Sopenharmony_ci      build_load_ubo_dxil(b, intr->src[0].ssa, intr->src[1].ssa,
723bf215546Sopenharmony_ci                             nir_dest_num_components(intr->dest),
724bf215546Sopenharmony_ci                             nir_dest_bit_size(intr->dest));
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
727bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
728bf215546Sopenharmony_ci   return true;
729bf215546Sopenharmony_ci}
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_cibool
732bf215546Sopenharmony_cidxil_nir_lower_loads_stores_to_dxil(nir_shader *nir)
733bf215546Sopenharmony_ci{
734bf215546Sopenharmony_ci   bool progress = false;
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci   foreach_list_typed(nir_function, func, node, &nir->functions) {
737bf215546Sopenharmony_ci      if (!func->is_entrypoint)
738bf215546Sopenharmony_ci         continue;
739bf215546Sopenharmony_ci      assert(func->impl);
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci      nir_builder b;
742bf215546Sopenharmony_ci      nir_builder_init(&b, func->impl);
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
745bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
746bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
747bf215546Sopenharmony_ci               continue;
748bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci            switch (intr->intrinsic) {
751bf215546Sopenharmony_ci            case nir_intrinsic_load_deref:
752bf215546Sopenharmony_ci               progress |= lower_load_deref(&b, intr);
753bf215546Sopenharmony_ci               break;
754bf215546Sopenharmony_ci            case nir_intrinsic_load_shared:
755bf215546Sopenharmony_ci            case nir_intrinsic_load_scratch:
756bf215546Sopenharmony_ci               progress |= lower_32b_offset_load(&b, intr);
757bf215546Sopenharmony_ci               break;
758bf215546Sopenharmony_ci            case nir_intrinsic_load_ssbo:
759bf215546Sopenharmony_ci               progress |= lower_load_ssbo(&b, intr);
760bf215546Sopenharmony_ci               break;
761bf215546Sopenharmony_ci            case nir_intrinsic_load_ubo:
762bf215546Sopenharmony_ci               progress |= lower_load_ubo(&b, intr);
763bf215546Sopenharmony_ci               break;
764bf215546Sopenharmony_ci            case nir_intrinsic_store_shared:
765bf215546Sopenharmony_ci            case nir_intrinsic_store_scratch:
766bf215546Sopenharmony_ci               progress |= lower_32b_offset_store(&b, intr);
767bf215546Sopenharmony_ci               break;
768bf215546Sopenharmony_ci            case nir_intrinsic_store_ssbo:
769bf215546Sopenharmony_ci               progress |= lower_store_ssbo(&b, intr);
770bf215546Sopenharmony_ci               break;
771bf215546Sopenharmony_ci            default:
772bf215546Sopenharmony_ci               break;
773bf215546Sopenharmony_ci            }
774bf215546Sopenharmony_ci         }
775bf215546Sopenharmony_ci      }
776bf215546Sopenharmony_ci   }
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   return progress;
779bf215546Sopenharmony_ci}
780bf215546Sopenharmony_ci
781bf215546Sopenharmony_cistatic bool
782bf215546Sopenharmony_cilower_shared_atomic(nir_builder *b, nir_intrinsic_instr *intr,
783bf215546Sopenharmony_ci                    nir_intrinsic_op dxil_op)
784bf215546Sopenharmony_ci{
785bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci   assert(intr->src[0].is_ssa);
788bf215546Sopenharmony_ci   nir_ssa_def *offset =
789bf215546Sopenharmony_ci      nir_iadd(b, intr->src[0].ssa, nir_imm_int(b, nir_intrinsic_base(intr)));
790bf215546Sopenharmony_ci   nir_ssa_def *index = nir_ushr(b, offset, nir_imm_int(b, 2));
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, dxil_op);
793bf215546Sopenharmony_ci   atomic->src[0] = nir_src_for_ssa(index);
794bf215546Sopenharmony_ci   assert(intr->src[1].is_ssa);
795bf215546Sopenharmony_ci   atomic->src[1] = nir_src_for_ssa(intr->src[1].ssa);
796bf215546Sopenharmony_ci   if (dxil_op == nir_intrinsic_shared_atomic_comp_swap_dxil) {
797bf215546Sopenharmony_ci      assert(intr->src[2].is_ssa);
798bf215546Sopenharmony_ci      atomic->src[2] = nir_src_for_ssa(intr->src[2].ssa);
799bf215546Sopenharmony_ci   }
800bf215546Sopenharmony_ci   atomic->num_components = 0;
801bf215546Sopenharmony_ci   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &atomic->instr);
804bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, &atomic->dest.ssa);
805bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
806bf215546Sopenharmony_ci   return true;
807bf215546Sopenharmony_ci}
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_cibool
810bf215546Sopenharmony_cidxil_nir_lower_atomics_to_dxil(nir_shader *nir)
811bf215546Sopenharmony_ci{
812bf215546Sopenharmony_ci   bool progress = false;
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci   foreach_list_typed(nir_function, func, node, &nir->functions) {
815bf215546Sopenharmony_ci      if (!func->is_entrypoint)
816bf215546Sopenharmony_ci         continue;
817bf215546Sopenharmony_ci      assert(func->impl);
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci      nir_builder b;
820bf215546Sopenharmony_ci      nir_builder_init(&b, func->impl);
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
823bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
824bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
825bf215546Sopenharmony_ci               continue;
826bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
827bf215546Sopenharmony_ci
828bf215546Sopenharmony_ci            switch (intr->intrinsic) {
829bf215546Sopenharmony_ci
830bf215546Sopenharmony_ci#define ATOMIC(op)                                                            \
831bf215546Sopenharmony_ci  case nir_intrinsic_shared_atomic_##op:                                     \
832bf215546Sopenharmony_ci     progress |= lower_shared_atomic(&b, intr,                                \
833bf215546Sopenharmony_ci                                     nir_intrinsic_shared_atomic_##op##_dxil); \
834bf215546Sopenharmony_ci     break
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_ci            ATOMIC(add);
837bf215546Sopenharmony_ci            ATOMIC(imin);
838bf215546Sopenharmony_ci            ATOMIC(umin);
839bf215546Sopenharmony_ci            ATOMIC(imax);
840bf215546Sopenharmony_ci            ATOMIC(umax);
841bf215546Sopenharmony_ci            ATOMIC(and);
842bf215546Sopenharmony_ci            ATOMIC(or);
843bf215546Sopenharmony_ci            ATOMIC(xor);
844bf215546Sopenharmony_ci            ATOMIC(exchange);
845bf215546Sopenharmony_ci            ATOMIC(comp_swap);
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci#undef ATOMIC
848bf215546Sopenharmony_ci            default:
849bf215546Sopenharmony_ci               break;
850bf215546Sopenharmony_ci            }
851bf215546Sopenharmony_ci         }
852bf215546Sopenharmony_ci      }
853bf215546Sopenharmony_ci   }
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci   return progress;
856bf215546Sopenharmony_ci}
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_cistatic bool
859bf215546Sopenharmony_cilower_deref_ssbo(nir_builder *b, nir_deref_instr *deref)
860bf215546Sopenharmony_ci{
861bf215546Sopenharmony_ci   assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
862bf215546Sopenharmony_ci   assert(deref->deref_type == nir_deref_type_var ||
863bf215546Sopenharmony_ci          deref->deref_type == nir_deref_type_cast);
864bf215546Sopenharmony_ci   nir_variable *var = deref->var;
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&deref->instr);
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ci   if (deref->deref_type == nir_deref_type_var) {
869bf215546Sopenharmony_ci      /* We turn all deref_var into deref_cast and build a pointer value based on
870bf215546Sopenharmony_ci       * the var binding which encodes the UAV id.
871bf215546Sopenharmony_ci       */
872bf215546Sopenharmony_ci      nir_ssa_def *ptr = nir_imm_int64(b, (uint64_t)var->data.binding << 32);
873bf215546Sopenharmony_ci      nir_deref_instr *deref_cast =
874bf215546Sopenharmony_ci         nir_build_deref_cast(b, ptr, nir_var_mem_ssbo, deref->type,
875bf215546Sopenharmony_ci                              glsl_get_explicit_stride(var->type));
876bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
877bf215546Sopenharmony_ci                               &deref_cast->dest.ssa);
878bf215546Sopenharmony_ci      nir_instr_remove(&deref->instr);
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci      deref = deref_cast;
881bf215546Sopenharmony_ci      return true;
882bf215546Sopenharmony_ci   }
883bf215546Sopenharmony_ci   return false;
884bf215546Sopenharmony_ci}
885bf215546Sopenharmony_ci
886bf215546Sopenharmony_cibool
887bf215546Sopenharmony_cidxil_nir_lower_deref_ssbo(nir_shader *nir)
888bf215546Sopenharmony_ci{
889bf215546Sopenharmony_ci   bool progress = false;
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci   foreach_list_typed(nir_function, func, node, &nir->functions) {
892bf215546Sopenharmony_ci      if (!func->is_entrypoint)
893bf215546Sopenharmony_ci         continue;
894bf215546Sopenharmony_ci      assert(func->impl);
895bf215546Sopenharmony_ci
896bf215546Sopenharmony_ci      nir_builder b;
897bf215546Sopenharmony_ci      nir_builder_init(&b, func->impl);
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
900bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
901bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_deref)
902bf215546Sopenharmony_ci               continue;
903bf215546Sopenharmony_ci
904bf215546Sopenharmony_ci            nir_deref_instr *deref = nir_instr_as_deref(instr);
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci            if (!nir_deref_mode_is(deref, nir_var_mem_ssbo) ||
907bf215546Sopenharmony_ci                (deref->deref_type != nir_deref_type_var &&
908bf215546Sopenharmony_ci                 deref->deref_type != nir_deref_type_cast))
909bf215546Sopenharmony_ci               continue;
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci            progress |= lower_deref_ssbo(&b, deref);
912bf215546Sopenharmony_ci         }
913bf215546Sopenharmony_ci      }
914bf215546Sopenharmony_ci   }
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci   return progress;
917bf215546Sopenharmony_ci}
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_cistatic bool
920bf215546Sopenharmony_cilower_alu_deref_srcs(nir_builder *b, nir_alu_instr *alu)
921bf215546Sopenharmony_ci{
922bf215546Sopenharmony_ci   const nir_op_info *info = &nir_op_infos[alu->op];
923bf215546Sopenharmony_ci   bool progress = false;
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&alu->instr);
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->num_inputs; i++) {
928bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_src_as_deref(alu->src[i].src);
929bf215546Sopenharmony_ci
930bf215546Sopenharmony_ci      if (!deref)
931bf215546Sopenharmony_ci         continue;
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci      nir_deref_path path;
934bf215546Sopenharmony_ci      nir_deref_path_init(&path, deref, NULL);
935bf215546Sopenharmony_ci      nir_deref_instr *root_deref = path.path[0];
936bf215546Sopenharmony_ci      nir_deref_path_finish(&path);
937bf215546Sopenharmony_ci
938bf215546Sopenharmony_ci      if (root_deref->deref_type != nir_deref_type_cast)
939bf215546Sopenharmony_ci         continue;
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci      nir_ssa_def *ptr =
942bf215546Sopenharmony_ci         nir_iadd(b, root_deref->parent.ssa,
943bf215546Sopenharmony_ci                     nir_build_deref_offset(b, deref, cl_type_size_align));
944bf215546Sopenharmony_ci      nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(ptr));
945bf215546Sopenharmony_ci      progress = true;
946bf215546Sopenharmony_ci   }
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_ci   return progress;
949bf215546Sopenharmony_ci}
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_cibool
952bf215546Sopenharmony_cidxil_nir_opt_alu_deref_srcs(nir_shader *nir)
953bf215546Sopenharmony_ci{
954bf215546Sopenharmony_ci   bool progress = false;
955bf215546Sopenharmony_ci
956bf215546Sopenharmony_ci   foreach_list_typed(nir_function, func, node, &nir->functions) {
957bf215546Sopenharmony_ci      if (!func->is_entrypoint)
958bf215546Sopenharmony_ci         continue;
959bf215546Sopenharmony_ci      assert(func->impl);
960bf215546Sopenharmony_ci
961bf215546Sopenharmony_ci      nir_builder b;
962bf215546Sopenharmony_ci      nir_builder_init(&b, func->impl);
963bf215546Sopenharmony_ci
964bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
965bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
966bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_alu)
967bf215546Sopenharmony_ci               continue;
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci            nir_alu_instr *alu = nir_instr_as_alu(instr);
970bf215546Sopenharmony_ci            progress |= lower_alu_deref_srcs(&b, alu);
971bf215546Sopenharmony_ci         }
972bf215546Sopenharmony_ci      }
973bf215546Sopenharmony_ci   }
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci   return progress;
976bf215546Sopenharmony_ci}
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_cistatic nir_ssa_def *
979bf215546Sopenharmony_cimemcpy_load_deref_elem(nir_builder *b, nir_deref_instr *parent,
980bf215546Sopenharmony_ci                       nir_ssa_def *index)
981bf215546Sopenharmony_ci{
982bf215546Sopenharmony_ci   nir_deref_instr *deref;
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci   index = nir_i2i(b, index, nir_dest_bit_size(parent->dest));
985bf215546Sopenharmony_ci   assert(parent->deref_type == nir_deref_type_cast);
986bf215546Sopenharmony_ci   deref = nir_build_deref_ptr_as_array(b, parent, index);
987bf215546Sopenharmony_ci
988bf215546Sopenharmony_ci   return nir_load_deref(b, deref);
989bf215546Sopenharmony_ci}
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_cistatic void
992bf215546Sopenharmony_cimemcpy_store_deref_elem(nir_builder *b, nir_deref_instr *parent,
993bf215546Sopenharmony_ci                        nir_ssa_def *index, nir_ssa_def *value)
994bf215546Sopenharmony_ci{
995bf215546Sopenharmony_ci   nir_deref_instr *deref;
996bf215546Sopenharmony_ci
997bf215546Sopenharmony_ci   index = nir_i2i(b, index, nir_dest_bit_size(parent->dest));
998bf215546Sopenharmony_ci   assert(parent->deref_type == nir_deref_type_cast);
999bf215546Sopenharmony_ci   deref = nir_build_deref_ptr_as_array(b, parent, index);
1000bf215546Sopenharmony_ci   nir_store_deref(b, deref, value, 1);
1001bf215546Sopenharmony_ci}
1002bf215546Sopenharmony_ci
1003bf215546Sopenharmony_cistatic bool
1004bf215546Sopenharmony_cilower_memcpy_deref(nir_builder *b, nir_intrinsic_instr *intr)
1005bf215546Sopenharmony_ci{
1006bf215546Sopenharmony_ci   nir_deref_instr *dst_deref = nir_src_as_deref(intr->src[0]);
1007bf215546Sopenharmony_ci   nir_deref_instr *src_deref = nir_src_as_deref(intr->src[1]);
1008bf215546Sopenharmony_ci   assert(intr->src[2].is_ssa);
1009bf215546Sopenharmony_ci   nir_ssa_def *num_bytes = intr->src[2].ssa;
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_ci   assert(dst_deref && src_deref);
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&intr->instr);
1014bf215546Sopenharmony_ci
1015bf215546Sopenharmony_ci   dst_deref = nir_build_deref_cast(b, &dst_deref->dest.ssa, dst_deref->modes,
1016bf215546Sopenharmony_ci                                       glsl_uint8_t_type(), 1);
1017bf215546Sopenharmony_ci   src_deref = nir_build_deref_cast(b, &src_deref->dest.ssa, src_deref->modes,
1018bf215546Sopenharmony_ci                                       glsl_uint8_t_type(), 1);
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_ci   /*
1021bf215546Sopenharmony_ci    * We want to avoid 64b instructions, so let's assume we'll always be
1022bf215546Sopenharmony_ci    * passed a value that fits in a 32b type and truncate the 64b value.
1023bf215546Sopenharmony_ci    */
1024bf215546Sopenharmony_ci   num_bytes = nir_u2u32(b, num_bytes);
1025bf215546Sopenharmony_ci
1026bf215546Sopenharmony_ci   nir_variable *loop_index_var =
1027bf215546Sopenharmony_ci     nir_local_variable_create(b->impl, glsl_uint_type(), "loop_index");
1028bf215546Sopenharmony_ci   nir_deref_instr *loop_index_deref = nir_build_deref_var(b, loop_index_var);
1029bf215546Sopenharmony_ci   nir_store_deref(b, loop_index_deref, nir_imm_int(b, 0), 1);
1030bf215546Sopenharmony_ci
1031bf215546Sopenharmony_ci   nir_loop *loop = nir_push_loop(b);
1032bf215546Sopenharmony_ci   nir_ssa_def *loop_index = nir_load_deref(b, loop_index_deref);
1033bf215546Sopenharmony_ci   nir_ssa_def *cmp = nir_ige(b, loop_index, num_bytes);
1034bf215546Sopenharmony_ci   nir_if *loop_check = nir_push_if(b, cmp);
1035bf215546Sopenharmony_ci   nir_jump(b, nir_jump_break);
1036bf215546Sopenharmony_ci   nir_pop_if(b, loop_check);
1037bf215546Sopenharmony_ci   nir_ssa_def *val = memcpy_load_deref_elem(b, src_deref, loop_index);
1038bf215546Sopenharmony_ci   memcpy_store_deref_elem(b, dst_deref, loop_index, val);
1039bf215546Sopenharmony_ci   nir_store_deref(b, loop_index_deref, nir_iadd_imm(b, loop_index, 1), 1);
1040bf215546Sopenharmony_ci   nir_pop_loop(b, loop);
1041bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
1042bf215546Sopenharmony_ci   return true;
1043bf215546Sopenharmony_ci}
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_cibool
1046bf215546Sopenharmony_cidxil_nir_lower_memcpy_deref(nir_shader *nir)
1047bf215546Sopenharmony_ci{
1048bf215546Sopenharmony_ci   bool progress = false;
1049bf215546Sopenharmony_ci
1050bf215546Sopenharmony_ci   foreach_list_typed(nir_function, func, node, &nir->functions) {
1051bf215546Sopenharmony_ci      if (!func->is_entrypoint)
1052bf215546Sopenharmony_ci         continue;
1053bf215546Sopenharmony_ci      assert(func->impl);
1054bf215546Sopenharmony_ci
1055bf215546Sopenharmony_ci      nir_builder b;
1056bf215546Sopenharmony_ci      nir_builder_init(&b, func->impl);
1057bf215546Sopenharmony_ci
1058bf215546Sopenharmony_ci      nir_foreach_block(block, func->impl) {
1059bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
1060bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
1061bf215546Sopenharmony_ci               continue;
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1064bf215546Sopenharmony_ci
1065bf215546Sopenharmony_ci            if (intr->intrinsic == nir_intrinsic_memcpy_deref)
1066bf215546Sopenharmony_ci               progress |= lower_memcpy_deref(&b, intr);
1067bf215546Sopenharmony_ci         }
1068bf215546Sopenharmony_ci      }
1069bf215546Sopenharmony_ci   }
1070bf215546Sopenharmony_ci
1071bf215546Sopenharmony_ci   return progress;
1072bf215546Sopenharmony_ci}
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_cistatic void
1075bf215546Sopenharmony_cicast_phi(nir_builder *b, nir_phi_instr *phi, unsigned new_bit_size)
1076bf215546Sopenharmony_ci{
1077bf215546Sopenharmony_ci   nir_phi_instr *lowered = nir_phi_instr_create(b->shader);
1078bf215546Sopenharmony_ci   int num_components = 0;
1079bf215546Sopenharmony_ci   int old_bit_size = phi->dest.ssa.bit_size;
1080bf215546Sopenharmony_ci
1081bf215546Sopenharmony_ci   nir_op upcast_op = nir_type_conversion_op(nir_type_uint | old_bit_size,
1082bf215546Sopenharmony_ci                                             nir_type_uint | new_bit_size,
1083bf215546Sopenharmony_ci                                             nir_rounding_mode_undef);
1084bf215546Sopenharmony_ci   nir_op downcast_op = nir_type_conversion_op(nir_type_uint | new_bit_size,
1085bf215546Sopenharmony_ci                                               nir_type_uint | old_bit_size,
1086bf215546Sopenharmony_ci                                               nir_rounding_mode_undef);
1087bf215546Sopenharmony_ci
1088bf215546Sopenharmony_ci   nir_foreach_phi_src(src, phi) {
1089bf215546Sopenharmony_ci      assert(num_components == 0 || num_components == src->src.ssa->num_components);
1090bf215546Sopenharmony_ci      num_components = src->src.ssa->num_components;
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_ci      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci      nir_ssa_def *cast = nir_build_alu(b, upcast_op, src->src.ssa, NULL, NULL, NULL);
1095bf215546Sopenharmony_ci      nir_phi_instr_add_src(lowered, src->pred, nir_src_for_ssa(cast));
1096bf215546Sopenharmony_ci   }
1097bf215546Sopenharmony_ci
1098bf215546Sopenharmony_ci   nir_ssa_dest_init(&lowered->instr, &lowered->dest,
1099bf215546Sopenharmony_ci                     num_components, new_bit_size, NULL);
1100bf215546Sopenharmony_ci
1101bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&phi->instr);
1102bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &lowered->instr);
1103bf215546Sopenharmony_ci
1104bf215546Sopenharmony_ci   b->cursor = nir_after_phis(nir_cursor_current_block(b->cursor));
1105bf215546Sopenharmony_ci   nir_ssa_def *result = nir_build_alu(b, downcast_op, &lowered->dest.ssa, NULL, NULL, NULL);
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&phi->dest.ssa, result);
1108bf215546Sopenharmony_ci   nir_instr_remove(&phi->instr);
1109bf215546Sopenharmony_ci}
1110bf215546Sopenharmony_ci
1111bf215546Sopenharmony_cistatic bool
1112bf215546Sopenharmony_ciupcast_phi_impl(nir_function_impl *impl, unsigned min_bit_size)
1113bf215546Sopenharmony_ci{
1114bf215546Sopenharmony_ci   nir_builder b;
1115bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
1116bf215546Sopenharmony_ci   bool progress = false;
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_ci   nir_foreach_block_reverse(block, impl) {
1119bf215546Sopenharmony_ci      nir_foreach_instr_safe(instr, block) {
1120bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_phi)
1121bf215546Sopenharmony_ci            continue;
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci         nir_phi_instr *phi = nir_instr_as_phi(instr);
1124bf215546Sopenharmony_ci         assert(phi->dest.is_ssa);
1125bf215546Sopenharmony_ci
1126bf215546Sopenharmony_ci         if (phi->dest.ssa.bit_size == 1 ||
1127bf215546Sopenharmony_ci             phi->dest.ssa.bit_size >= min_bit_size)
1128bf215546Sopenharmony_ci            continue;
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci         cast_phi(&b, phi, min_bit_size);
1131bf215546Sopenharmony_ci         progress = true;
1132bf215546Sopenharmony_ci      }
1133bf215546Sopenharmony_ci   }
1134bf215546Sopenharmony_ci
1135bf215546Sopenharmony_ci   if (progress) {
1136bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index |
1137bf215546Sopenharmony_ci                                  nir_metadata_dominance);
1138bf215546Sopenharmony_ci   } else {
1139bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
1140bf215546Sopenharmony_ci   }
1141bf215546Sopenharmony_ci
1142bf215546Sopenharmony_ci   return progress;
1143bf215546Sopenharmony_ci}
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_cibool
1146bf215546Sopenharmony_cidxil_nir_lower_upcast_phis(nir_shader *shader, unsigned min_bit_size)
1147bf215546Sopenharmony_ci{
1148bf215546Sopenharmony_ci   bool progress = false;
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
1151bf215546Sopenharmony_ci      if (function->impl)
1152bf215546Sopenharmony_ci         progress |= upcast_phi_impl(function->impl, min_bit_size);
1153bf215546Sopenharmony_ci   }
1154bf215546Sopenharmony_ci
1155bf215546Sopenharmony_ci   return progress;
1156bf215546Sopenharmony_ci}
1157bf215546Sopenharmony_ci
1158bf215546Sopenharmony_cistruct dxil_nir_split_clip_cull_distance_params {
1159bf215546Sopenharmony_ci   nir_variable *new_var;
1160bf215546Sopenharmony_ci   nir_shader *shader;
1161bf215546Sopenharmony_ci};
1162bf215546Sopenharmony_ci
1163bf215546Sopenharmony_ci/* In GLSL and SPIR-V, clip and cull distance are arrays of floats (with a limit of 8).
1164bf215546Sopenharmony_ci * In DXIL, clip and cull distances are up to 2 float4s combined.
1165bf215546Sopenharmony_ci * Coming from GLSL, we can request this 2 float4 format, but coming from SPIR-V,
1166bf215546Sopenharmony_ci * we can't, and have to accept a "compact" array of scalar floats.
1167bf215546Sopenharmony_ci *
1168bf215546Sopenharmony_ci * To help emitting a valid input signature for this case, split the variables so that they
1169bf215546Sopenharmony_ci * match what we need to put in the signature (e.g. { float clip[4]; float clip1; float cull[3]; })
1170bf215546Sopenharmony_ci */
1171bf215546Sopenharmony_cistatic bool
1172bf215546Sopenharmony_cidxil_nir_split_clip_cull_distance_instr(nir_builder *b,
1173bf215546Sopenharmony_ci                                        nir_instr *instr,
1174bf215546Sopenharmony_ci                                        void *cb_data)
1175bf215546Sopenharmony_ci{
1176bf215546Sopenharmony_ci   struct dxil_nir_split_clip_cull_distance_params *params = cb_data;
1177bf215546Sopenharmony_ci   nir_variable *new_var = params->new_var;
1178bf215546Sopenharmony_ci
1179bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_deref)
1180bf215546Sopenharmony_ci      return false;
1181bf215546Sopenharmony_ci
1182bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_instr_as_deref(instr);
1183bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(deref);
1184bf215546Sopenharmony_ci   if (!var ||
1185bf215546Sopenharmony_ci       var->data.location < VARYING_SLOT_CLIP_DIST0 ||
1186bf215546Sopenharmony_ci       var->data.location > VARYING_SLOT_CULL_DIST1 ||
1187bf215546Sopenharmony_ci       !var->data.compact)
1188bf215546Sopenharmony_ci      return false;
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_ci   /* The location should only be inside clip distance, because clip
1191bf215546Sopenharmony_ci    * and cull should've been merged by nir_lower_clip_cull_distance_arrays()
1192bf215546Sopenharmony_ci    */
1193bf215546Sopenharmony_ci   assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
1194bf215546Sopenharmony_ci          var->data.location == VARYING_SLOT_CLIP_DIST1);
1195bf215546Sopenharmony_ci
1196bf215546Sopenharmony_ci   /* The deref chain to the clip/cull variables should be simple, just the
1197bf215546Sopenharmony_ci    * var and an array with a constant index, otherwise more lowering/optimization
1198bf215546Sopenharmony_ci    * might be needed before this pass, e.g. copy prop, lower_io_to_temporaries,
1199bf215546Sopenharmony_ci    * split_var_copies, and/or lower_var_copies. In the case of arrayed I/O like
1200bf215546Sopenharmony_ci    * inputs to the tessellation or geometry stages, there might be a second level
1201bf215546Sopenharmony_ci    * of array index.
1202bf215546Sopenharmony_ci    */
1203bf215546Sopenharmony_ci   assert(deref->deref_type == nir_deref_type_var ||
1204bf215546Sopenharmony_ci          deref->deref_type == nir_deref_type_array);
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
1207bf215546Sopenharmony_ci   unsigned arrayed_io_length = 0;
1208bf215546Sopenharmony_ci   const struct glsl_type *old_type = var->type;
1209bf215546Sopenharmony_ci   if (nir_is_arrayed_io(var, b->shader->info.stage)) {
1210bf215546Sopenharmony_ci      arrayed_io_length = glsl_array_size(old_type);
1211bf215546Sopenharmony_ci      old_type = glsl_get_array_element(old_type);
1212bf215546Sopenharmony_ci   }
1213bf215546Sopenharmony_ci   if (!new_var) {
1214bf215546Sopenharmony_ci      /* Update lengths for new and old vars */
1215bf215546Sopenharmony_ci      int old_length = glsl_array_size(old_type);
1216bf215546Sopenharmony_ci      int new_length = (old_length + var->data.location_frac) - 4;
1217bf215546Sopenharmony_ci      old_length -= new_length;
1218bf215546Sopenharmony_ci
1219bf215546Sopenharmony_ci      /* The existing variable fits in the float4 */
1220bf215546Sopenharmony_ci      if (new_length <= 0)
1221bf215546Sopenharmony_ci         return false;
1222bf215546Sopenharmony_ci
1223bf215546Sopenharmony_ci      new_var = nir_variable_clone(var, params->shader);
1224bf215546Sopenharmony_ci      nir_shader_add_variable(params->shader, new_var);
1225bf215546Sopenharmony_ci      assert(glsl_get_base_type(glsl_get_array_element(old_type)) == GLSL_TYPE_FLOAT);
1226bf215546Sopenharmony_ci      var->type = glsl_array_type(glsl_float_type(), old_length, 0);
1227bf215546Sopenharmony_ci      new_var->type = glsl_array_type(glsl_float_type(), new_length, 0);
1228bf215546Sopenharmony_ci      if (arrayed_io_length) {
1229bf215546Sopenharmony_ci         var->type = glsl_array_type(var->type, arrayed_io_length, 0);
1230bf215546Sopenharmony_ci         new_var->type = glsl_array_type(new_var->type, arrayed_io_length, 0);
1231bf215546Sopenharmony_ci      }
1232bf215546Sopenharmony_ci      new_var->data.location++;
1233bf215546Sopenharmony_ci      new_var->data.location_frac = 0;
1234bf215546Sopenharmony_ci      params->new_var = new_var;
1235bf215546Sopenharmony_ci   }
1236bf215546Sopenharmony_ci
1237bf215546Sopenharmony_ci   /* Update the type for derefs of the old var */
1238bf215546Sopenharmony_ci   if (deref->deref_type == nir_deref_type_var) {
1239bf215546Sopenharmony_ci      deref->type = var->type;
1240bf215546Sopenharmony_ci      return false;
1241bf215546Sopenharmony_ci   }
1242bf215546Sopenharmony_ci
1243bf215546Sopenharmony_ci   if (glsl_type_is_array(deref->type)) {
1244bf215546Sopenharmony_ci      assert(arrayed_io_length > 0);
1245bf215546Sopenharmony_ci      deref->type = glsl_get_array_element(var->type);
1246bf215546Sopenharmony_ci      return false;
1247bf215546Sopenharmony_ci   }
1248bf215546Sopenharmony_ci
1249bf215546Sopenharmony_ci   assert(glsl_get_base_type(deref->type) == GLSL_TYPE_FLOAT);
1250bf215546Sopenharmony_ci
1251bf215546Sopenharmony_ci   nir_const_value *index = nir_src_as_const_value(deref->arr.index);
1252bf215546Sopenharmony_ci   assert(index);
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci   /* Treat this array as a vector starting at the component index in location_frac,
1255bf215546Sopenharmony_ci    * so if location_frac is 1 and index is 0, then it's accessing the 'y' component
1256bf215546Sopenharmony_ci    * of the vector. If index + location_frac is >= 4, there's no component there,
1257bf215546Sopenharmony_ci    * so we need to add a new variable and adjust the index.
1258bf215546Sopenharmony_ci    */
1259bf215546Sopenharmony_ci   unsigned total_index = index->u32 + var->data.location_frac;
1260bf215546Sopenharmony_ci   if (total_index < 4)
1261bf215546Sopenharmony_ci      return false;
1262bf215546Sopenharmony_ci
1263bf215546Sopenharmony_ci   nir_deref_instr *new_var_deref = nir_build_deref_var(b, new_var);
1264bf215546Sopenharmony_ci   nir_deref_instr *new_intermediate_deref = new_var_deref;
1265bf215546Sopenharmony_ci   if (arrayed_io_length) {
1266bf215546Sopenharmony_ci      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1267bf215546Sopenharmony_ci      assert(parent->deref_type == nir_deref_type_array);
1268bf215546Sopenharmony_ci      new_intermediate_deref = nir_build_deref_array(b, new_intermediate_deref, parent->arr.index.ssa);
1269bf215546Sopenharmony_ci   }
1270bf215546Sopenharmony_ci   nir_deref_instr *new_array_deref = nir_build_deref_array(b, new_intermediate_deref, nir_imm_int(b, total_index % 4));
1271bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&deref->dest.ssa, &new_array_deref->dest.ssa);
1272bf215546Sopenharmony_ci   return true;
1273bf215546Sopenharmony_ci}
1274bf215546Sopenharmony_ci
1275bf215546Sopenharmony_cibool
1276bf215546Sopenharmony_cidxil_nir_split_clip_cull_distance(nir_shader *shader)
1277bf215546Sopenharmony_ci{
1278bf215546Sopenharmony_ci   struct dxil_nir_split_clip_cull_distance_params params = {
1279bf215546Sopenharmony_ci      .new_var = NULL,
1280bf215546Sopenharmony_ci      .shader = shader,
1281bf215546Sopenharmony_ci   };
1282bf215546Sopenharmony_ci   nir_shader_instructions_pass(shader,
1283bf215546Sopenharmony_ci                                dxil_nir_split_clip_cull_distance_instr,
1284bf215546Sopenharmony_ci                                nir_metadata_block_index |
1285bf215546Sopenharmony_ci                                nir_metadata_dominance |
1286bf215546Sopenharmony_ci                                nir_metadata_loop_analysis,
1287bf215546Sopenharmony_ci                                &params);
1288bf215546Sopenharmony_ci   return params.new_var != NULL;
1289bf215546Sopenharmony_ci}
1290bf215546Sopenharmony_ci
1291bf215546Sopenharmony_cistatic bool
1292bf215546Sopenharmony_cidxil_nir_lower_double_math_instr(nir_builder *b,
1293bf215546Sopenharmony_ci                                 nir_instr *instr,
1294bf215546Sopenharmony_ci                                 UNUSED void *cb_data)
1295bf215546Sopenharmony_ci{
1296bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_alu)
1297bf215546Sopenharmony_ci      return false;
1298bf215546Sopenharmony_ci
1299bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(instr);
1300bf215546Sopenharmony_ci
1301bf215546Sopenharmony_ci   /* TODO: See if we can apply this explicitly to packs/unpacks that are then
1302bf215546Sopenharmony_ci    * used as a double. As-is, if we had an app explicitly do a 64bit integer op,
1303bf215546Sopenharmony_ci    * then try to bitcast to double (not expressible in HLSL, but it is in other
1304bf215546Sopenharmony_ci    * source languages), this would unpack the integer and repack as a double, when
1305bf215546Sopenharmony_ci    * we probably want to just send the bitcast through to the backend.
1306bf215546Sopenharmony_ci    */
1307bf215546Sopenharmony_ci
1308bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&alu->instr);
1309bf215546Sopenharmony_ci
1310bf215546Sopenharmony_ci   bool progress = false;
1311bf215546Sopenharmony_ci   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i) {
1312bf215546Sopenharmony_ci      if (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i]) == nir_type_float &&
1313bf215546Sopenharmony_ci          alu->src[i].src.ssa->bit_size == 64) {
1314bf215546Sopenharmony_ci         unsigned num_components = nir_op_infos[alu->op].input_sizes[i];
1315bf215546Sopenharmony_ci         if (!num_components)
1316bf215546Sopenharmony_ci            num_components = alu->dest.dest.ssa.num_components;
1317bf215546Sopenharmony_ci         nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
1318bf215546Sopenharmony_ci         for (unsigned c = 0; c < num_components; ++c) {
1319bf215546Sopenharmony_ci            nir_ssa_def *packed_double = nir_channel(b, alu->src[i].src.ssa, alu->src[i].swizzle[c]);
1320bf215546Sopenharmony_ci            nir_ssa_def *unpacked_double = nir_unpack_64_2x32(b, packed_double);
1321bf215546Sopenharmony_ci            components[c] = nir_pack_double_2x32_dxil(b, unpacked_double);
1322bf215546Sopenharmony_ci            alu->src[i].swizzle[c] = c;
1323bf215546Sopenharmony_ci         }
1324bf215546Sopenharmony_ci         nir_instr_rewrite_src_ssa(instr, &alu->src[i].src, nir_vec(b, components, num_components));
1325bf215546Sopenharmony_ci         progress = true;
1326bf215546Sopenharmony_ci      }
1327bf215546Sopenharmony_ci   }
1328bf215546Sopenharmony_ci
1329bf215546Sopenharmony_ci   if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float &&
1330bf215546Sopenharmony_ci       alu->dest.dest.ssa.bit_size == 64) {
1331bf215546Sopenharmony_ci      b->cursor = nir_after_instr(&alu->instr);
1332bf215546Sopenharmony_ci      nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
1333bf215546Sopenharmony_ci      for (unsigned c = 0; c < alu->dest.dest.ssa.num_components; ++c) {
1334bf215546Sopenharmony_ci         nir_ssa_def *packed_double = nir_channel(b, &alu->dest.dest.ssa, c);
1335bf215546Sopenharmony_ci         nir_ssa_def *unpacked_double = nir_unpack_double_2x32_dxil(b, packed_double);
1336bf215546Sopenharmony_ci         components[c] = nir_pack_64_2x32(b, unpacked_double);
1337bf215546Sopenharmony_ci      }
1338bf215546Sopenharmony_ci      nir_ssa_def *repacked_dvec = nir_vec(b, components, alu->dest.dest.ssa.num_components);
1339bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, repacked_dvec, repacked_dvec->parent_instr);
1340bf215546Sopenharmony_ci      progress = true;
1341bf215546Sopenharmony_ci   }
1342bf215546Sopenharmony_ci
1343bf215546Sopenharmony_ci   return progress;
1344bf215546Sopenharmony_ci}
1345bf215546Sopenharmony_ci
1346bf215546Sopenharmony_cibool
1347bf215546Sopenharmony_cidxil_nir_lower_double_math(nir_shader *shader)
1348bf215546Sopenharmony_ci{
1349bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader,
1350bf215546Sopenharmony_ci                                       dxil_nir_lower_double_math_instr,
1351bf215546Sopenharmony_ci                                       nir_metadata_block_index |
1352bf215546Sopenharmony_ci                                       nir_metadata_dominance |
1353bf215546Sopenharmony_ci                                       nir_metadata_loop_analysis,
1354bf215546Sopenharmony_ci                                       NULL);
1355bf215546Sopenharmony_ci}
1356bf215546Sopenharmony_ci
1357bf215546Sopenharmony_citypedef struct {
1358bf215546Sopenharmony_ci   gl_system_value *values;
1359bf215546Sopenharmony_ci   uint32_t count;
1360bf215546Sopenharmony_ci} zero_system_values_state;
1361bf215546Sopenharmony_ci
1362bf215546Sopenharmony_cistatic bool
1363bf215546Sopenharmony_cilower_system_value_to_zero_filter(const nir_instr* instr, const void* cb_state)
1364bf215546Sopenharmony_ci{
1365bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic) {
1366bf215546Sopenharmony_ci      return false;
1367bf215546Sopenharmony_ci   }
1368bf215546Sopenharmony_ci
1369bf215546Sopenharmony_ci   nir_intrinsic_instr* intrin = nir_instr_as_intrinsic(instr);
1370bf215546Sopenharmony_ci
1371bf215546Sopenharmony_ci   /* All the intrinsics we care about are loads */
1372bf215546Sopenharmony_ci   if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
1373bf215546Sopenharmony_ci      return false;
1374bf215546Sopenharmony_ci
1375bf215546Sopenharmony_ci   assert(intrin->dest.is_ssa);
1376bf215546Sopenharmony_ci
1377bf215546Sopenharmony_ci   zero_system_values_state* state = (zero_system_values_state*)cb_state;
1378bf215546Sopenharmony_ci   for (uint32_t i = 0; i < state->count; ++i) {
1379bf215546Sopenharmony_ci      gl_system_value value = state->values[i];
1380bf215546Sopenharmony_ci      nir_intrinsic_op value_op = nir_intrinsic_from_system_value(value);
1381bf215546Sopenharmony_ci
1382bf215546Sopenharmony_ci      if (intrin->intrinsic == value_op) {
1383bf215546Sopenharmony_ci         return true;
1384bf215546Sopenharmony_ci      } else if (intrin->intrinsic == nir_intrinsic_load_deref) {
1385bf215546Sopenharmony_ci         nir_deref_instr* deref = nir_src_as_deref(intrin->src[0]);
1386bf215546Sopenharmony_ci         if (!nir_deref_mode_is(deref, nir_var_system_value))
1387bf215546Sopenharmony_ci            return false;
1388bf215546Sopenharmony_ci
1389bf215546Sopenharmony_ci         nir_variable* var = deref->var;
1390bf215546Sopenharmony_ci         if (var->data.location == value) {
1391bf215546Sopenharmony_ci            return true;
1392bf215546Sopenharmony_ci         }
1393bf215546Sopenharmony_ci      }
1394bf215546Sopenharmony_ci   }
1395bf215546Sopenharmony_ci
1396bf215546Sopenharmony_ci   return false;
1397bf215546Sopenharmony_ci}
1398bf215546Sopenharmony_ci
1399bf215546Sopenharmony_cistatic nir_ssa_def*
1400bf215546Sopenharmony_cilower_system_value_to_zero_instr(nir_builder* b, nir_instr* instr, void* _state)
1401bf215546Sopenharmony_ci{
1402bf215546Sopenharmony_ci   return nir_imm_int(b, 0);
1403bf215546Sopenharmony_ci}
1404bf215546Sopenharmony_ci
1405bf215546Sopenharmony_cibool
1406bf215546Sopenharmony_cidxil_nir_lower_system_values_to_zero(nir_shader* shader,
1407bf215546Sopenharmony_ci                                     gl_system_value* system_values,
1408bf215546Sopenharmony_ci                                     uint32_t count)
1409bf215546Sopenharmony_ci{
1410bf215546Sopenharmony_ci   zero_system_values_state state = { system_values, count };
1411bf215546Sopenharmony_ci   return nir_shader_lower_instructions(shader,
1412bf215546Sopenharmony_ci      lower_system_value_to_zero_filter,
1413bf215546Sopenharmony_ci      lower_system_value_to_zero_instr,
1414bf215546Sopenharmony_ci      &state);
1415bf215546Sopenharmony_ci}
1416bf215546Sopenharmony_ci
1417bf215546Sopenharmony_cistatic void
1418bf215546Sopenharmony_cilower_load_local_group_size(nir_builder *b, nir_intrinsic_instr *intr)
1419bf215546Sopenharmony_ci{
1420bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&intr->instr);
1421bf215546Sopenharmony_ci
1422bf215546Sopenharmony_ci   nir_const_value v[3] = {
1423bf215546Sopenharmony_ci      nir_const_value_for_int(b->shader->info.workgroup_size[0], 32),
1424bf215546Sopenharmony_ci      nir_const_value_for_int(b->shader->info.workgroup_size[1], 32),
1425bf215546Sopenharmony_ci      nir_const_value_for_int(b->shader->info.workgroup_size[2], 32)
1426bf215546Sopenharmony_ci   };
1427bf215546Sopenharmony_ci   nir_ssa_def *size = nir_build_imm(b, 3, 32, v);
1428bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, size);
1429bf215546Sopenharmony_ci   nir_instr_remove(&intr->instr);
1430bf215546Sopenharmony_ci}
1431bf215546Sopenharmony_ci
1432bf215546Sopenharmony_cistatic bool
1433bf215546Sopenharmony_cilower_system_values_impl(nir_builder *b, nir_instr *instr, void *_state)
1434bf215546Sopenharmony_ci{
1435bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1436bf215546Sopenharmony_ci      return false;
1437bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1438bf215546Sopenharmony_ci   switch (intr->intrinsic) {
1439bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_size:
1440bf215546Sopenharmony_ci      lower_load_local_group_size(b, intr);
1441bf215546Sopenharmony_ci      return true;
1442bf215546Sopenharmony_ci   default:
1443bf215546Sopenharmony_ci      return false;
1444bf215546Sopenharmony_ci   }
1445bf215546Sopenharmony_ci}
1446bf215546Sopenharmony_ci
1447bf215546Sopenharmony_cibool
1448bf215546Sopenharmony_cidxil_nir_lower_system_values(nir_shader *shader)
1449bf215546Sopenharmony_ci{
1450bf215546Sopenharmony_ci   return nir_shader_instructions_pass(shader, lower_system_values_impl,
1451bf215546Sopenharmony_ci      nir_metadata_block_index | nir_metadata_dominance | nir_metadata_loop_analysis, NULL);
1452bf215546Sopenharmony_ci}
1453bf215546Sopenharmony_ci
1454bf215546Sopenharmony_cistatic const struct glsl_type *
1455bf215546Sopenharmony_ciget_bare_samplers_for_type(const struct glsl_type *type, bool is_shadow)
1456bf215546Sopenharmony_ci{
1457bf215546Sopenharmony_ci   const struct glsl_type *base_sampler_type =
1458bf215546Sopenharmony_ci      is_shadow ?
1459bf215546Sopenharmony_ci      glsl_bare_shadow_sampler_type() : glsl_bare_sampler_type();
1460bf215546Sopenharmony_ci   return glsl_type_wrap_in_arrays(base_sampler_type, type);
1461bf215546Sopenharmony_ci}
1462bf215546Sopenharmony_ci
1463bf215546Sopenharmony_cistatic const struct glsl_type *
1464bf215546Sopenharmony_ciget_textures_for_sampler_type(const struct glsl_type *type)
1465bf215546Sopenharmony_ci{
1466bf215546Sopenharmony_ci   return glsl_type_wrap_in_arrays(
1467bf215546Sopenharmony_ci      glsl_sampler_type_to_texture(
1468bf215546Sopenharmony_ci         glsl_without_array(type)), type);
1469bf215546Sopenharmony_ci}
1470bf215546Sopenharmony_ci
1471bf215546Sopenharmony_cistatic bool
1472bf215546Sopenharmony_ciredirect_sampler_derefs(struct nir_builder *b, nir_instr *instr, void *data)
1473bf215546Sopenharmony_ci{
1474bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_tex)
1475bf215546Sopenharmony_ci      return false;
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_instr_as_tex(instr);
1478bf215546Sopenharmony_ci
1479bf215546Sopenharmony_ci   int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
1480bf215546Sopenharmony_ci   if (sampler_idx == -1) {
1481bf215546Sopenharmony_ci      /* No sampler deref - does this instruction even need a sampler? If not,
1482bf215546Sopenharmony_ci       * sampler_index doesn't necessarily point to a sampler, so early-out.
1483bf215546Sopenharmony_ci       */
1484bf215546Sopenharmony_ci      if (!nir_tex_instr_need_sampler(tex))
1485bf215546Sopenharmony_ci         return false;
1486bf215546Sopenharmony_ci
1487bf215546Sopenharmony_ci      /* No derefs but needs a sampler, must be using indices */
1488bf215546Sopenharmony_ci      nir_variable *bare_sampler = _mesa_hash_table_u64_search(data, tex->sampler_index);
1489bf215546Sopenharmony_ci
1490bf215546Sopenharmony_ci      /* Already have a bare sampler here */
1491bf215546Sopenharmony_ci      if (bare_sampler)
1492bf215546Sopenharmony_ci         return false;
1493bf215546Sopenharmony_ci
1494bf215546Sopenharmony_ci      nir_variable *old_sampler = NULL;
1495bf215546Sopenharmony_ci      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
1496bf215546Sopenharmony_ci         if (var->data.binding <= tex->sampler_index &&
1497bf215546Sopenharmony_ci             var->data.binding + glsl_type_get_sampler_count(var->type) >
1498bf215546Sopenharmony_ci                tex->sampler_index) {
1499bf215546Sopenharmony_ci
1500bf215546Sopenharmony_ci            /* Already have a bare sampler for this binding and it is of the
1501bf215546Sopenharmony_ci             * correct type, add it to the table */
1502bf215546Sopenharmony_ci            if (glsl_type_is_bare_sampler(glsl_without_array(var->type)) &&
1503bf215546Sopenharmony_ci                glsl_sampler_type_is_shadow(glsl_without_array(var->type)) ==
1504bf215546Sopenharmony_ci                   tex->is_shadow) {
1505bf215546Sopenharmony_ci               _mesa_hash_table_u64_insert(data, tex->sampler_index, var);
1506bf215546Sopenharmony_ci               return false;
1507bf215546Sopenharmony_ci            }
1508bf215546Sopenharmony_ci
1509bf215546Sopenharmony_ci            old_sampler = var;
1510bf215546Sopenharmony_ci         }
1511bf215546Sopenharmony_ci      }
1512bf215546Sopenharmony_ci
1513bf215546Sopenharmony_ci      assert(old_sampler);
1514bf215546Sopenharmony_ci
1515bf215546Sopenharmony_ci      /* Clone the original sampler to a bare sampler of the correct type */
1516bf215546Sopenharmony_ci      bare_sampler = nir_variable_clone(old_sampler, b->shader);
1517bf215546Sopenharmony_ci      nir_shader_add_variable(b->shader, bare_sampler);
1518bf215546Sopenharmony_ci
1519bf215546Sopenharmony_ci      bare_sampler->type =
1520bf215546Sopenharmony_ci         get_bare_samplers_for_type(old_sampler->type, tex->is_shadow);
1521bf215546Sopenharmony_ci      _mesa_hash_table_u64_insert(data, tex->sampler_index, bare_sampler);
1522bf215546Sopenharmony_ci      return true;
1523bf215546Sopenharmony_ci   }
1524bf215546Sopenharmony_ci
1525bf215546Sopenharmony_ci   /* Using derefs, means we have to rewrite the deref chain in addition to cloning */
1526bf215546Sopenharmony_ci   nir_deref_instr *final_deref = nir_src_as_deref(tex->src[sampler_idx].src);
1527bf215546Sopenharmony_ci   nir_deref_path path;
1528bf215546Sopenharmony_ci   nir_deref_path_init(&path, final_deref, NULL);
1529bf215546Sopenharmony_ci
1530bf215546Sopenharmony_ci   nir_deref_instr *old_tail = path.path[0];
1531bf215546Sopenharmony_ci   assert(old_tail->deref_type == nir_deref_type_var);
1532bf215546Sopenharmony_ci   nir_variable *old_var = old_tail->var;
1533bf215546Sopenharmony_ci   if (glsl_type_is_bare_sampler(glsl_without_array(old_var->type)) &&
1534bf215546Sopenharmony_ci       glsl_sampler_type_is_shadow(glsl_without_array(old_var->type)) ==
1535bf215546Sopenharmony_ci          tex->is_shadow) {
1536bf215546Sopenharmony_ci      nir_deref_path_finish(&path);
1537bf215546Sopenharmony_ci      return false;
1538bf215546Sopenharmony_ci   }
1539bf215546Sopenharmony_ci
1540bf215546Sopenharmony_ci   uint64_t var_key = ((uint64_t)old_var->data.descriptor_set << 32) |
1541bf215546Sopenharmony_ci                      old_var->data.binding;
1542bf215546Sopenharmony_ci   nir_variable *new_var = _mesa_hash_table_u64_search(data, var_key);
1543bf215546Sopenharmony_ci   if (!new_var) {
1544bf215546Sopenharmony_ci      new_var = nir_variable_clone(old_var, b->shader);
1545bf215546Sopenharmony_ci      nir_shader_add_variable(b->shader, new_var);
1546bf215546Sopenharmony_ci      new_var->type =
1547bf215546Sopenharmony_ci         get_bare_samplers_for_type(old_var->type, tex->is_shadow);
1548bf215546Sopenharmony_ci      _mesa_hash_table_u64_insert(data, var_key, new_var);
1549bf215546Sopenharmony_ci   }
1550bf215546Sopenharmony_ci
1551bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&old_tail->instr);
1552bf215546Sopenharmony_ci   nir_deref_instr *new_tail = nir_build_deref_var(b, new_var);
1553bf215546Sopenharmony_ci
1554bf215546Sopenharmony_ci   for (unsigned i = 1; path.path[i]; ++i) {
1555bf215546Sopenharmony_ci      b->cursor = nir_after_instr(&path.path[i]->instr);
1556bf215546Sopenharmony_ci      new_tail = nir_build_deref_follower(b, new_tail, path.path[i]);
1557bf215546Sopenharmony_ci   }
1558bf215546Sopenharmony_ci
1559bf215546Sopenharmony_ci   nir_deref_path_finish(&path);
1560bf215546Sopenharmony_ci   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[sampler_idx].src, &new_tail->dest.ssa);
1561bf215546Sopenharmony_ci   return true;
1562bf215546Sopenharmony_ci}
1563bf215546Sopenharmony_ci
1564bf215546Sopenharmony_cistatic bool
1565bf215546Sopenharmony_ciredirect_texture_derefs(struct nir_builder *b, nir_instr *instr, void *data)
1566bf215546Sopenharmony_ci{
1567bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_tex)
1568bf215546Sopenharmony_ci      return false;
1569bf215546Sopenharmony_ci
1570bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_instr_as_tex(instr);
1571bf215546Sopenharmony_ci
1572bf215546Sopenharmony_ci   int texture_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1573bf215546Sopenharmony_ci   if (texture_idx == -1) {
1574bf215546Sopenharmony_ci      /* No derefs, must be using indices */
1575bf215546Sopenharmony_ci      nir_variable *bare_sampler = _mesa_hash_table_u64_search(data, tex->texture_index);
1576bf215546Sopenharmony_ci
1577bf215546Sopenharmony_ci      /* Already have a texture here */
1578bf215546Sopenharmony_ci      if (bare_sampler)
1579bf215546Sopenharmony_ci         return false;
1580bf215546Sopenharmony_ci
1581bf215546Sopenharmony_ci      nir_variable *typed_sampler = NULL;
1582bf215546Sopenharmony_ci      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
1583bf215546Sopenharmony_ci         if (var->data.binding <= tex->texture_index &&
1584bf215546Sopenharmony_ci             var->data.binding + glsl_type_get_texture_count(var->type) > tex->texture_index) {
1585bf215546Sopenharmony_ci            /* Already have a texture for this binding, add it to the table */
1586bf215546Sopenharmony_ci            _mesa_hash_table_u64_insert(data, tex->texture_index, var);
1587bf215546Sopenharmony_ci            return false;
1588bf215546Sopenharmony_ci         }
1589bf215546Sopenharmony_ci
1590bf215546Sopenharmony_ci         if (var->data.binding <= tex->texture_index &&
1591bf215546Sopenharmony_ci             var->data.binding + glsl_type_get_sampler_count(var->type) > tex->texture_index &&
1592bf215546Sopenharmony_ci             !glsl_type_is_bare_sampler(glsl_without_array(var->type))) {
1593bf215546Sopenharmony_ci            typed_sampler = var;
1594bf215546Sopenharmony_ci         }
1595bf215546Sopenharmony_ci      }
1596bf215546Sopenharmony_ci
1597bf215546Sopenharmony_ci      /* Clone the typed sampler to a texture and we're done */
1598bf215546Sopenharmony_ci      assert(typed_sampler);
1599bf215546Sopenharmony_ci      bare_sampler = nir_variable_clone(typed_sampler, b->shader);
1600bf215546Sopenharmony_ci      bare_sampler->type = get_textures_for_sampler_type(typed_sampler->type);
1601bf215546Sopenharmony_ci      nir_shader_add_variable(b->shader, bare_sampler);
1602bf215546Sopenharmony_ci      _mesa_hash_table_u64_insert(data, tex->texture_index, bare_sampler);
1603bf215546Sopenharmony_ci      return true;
1604bf215546Sopenharmony_ci   }
1605bf215546Sopenharmony_ci
1606bf215546Sopenharmony_ci   /* Using derefs, means we have to rewrite the deref chain in addition to cloning */
1607bf215546Sopenharmony_ci   nir_deref_instr *final_deref = nir_src_as_deref(tex->src[texture_idx].src);
1608bf215546Sopenharmony_ci   nir_deref_path path;
1609bf215546Sopenharmony_ci   nir_deref_path_init(&path, final_deref, NULL);
1610bf215546Sopenharmony_ci
1611bf215546Sopenharmony_ci   nir_deref_instr *old_tail = path.path[0];
1612bf215546Sopenharmony_ci   assert(old_tail->deref_type == nir_deref_type_var);
1613bf215546Sopenharmony_ci   nir_variable *old_var = old_tail->var;
1614bf215546Sopenharmony_ci   if (glsl_type_is_texture(glsl_without_array(old_var->type)) ||
1615bf215546Sopenharmony_ci       glsl_type_is_image(glsl_without_array(old_var->type))) {
1616bf215546Sopenharmony_ci      nir_deref_path_finish(&path);
1617bf215546Sopenharmony_ci      return false;
1618bf215546Sopenharmony_ci   }
1619bf215546Sopenharmony_ci
1620bf215546Sopenharmony_ci   uint64_t var_key = ((uint64_t)old_var->data.descriptor_set << 32) |
1621bf215546Sopenharmony_ci                      old_var->data.binding;
1622bf215546Sopenharmony_ci   nir_variable *new_var = _mesa_hash_table_u64_search(data, var_key);
1623bf215546Sopenharmony_ci   if (!new_var) {
1624bf215546Sopenharmony_ci      new_var = nir_variable_clone(old_var, b->shader);
1625bf215546Sopenharmony_ci      new_var->type = get_textures_for_sampler_type(old_var->type);
1626bf215546Sopenharmony_ci      nir_shader_add_variable(b->shader, new_var);
1627bf215546Sopenharmony_ci      _mesa_hash_table_u64_insert(data, var_key, new_var);
1628bf215546Sopenharmony_ci   }
1629bf215546Sopenharmony_ci
1630bf215546Sopenharmony_ci   b->cursor = nir_after_instr(&old_tail->instr);
1631bf215546Sopenharmony_ci   nir_deref_instr *new_tail = nir_build_deref_var(b, new_var);
1632bf215546Sopenharmony_ci
1633bf215546Sopenharmony_ci   for (unsigned i = 1; path.path[i]; ++i) {
1634bf215546Sopenharmony_ci      b->cursor = nir_after_instr(&path.path[i]->instr);
1635bf215546Sopenharmony_ci      new_tail = nir_build_deref_follower(b, new_tail, path.path[i]);
1636bf215546Sopenharmony_ci   }
1637bf215546Sopenharmony_ci
1638bf215546Sopenharmony_ci   nir_deref_path_finish(&path);
1639bf215546Sopenharmony_ci   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[texture_idx].src, &new_tail->dest.ssa);
1640bf215546Sopenharmony_ci
1641bf215546Sopenharmony_ci   return true;
1642bf215546Sopenharmony_ci}
1643bf215546Sopenharmony_ci
1644bf215546Sopenharmony_cibool
1645bf215546Sopenharmony_cidxil_nir_split_typed_samplers(nir_shader *nir)
1646bf215546Sopenharmony_ci{
1647bf215546Sopenharmony_ci   struct hash_table_u64 *hash_table = _mesa_hash_table_u64_create(NULL);
1648bf215546Sopenharmony_ci
1649bf215546Sopenharmony_ci   bool progress = nir_shader_instructions_pass(nir, redirect_sampler_derefs,
1650bf215546Sopenharmony_ci      nir_metadata_block_index | nir_metadata_dominance | nir_metadata_loop_analysis, hash_table);
1651bf215546Sopenharmony_ci
1652bf215546Sopenharmony_ci   _mesa_hash_table_u64_clear(hash_table);
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci   progress |= nir_shader_instructions_pass(nir, redirect_texture_derefs,
1655bf215546Sopenharmony_ci      nir_metadata_block_index | nir_metadata_dominance | nir_metadata_loop_analysis, hash_table);
1656bf215546Sopenharmony_ci
1657bf215546Sopenharmony_ci   _mesa_hash_table_u64_destroy(hash_table);
1658bf215546Sopenharmony_ci   return progress;
1659bf215546Sopenharmony_ci}
1660bf215546Sopenharmony_ci
1661bf215546Sopenharmony_ci
1662bf215546Sopenharmony_cistatic bool
1663bf215546Sopenharmony_cilower_bool_input_filter(const nir_instr *instr,
1664bf215546Sopenharmony_ci                        UNUSED const void *_options)
1665bf215546Sopenharmony_ci{
1666bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1667bf215546Sopenharmony_ci      return false;
1668bf215546Sopenharmony_ci
1669bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1670bf215546Sopenharmony_ci   if (intr->intrinsic == nir_intrinsic_load_front_face)
1671bf215546Sopenharmony_ci      return true;
1672bf215546Sopenharmony_ci
1673bf215546Sopenharmony_ci   if (intr->intrinsic == nir_intrinsic_load_deref) {
1674bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
1675bf215546Sopenharmony_ci      nir_variable *var = nir_deref_instr_get_variable(deref);
1676bf215546Sopenharmony_ci      return var->data.mode == nir_var_shader_in &&
1677bf215546Sopenharmony_ci             glsl_get_base_type(var->type) == GLSL_TYPE_BOOL;
1678bf215546Sopenharmony_ci   }
1679bf215546Sopenharmony_ci
1680bf215546Sopenharmony_ci   return false;
1681bf215546Sopenharmony_ci}
1682bf215546Sopenharmony_ci
1683bf215546Sopenharmony_cistatic nir_ssa_def *
1684bf215546Sopenharmony_cilower_bool_input_impl(nir_builder *b, nir_instr *instr,
1685bf215546Sopenharmony_ci                      UNUSED void *_options)
1686bf215546Sopenharmony_ci{
1687bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_ci   if (intr->intrinsic == nir_intrinsic_load_deref) {
1690bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
1691bf215546Sopenharmony_ci      nir_variable *var = nir_deref_instr_get_variable(deref);
1692bf215546Sopenharmony_ci
1693bf215546Sopenharmony_ci      /* rewrite var->type */
1694bf215546Sopenharmony_ci      var->type = glsl_vector_type(GLSL_TYPE_UINT,
1695bf215546Sopenharmony_ci                                   glsl_get_vector_elements(var->type));
1696bf215546Sopenharmony_ci      deref->type = var->type;
1697bf215546Sopenharmony_ci   }
1698bf215546Sopenharmony_ci
1699bf215546Sopenharmony_ci   intr->dest.ssa.bit_size = 32;
1700bf215546Sopenharmony_ci   return nir_i2b1(b, &intr->dest.ssa);
1701bf215546Sopenharmony_ci}
1702bf215546Sopenharmony_ci
1703bf215546Sopenharmony_cibool
1704bf215546Sopenharmony_cidxil_nir_lower_bool_input(struct nir_shader *s)
1705bf215546Sopenharmony_ci{
1706bf215546Sopenharmony_ci   return nir_shader_lower_instructions(s, lower_bool_input_filter,
1707bf215546Sopenharmony_ci                                        lower_bool_input_impl, NULL);
1708bf215546Sopenharmony_ci}
1709bf215546Sopenharmony_ci
1710bf215546Sopenharmony_cistatic bool
1711bf215546Sopenharmony_cilower_sysval_to_load_input_impl(nir_builder *b, nir_instr *instr, void *data)
1712bf215546Sopenharmony_ci{
1713bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1714bf215546Sopenharmony_ci      return false;
1715bf215546Sopenharmony_ci
1716bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1717bf215546Sopenharmony_ci   gl_system_value sysval = SYSTEM_VALUE_MAX;
1718bf215546Sopenharmony_ci   switch (intr->intrinsic) {
1719bf215546Sopenharmony_ci   case nir_intrinsic_load_front_face:
1720bf215546Sopenharmony_ci      sysval = SYSTEM_VALUE_FRONT_FACE;
1721bf215546Sopenharmony_ci      break;
1722bf215546Sopenharmony_ci   case nir_intrinsic_load_instance_id:
1723bf215546Sopenharmony_ci      sysval = SYSTEM_VALUE_INSTANCE_ID;
1724bf215546Sopenharmony_ci      break;
1725bf215546Sopenharmony_ci   case nir_intrinsic_load_vertex_id_zero_base:
1726bf215546Sopenharmony_ci      sysval = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
1727bf215546Sopenharmony_ci      break;
1728bf215546Sopenharmony_ci   default:
1729bf215546Sopenharmony_ci      return false;
1730bf215546Sopenharmony_ci   }
1731bf215546Sopenharmony_ci
1732bf215546Sopenharmony_ci   nir_variable **sysval_vars = (nir_variable **)data;
1733bf215546Sopenharmony_ci   nir_variable *var = sysval_vars[sysval];
1734bf215546Sopenharmony_ci   assert(var);
1735bf215546Sopenharmony_ci
1736bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
1737bf215546Sopenharmony_ci   nir_ssa_def *result = nir_build_load_input(b, intr->dest.ssa.num_components, intr->dest.ssa.bit_size, nir_imm_int(b, 0),
1738bf215546Sopenharmony_ci      .base = var->data.driver_location, .dest_type = nir_get_nir_type_for_glsl_type(var->type));
1739bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
1740bf215546Sopenharmony_ci   return true;
1741bf215546Sopenharmony_ci}
1742bf215546Sopenharmony_ci
1743bf215546Sopenharmony_cibool
1744bf215546Sopenharmony_cidxil_nir_lower_sysval_to_load_input(nir_shader *s, nir_variable **sysval_vars)
1745bf215546Sopenharmony_ci{
1746bf215546Sopenharmony_ci   return nir_shader_instructions_pass(s, lower_sysval_to_load_input_impl,
1747bf215546Sopenharmony_ci      nir_metadata_block_index | nir_metadata_dominance, sysval_vars);
1748bf215546Sopenharmony_ci}
1749bf215546Sopenharmony_ci
1750bf215546Sopenharmony_ci/* Comparison function to sort io values so that first come normal varyings,
1751bf215546Sopenharmony_ci * then system values, and then system generated values.
1752bf215546Sopenharmony_ci */
1753bf215546Sopenharmony_cistatic int
1754bf215546Sopenharmony_civariable_location_cmp(const nir_variable* a, const nir_variable* b)
1755bf215546Sopenharmony_ci{
1756bf215546Sopenharmony_ci   // Sort by stream, driver_location, location, location_frac, then index
1757bf215546Sopenharmony_ci   unsigned a_location = a->data.location;
1758bf215546Sopenharmony_ci   if (a_location >= VARYING_SLOT_PATCH0)
1759bf215546Sopenharmony_ci      a_location -= VARYING_SLOT_PATCH0;
1760bf215546Sopenharmony_ci   unsigned b_location = b->data.location;
1761bf215546Sopenharmony_ci   if (b_location >= VARYING_SLOT_PATCH0)
1762bf215546Sopenharmony_ci      b_location -= VARYING_SLOT_PATCH0;
1763bf215546Sopenharmony_ci   unsigned a_stream = a->data.stream & ~NIR_STREAM_PACKED;
1764bf215546Sopenharmony_ci   unsigned b_stream = b->data.stream & ~NIR_STREAM_PACKED;
1765bf215546Sopenharmony_ci   return a_stream != b_stream ?
1766bf215546Sopenharmony_ci            a_stream - b_stream :
1767bf215546Sopenharmony_ci            a->data.driver_location != b->data.driver_location ?
1768bf215546Sopenharmony_ci               a->data.driver_location - b->data.driver_location :
1769bf215546Sopenharmony_ci               a_location !=  b_location ?
1770bf215546Sopenharmony_ci                  a_location - b_location :
1771bf215546Sopenharmony_ci                  a->data.location_frac != b->data.location_frac ?
1772bf215546Sopenharmony_ci                     a->data.location_frac - b->data.location_frac :
1773bf215546Sopenharmony_ci                     a->data.index - b->data.index;
1774bf215546Sopenharmony_ci}
1775bf215546Sopenharmony_ci
1776bf215546Sopenharmony_ci/* Order varyings according to driver location */
1777bf215546Sopenharmony_ciuint64_t
1778bf215546Sopenharmony_cidxil_sort_by_driver_location(nir_shader* s, nir_variable_mode modes)
1779bf215546Sopenharmony_ci{
1780bf215546Sopenharmony_ci   nir_sort_variables_with_modes(s, variable_location_cmp, modes);
1781bf215546Sopenharmony_ci
1782bf215546Sopenharmony_ci   uint64_t result = 0;
1783bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, s, modes) {
1784bf215546Sopenharmony_ci      result |= 1ull << var->data.location;
1785bf215546Sopenharmony_ci   }
1786bf215546Sopenharmony_ci   return result;
1787bf215546Sopenharmony_ci}
1788bf215546Sopenharmony_ci
1789bf215546Sopenharmony_ci/* Sort PS outputs so that color outputs come first */
1790bf215546Sopenharmony_civoid
1791bf215546Sopenharmony_cidxil_sort_ps_outputs(nir_shader* s)
1792bf215546Sopenharmony_ci{
1793bf215546Sopenharmony_ci   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
1794bf215546Sopenharmony_ci      /* We use the driver_location here to avoid introducing a new
1795bf215546Sopenharmony_ci       * struct or member variable here. The true, updated driver location
1796bf215546Sopenharmony_ci       * will be written below, after sorting */
1797bf215546Sopenharmony_ci      switch (var->data.location) {
1798bf215546Sopenharmony_ci      case FRAG_RESULT_DEPTH:
1799bf215546Sopenharmony_ci         var->data.driver_location = 1;
1800bf215546Sopenharmony_ci         break;
1801bf215546Sopenharmony_ci      case FRAG_RESULT_STENCIL:
1802bf215546Sopenharmony_ci         var->data.driver_location = 2;
1803bf215546Sopenharmony_ci         break;
1804bf215546Sopenharmony_ci      case FRAG_RESULT_SAMPLE_MASK:
1805bf215546Sopenharmony_ci         var->data.driver_location = 3;
1806bf215546Sopenharmony_ci         break;
1807bf215546Sopenharmony_ci      default:
1808bf215546Sopenharmony_ci         var->data.driver_location = 0;
1809bf215546Sopenharmony_ci      }
1810bf215546Sopenharmony_ci   }
1811bf215546Sopenharmony_ci
1812bf215546Sopenharmony_ci   nir_sort_variables_with_modes(s, variable_location_cmp,
1813bf215546Sopenharmony_ci                                 nir_var_shader_out);
1814bf215546Sopenharmony_ci
1815bf215546Sopenharmony_ci   unsigned driver_loc = 0;
1816bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, s, nir_var_shader_out) {
1817bf215546Sopenharmony_ci      var->data.driver_location = driver_loc++;
1818bf215546Sopenharmony_ci   }
1819bf215546Sopenharmony_ci}
1820bf215546Sopenharmony_ci
1821bf215546Sopenharmony_ci/* Order between stage values so that normal varyings come first,
1822bf215546Sopenharmony_ci * then sysvalues and then system generated values.
1823bf215546Sopenharmony_ci */
1824bf215546Sopenharmony_ciuint64_t
1825bf215546Sopenharmony_cidxil_reassign_driver_locations(nir_shader* s, nir_variable_mode modes,
1826bf215546Sopenharmony_ci   uint64_t other_stage_mask)
1827bf215546Sopenharmony_ci{
1828bf215546Sopenharmony_ci   nir_foreach_variable_with_modes_safe(var, s, modes) {
1829bf215546Sopenharmony_ci      /* We use the driver_location here to avoid introducing a new
1830bf215546Sopenharmony_ci       * struct or member variable here. The true, updated driver location
1831bf215546Sopenharmony_ci       * will be written below, after sorting */
1832bf215546Sopenharmony_ci      var->data.driver_location = nir_var_to_dxil_sysvalue_type(var, other_stage_mask);
1833bf215546Sopenharmony_ci   }
1834bf215546Sopenharmony_ci
1835bf215546Sopenharmony_ci   nir_sort_variables_with_modes(s, variable_location_cmp, modes);
1836bf215546Sopenharmony_ci
1837bf215546Sopenharmony_ci   uint64_t result = 0;
1838bf215546Sopenharmony_ci   unsigned driver_loc = 0, driver_patch_loc = 0;
1839bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, s, modes) {
1840bf215546Sopenharmony_ci      if (var->data.location < 64)
1841bf215546Sopenharmony_ci         result |= 1ull << var->data.location;
1842bf215546Sopenharmony_ci      /* Overlap patches with non-patch */
1843bf215546Sopenharmony_ci      var->data.driver_location = var->data.patch ?
1844bf215546Sopenharmony_ci         driver_patch_loc++ : driver_loc++;
1845bf215546Sopenharmony_ci   }
1846bf215546Sopenharmony_ci   return result;
1847bf215546Sopenharmony_ci}
1848bf215546Sopenharmony_ci
1849bf215546Sopenharmony_cistatic bool
1850bf215546Sopenharmony_cilower_ubo_array_one_to_static(struct nir_builder *b, nir_instr *inst,
1851bf215546Sopenharmony_ci                              void *cb_data)
1852bf215546Sopenharmony_ci{
1853bf215546Sopenharmony_ci   if (inst->type != nir_instr_type_intrinsic)
1854bf215546Sopenharmony_ci      return false;
1855bf215546Sopenharmony_ci
1856bf215546Sopenharmony_ci   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(inst);
1857bf215546Sopenharmony_ci
1858bf215546Sopenharmony_ci   if (intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
1859bf215546Sopenharmony_ci      return false;
1860bf215546Sopenharmony_ci
1861bf215546Sopenharmony_ci   nir_variable *var =
1862bf215546Sopenharmony_ci      nir_get_binding_variable(b->shader, nir_chase_binding(intrin->src[0]));
1863bf215546Sopenharmony_ci
1864bf215546Sopenharmony_ci   if (!var)
1865bf215546Sopenharmony_ci      return false;
1866bf215546Sopenharmony_ci
1867bf215546Sopenharmony_ci   if (!glsl_type_is_array(var->type) || glsl_array_size(var->type) != 1)
1868bf215546Sopenharmony_ci      return false;
1869bf215546Sopenharmony_ci
1870bf215546Sopenharmony_ci   nir_intrinsic_instr *index = nir_src_as_intrinsic(intrin->src[0]);
1871bf215546Sopenharmony_ci   /* We currently do not support reindex */
1872bf215546Sopenharmony_ci   assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);
1873bf215546Sopenharmony_ci
1874bf215546Sopenharmony_ci   if (nir_src_is_const(index->src[0]) && nir_src_as_uint(index->src[0]) == 0)
1875bf215546Sopenharmony_ci      return false;
1876bf215546Sopenharmony_ci
1877bf215546Sopenharmony_ci   if (nir_intrinsic_desc_type(index) != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1878bf215546Sopenharmony_ci      return false;
1879bf215546Sopenharmony_ci
1880bf215546Sopenharmony_ci   b->cursor = nir_instr_remove(&index->instr);
1881bf215546Sopenharmony_ci
1882bf215546Sopenharmony_ci   // Indexing out of bounds on array of UBOs is considered undefined
1883bf215546Sopenharmony_ci   // behavior. Therefore, we just hardcode all the index to 0.
1884bf215546Sopenharmony_ci   uint8_t bit_size = index->dest.ssa.bit_size;
1885bf215546Sopenharmony_ci   nir_ssa_def *zero = nir_imm_intN_t(b, 0, bit_size);
1886bf215546Sopenharmony_ci   nir_ssa_def *dest =
1887bf215546Sopenharmony_ci      nir_vulkan_resource_index(b, index->num_components, bit_size, zero,
1888bf215546Sopenharmony_ci                                .desc_set = nir_intrinsic_desc_set(index),
1889bf215546Sopenharmony_ci                                .binding = nir_intrinsic_binding(index),
1890bf215546Sopenharmony_ci                                .desc_type = nir_intrinsic_desc_type(index));
1891bf215546Sopenharmony_ci
1892bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&index->dest.ssa, dest);
1893bf215546Sopenharmony_ci
1894bf215546Sopenharmony_ci   return true;
1895bf215546Sopenharmony_ci}
1896bf215546Sopenharmony_ci
1897bf215546Sopenharmony_cibool
1898bf215546Sopenharmony_cidxil_nir_lower_ubo_array_one_to_static(nir_shader *s)
1899bf215546Sopenharmony_ci{
1900bf215546Sopenharmony_ci   bool progress = nir_shader_instructions_pass(
1901bf215546Sopenharmony_ci      s, lower_ubo_array_one_to_static, nir_metadata_none, NULL);
1902bf215546Sopenharmony_ci
1903bf215546Sopenharmony_ci   return progress;
1904bf215546Sopenharmony_ci}
1905bf215546Sopenharmony_ci
1906bf215546Sopenharmony_cistatic bool
1907bf215546Sopenharmony_ciis_fquantize2f16(const nir_instr *instr, const void *data)
1908bf215546Sopenharmony_ci{
1909bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_alu)
1910bf215546Sopenharmony_ci      return false;
1911bf215546Sopenharmony_ci
1912bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(instr);
1913bf215546Sopenharmony_ci   return alu->op == nir_op_fquantize2f16;
1914bf215546Sopenharmony_ci}
1915bf215546Sopenharmony_ci
1916bf215546Sopenharmony_cistatic nir_ssa_def *
1917bf215546Sopenharmony_cilower_fquantize2f16(struct nir_builder *b, nir_instr *instr, void *data)
1918bf215546Sopenharmony_ci{
1919bf215546Sopenharmony_ci   /*
1920bf215546Sopenharmony_ci    * SpvOpQuantizeToF16 documentation says:
1921bf215546Sopenharmony_ci    *
1922bf215546Sopenharmony_ci    * "
1923bf215546Sopenharmony_ci    * If Value is an infinity, the result is the same infinity.
1924bf215546Sopenharmony_ci    * If Value is a NaN, the result is a NaN, but not necessarily the same NaN.
1925bf215546Sopenharmony_ci    * If Value is positive with a magnitude too large to represent as a 16-bit
1926bf215546Sopenharmony_ci    * floating-point value, the result is positive infinity. If Value is negative
1927bf215546Sopenharmony_ci    * with a magnitude too large to represent as a 16-bit floating-point value,
1928bf215546Sopenharmony_ci    * the result is negative infinity. If the magnitude of Value is too small to
1929bf215546Sopenharmony_ci    * represent as a normalized 16-bit floating-point value, the result may be
1930bf215546Sopenharmony_ci    * either +0 or -0.
1931bf215546Sopenharmony_ci    * "
1932bf215546Sopenharmony_ci    *
1933bf215546Sopenharmony_ci    * which we turn into:
1934bf215546Sopenharmony_ci    *
1935bf215546Sopenharmony_ci    *   if (val < MIN_FLOAT16)
1936bf215546Sopenharmony_ci    *      return -INFINITY;
1937bf215546Sopenharmony_ci    *   else if (val > MAX_FLOAT16)
1938bf215546Sopenharmony_ci    *      return -INFINITY;
1939bf215546Sopenharmony_ci    *   else if (fabs(val) < SMALLEST_NORMALIZED_FLOAT16 && sign(val) != 0)
1940bf215546Sopenharmony_ci    *      return -0.0f;
1941bf215546Sopenharmony_ci    *   else if (fabs(val) < SMALLEST_NORMALIZED_FLOAT16 && sign(val) == 0)
1942bf215546Sopenharmony_ci    *      return +0.0f;
1943bf215546Sopenharmony_ci    *   else
1944bf215546Sopenharmony_ci    *      return round(val);
1945bf215546Sopenharmony_ci    */
1946bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(instr);
1947bf215546Sopenharmony_ci   nir_ssa_def *src =
1948bf215546Sopenharmony_ci      nir_ssa_for_src(b, alu->src[0].src, nir_src_num_components(alu->src[0].src));
1949bf215546Sopenharmony_ci
1950bf215546Sopenharmony_ci   nir_ssa_def *neg_inf_cond =
1951bf215546Sopenharmony_ci      nir_flt(b, src, nir_imm_float(b, -65504.0f));
1952bf215546Sopenharmony_ci   nir_ssa_def *pos_inf_cond =
1953bf215546Sopenharmony_ci      nir_flt(b, nir_imm_float(b, 65504.0f), src);
1954bf215546Sopenharmony_ci   nir_ssa_def *zero_cond =
1955bf215546Sopenharmony_ci      nir_flt(b, nir_fabs(b, src), nir_imm_float(b, ldexpf(1.0, -14)));
1956bf215546Sopenharmony_ci   nir_ssa_def *zero = nir_iand_imm(b, src, 1 << 31);
1957bf215546Sopenharmony_ci   nir_ssa_def *round = nir_iand_imm(b, src, ~BITFIELD_MASK(13));
1958bf215546Sopenharmony_ci
1959bf215546Sopenharmony_ci   nir_ssa_def *res =
1960bf215546Sopenharmony_ci      nir_bcsel(b, neg_inf_cond, nir_imm_float(b, -INFINITY), round);
1961bf215546Sopenharmony_ci   res = nir_bcsel(b, pos_inf_cond, nir_imm_float(b, INFINITY), res);
1962bf215546Sopenharmony_ci   res = nir_bcsel(b, zero_cond, zero, res);
1963bf215546Sopenharmony_ci   return res;
1964bf215546Sopenharmony_ci}
1965bf215546Sopenharmony_ci
1966bf215546Sopenharmony_cibool
1967bf215546Sopenharmony_cidxil_nir_lower_fquantize2f16(nir_shader *s)
1968bf215546Sopenharmony_ci{
1969bf215546Sopenharmony_ci   return nir_shader_lower_instructions(s, is_fquantize2f16, lower_fquantize2f16, NULL);
1970bf215546Sopenharmony_ci}
1971bf215546Sopenharmony_ci
1972bf215546Sopenharmony_cistatic bool
1973bf215546Sopenharmony_cifix_io_uint_deref_types(struct nir_builder *builder, nir_instr *instr, void *data)
1974bf215546Sopenharmony_ci{
1975bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_deref)
1976bf215546Sopenharmony_ci      return false;
1977bf215546Sopenharmony_ci
1978bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_instr_as_deref(instr);
1979bf215546Sopenharmony_ci   nir_variable *var =
1980bf215546Sopenharmony_ci      deref->deref_type == nir_deref_type_var ? deref->var : NULL;
1981bf215546Sopenharmony_ci
1982bf215546Sopenharmony_ci   if (var == data) {
1983bf215546Sopenharmony_ci      deref->type = var->type;
1984bf215546Sopenharmony_ci      return true;
1985bf215546Sopenharmony_ci   }
1986bf215546Sopenharmony_ci
1987bf215546Sopenharmony_ci   return false;
1988bf215546Sopenharmony_ci}
1989bf215546Sopenharmony_ci
1990bf215546Sopenharmony_cistatic bool
1991bf215546Sopenharmony_cifix_io_uint_type(nir_shader *s, nir_variable_mode modes, int slot)
1992bf215546Sopenharmony_ci{
1993bf215546Sopenharmony_ci   nir_variable *fixed_var = NULL;
1994bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, s, modes) {
1995bf215546Sopenharmony_ci      if (var->data.location == slot) {
1996bf215546Sopenharmony_ci         if (var->type == glsl_uint_type())
1997bf215546Sopenharmony_ci            return false;
1998bf215546Sopenharmony_ci
1999bf215546Sopenharmony_ci         assert(var->type == glsl_int_type());
2000bf215546Sopenharmony_ci         var->type = glsl_uint_type();
2001bf215546Sopenharmony_ci         fixed_var = var;
2002bf215546Sopenharmony_ci         break;
2003bf215546Sopenharmony_ci      }
2004bf215546Sopenharmony_ci   }
2005bf215546Sopenharmony_ci
2006bf215546Sopenharmony_ci   assert(fixed_var);
2007bf215546Sopenharmony_ci
2008bf215546Sopenharmony_ci   return nir_shader_instructions_pass(s, fix_io_uint_deref_types,
2009bf215546Sopenharmony_ci                                       nir_metadata_all, fixed_var);
2010bf215546Sopenharmony_ci}
2011bf215546Sopenharmony_ci
2012bf215546Sopenharmony_cibool
2013bf215546Sopenharmony_cidxil_nir_fix_io_uint_type(nir_shader *s, uint64_t in_mask, uint64_t out_mask)
2014bf215546Sopenharmony_ci{
2015bf215546Sopenharmony_ci   if (!(s->info.outputs_written & out_mask) &&
2016bf215546Sopenharmony_ci       !(s->info.inputs_read & in_mask))
2017bf215546Sopenharmony_ci      return false;
2018bf215546Sopenharmony_ci
2019bf215546Sopenharmony_ci   bool progress = false;
2020bf215546Sopenharmony_ci
2021bf215546Sopenharmony_ci   while (in_mask) {
2022bf215546Sopenharmony_ci      int slot = u_bit_scan64(&in_mask);
2023bf215546Sopenharmony_ci      progress |= (s->info.inputs_read & (1ull << slot)) &&
2024bf215546Sopenharmony_ci                  fix_io_uint_type(s, nir_var_shader_in, slot);
2025bf215546Sopenharmony_ci   }
2026bf215546Sopenharmony_ci
2027bf215546Sopenharmony_ci   while (out_mask) {
2028bf215546Sopenharmony_ci      int slot = u_bit_scan64(&out_mask);
2029bf215546Sopenharmony_ci      progress |= (s->info.outputs_written & (1ull << slot)) &&
2030bf215546Sopenharmony_ci                  fix_io_uint_type(s, nir_var_shader_out, slot);
2031bf215546Sopenharmony_ci   }
2032bf215546Sopenharmony_ci
2033bf215546Sopenharmony_ci   return progress;
2034bf215546Sopenharmony_ci}
2035bf215546Sopenharmony_ci
2036bf215546Sopenharmony_cistruct remove_after_discard_state {
2037bf215546Sopenharmony_ci   struct nir_block *active_block;
2038bf215546Sopenharmony_ci};
2039bf215546Sopenharmony_ci
2040bf215546Sopenharmony_cistatic bool
2041bf215546Sopenharmony_ciremove_after_discard(struct nir_builder *builder, nir_instr *instr,
2042bf215546Sopenharmony_ci                      void *cb_data)
2043bf215546Sopenharmony_ci{
2044bf215546Sopenharmony_ci   struct remove_after_discard_state *state = cb_data;
2045bf215546Sopenharmony_ci   if (instr->block == state->active_block) {
2046bf215546Sopenharmony_ci      nir_instr_remove_v(instr);
2047bf215546Sopenharmony_ci      return true;
2048bf215546Sopenharmony_ci   }
2049bf215546Sopenharmony_ci
2050bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
2051bf215546Sopenharmony_ci      return false;
2052bf215546Sopenharmony_ci
2053bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2054bf215546Sopenharmony_ci
2055bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_discard &&
2056bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_terminate &&
2057bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_discard_if &&
2058bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_terminate_if)
2059bf215546Sopenharmony_ci      return false;
2060bf215546Sopenharmony_ci
2061bf215546Sopenharmony_ci   state->active_block = instr->block;
2062bf215546Sopenharmony_ci
2063bf215546Sopenharmony_ci   return false;
2064bf215546Sopenharmony_ci}
2065bf215546Sopenharmony_ci
2066bf215546Sopenharmony_cistatic bool
2067bf215546Sopenharmony_cilower_kill(struct nir_builder *builder, nir_instr *instr, void *_cb_data)
2068bf215546Sopenharmony_ci{
2069bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
2070bf215546Sopenharmony_ci      return false;
2071bf215546Sopenharmony_ci
2072bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2073bf215546Sopenharmony_ci
2074bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_discard &&
2075bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_terminate &&
2076bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_discard_if &&
2077bf215546Sopenharmony_ci       intr->intrinsic != nir_intrinsic_terminate_if)
2078bf215546Sopenharmony_ci      return false;
2079bf215546Sopenharmony_ci
2080bf215546Sopenharmony_ci   builder->cursor = nir_instr_remove(instr);
2081bf215546Sopenharmony_ci   if (intr->intrinsic == nir_intrinsic_discard ||
2082bf215546Sopenharmony_ci       intr->intrinsic == nir_intrinsic_terminate) {
2083bf215546Sopenharmony_ci      nir_demote(builder);
2084bf215546Sopenharmony_ci   } else {
2085bf215546Sopenharmony_ci      assert(intr->src[0].is_ssa);
2086bf215546Sopenharmony_ci      nir_demote_if(builder, intr->src[0].ssa);
2087bf215546Sopenharmony_ci   }
2088bf215546Sopenharmony_ci
2089bf215546Sopenharmony_ci   nir_jump(builder, nir_jump_return);
2090bf215546Sopenharmony_ci
2091bf215546Sopenharmony_ci   return true;
2092bf215546Sopenharmony_ci}
2093bf215546Sopenharmony_ci
2094bf215546Sopenharmony_cibool
2095bf215546Sopenharmony_cidxil_nir_lower_discard_and_terminate(nir_shader *s)
2096bf215546Sopenharmony_ci{
2097bf215546Sopenharmony_ci   if (s->info.stage != MESA_SHADER_FRAGMENT)
2098bf215546Sopenharmony_ci      return false;
2099bf215546Sopenharmony_ci
2100bf215546Sopenharmony_ci   // This pass only works if all functions have been inlined
2101bf215546Sopenharmony_ci   assert(exec_list_length(&s->functions) == 1);
2102bf215546Sopenharmony_ci   struct remove_after_discard_state state;
2103bf215546Sopenharmony_ci   state.active_block = NULL;
2104bf215546Sopenharmony_ci   nir_shader_instructions_pass(s, remove_after_discard, nir_metadata_none,
2105bf215546Sopenharmony_ci                                &state);
2106bf215546Sopenharmony_ci   return nir_shader_instructions_pass(s, lower_kill, nir_metadata_none,
2107bf215546Sopenharmony_ci                                       NULL);
2108bf215546Sopenharmony_ci}
2109bf215546Sopenharmony_ci
2110bf215546Sopenharmony_cistatic bool
2111bf215546Sopenharmony_ciupdate_writes(struct nir_builder *b, nir_instr *instr, void *_state)
2112bf215546Sopenharmony_ci{
2113bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
2114bf215546Sopenharmony_ci      return false;
2115bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2116bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_store_output)
2117bf215546Sopenharmony_ci      return false;
2118bf215546Sopenharmony_ci
2119bf215546Sopenharmony_ci   nir_io_semantics io = nir_intrinsic_io_semantics(intr);
2120bf215546Sopenharmony_ci   if (io.location != VARYING_SLOT_POS)
2121bf215546Sopenharmony_ci      return false;
2122bf215546Sopenharmony_ci
2123bf215546Sopenharmony_ci   nir_ssa_def *src = intr->src[0].ssa;
2124bf215546Sopenharmony_ci   unsigned write_mask = nir_intrinsic_write_mask(intr);
2125bf215546Sopenharmony_ci   if (src->num_components == 4 && write_mask == 0xf)
2126bf215546Sopenharmony_ci      return false;
2127bf215546Sopenharmony_ci
2128bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
2129bf215546Sopenharmony_ci   unsigned first_comp = nir_intrinsic_component(intr);
2130bf215546Sopenharmony_ci   nir_ssa_def *channels[4] = { NULL, NULL, NULL, NULL };
2131bf215546Sopenharmony_ci   assert(first_comp + src->num_components <= ARRAY_SIZE(channels));
2132bf215546Sopenharmony_ci   for (unsigned i = 0; i < src->num_components; ++i)
2133bf215546Sopenharmony_ci      if (write_mask & (1 << i))
2134bf215546Sopenharmony_ci         channels[i + first_comp] = nir_channel(b, src, i);
2135bf215546Sopenharmony_ci   for (unsigned i = 0; i < 4; ++i)
2136bf215546Sopenharmony_ci      if (!channels[i])
2137bf215546Sopenharmony_ci         channels[i] = nir_imm_intN_t(b, 0, src->bit_size);
2138bf215546Sopenharmony_ci
2139bf215546Sopenharmony_ci   nir_instr_rewrite_src_ssa(instr, &intr->src[0], nir_vec(b, channels, 4));
2140bf215546Sopenharmony_ci   nir_intrinsic_set_component(intr, 0);
2141bf215546Sopenharmony_ci   nir_intrinsic_set_write_mask(intr, 0xf);
2142bf215546Sopenharmony_ci   return true;
2143bf215546Sopenharmony_ci}
2144bf215546Sopenharmony_ci
2145bf215546Sopenharmony_cibool
2146bf215546Sopenharmony_cidxil_nir_ensure_position_writes(nir_shader *s)
2147bf215546Sopenharmony_ci{
2148bf215546Sopenharmony_ci   if (s->info.stage != MESA_SHADER_VERTEX &&
2149bf215546Sopenharmony_ci       s->info.stage != MESA_SHADER_GEOMETRY &&
2150bf215546Sopenharmony_ci       s->info.stage != MESA_SHADER_TESS_EVAL)
2151bf215546Sopenharmony_ci      return false;
2152bf215546Sopenharmony_ci   if ((s->info.outputs_written & VARYING_BIT_POS) == 0)
2153bf215546Sopenharmony_ci      return false;
2154bf215546Sopenharmony_ci
2155bf215546Sopenharmony_ci   return nir_shader_instructions_pass(s, update_writes,
2156bf215546Sopenharmony_ci                                       nir_metadata_block_index | nir_metadata_dominance,
2157bf215546Sopenharmony_ci                                       NULL);
2158bf215546Sopenharmony_ci}
2159