/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "ac_shader_util.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_args.h"

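/* State for the radv_nir_apply_pipeline_layout pass, which lowers Vulkan
 * descriptor intrinsics (vulkan_resource_index, load_vulkan_descriptor, ...)
 * and image/texture derefs into descriptor loads based on the pipeline layout
 * and the shader's user SGPR arguments.
 */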
typedef struct {
   enum amd_gfx_level gfx_level;
   uint32_t address32_hi;
   bool disable_aniso_single_level;
   bool has_image_load_dcc_bug;

   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_pipeline_layout *pipeline_layout;
} apply_layout_state;

static nir_ssa_def *
get_scalar_arg(nir_builder *b, unsigned size, struct ac_arg arg)
{
   assert(arg.used);
   return nir_load_scalar_arg_amd(b, size, .base = arg.arg_index);
}

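/* Extend a 32-bit descriptor address to a 64-bit pointer by packing it with
 * the known high 32 bits of the descriptor address space.
 */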
static nir_ssa_def *
convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_ssa_def *ptr)
{
   return nir_pack_64_2x32_split(b, ptr, nir_imm_int(b, state->address32_hi));
}

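/* Return the 32-bit address of a descriptor set, either loaded from the
 * indirect descriptor-sets table or taken directly from the per-set user SGPR.
 */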
static nir_ssa_def *
load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
{
   const struct radv_userdata_locations *user_sgprs_locs = &state->info->user_sgprs_locs;
   if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
      nir_ssa_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
      addr = convert_pointer_to_64_bit(b, state, addr);
      return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
   }

   assert(state->args->descriptor_sets[set].used);
   return get_scalar_arg(b, 1, state->args->descriptor_sets[set]);
}

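/* Lower vulkan_resource_index to a (set address, binding offset, stride) vec3.
 * Dynamic UBO/SSBO descriptors are stored after the push constants and are
 * addressed relative to the push-constant pointer. Acceleration structures are
 * returned as a packed 64-bit (set address, offset) pair instead.
 */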
static void
visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   unsigned desc_set = nir_intrinsic_desc_set(intrin);
   unsigned binding = nir_intrinsic_binding(intrin);
   struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
   unsigned offset = layout->binding[binding].offset;
   unsigned stride;

   nir_ssa_def *set_ptr;
   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
       layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
      unsigned idx = state->pipeline_layout->set[desc_set].dynamic_offset_start +
                     layout->binding[binding].dynamic_offset_offset;
      set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants);
      offset = state->pipeline_layout->push_constant_size + idx * 16;
      stride = 16;
   } else {
      set_ptr = load_desc_ptr(b, state, desc_set);
      stride = layout->binding[binding].size;
   }

   nir_ssa_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;

   binding_ptr = nir_iadd_imm(b, binding_ptr, offset);
   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;

   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      assert(stride == 16);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
   } else {
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                               nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
   }
   nir_instr_remove(&intrin->instr);
}

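/* Apply an additional array index to a pointer produced by
 * vulkan_resource_index, advancing the binding offset by index * stride.
 */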
static void
visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state,
                              nir_intrinsic_instr *intrin)
{
   VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      nir_ssa_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
      nir_ssa_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

      nir_ssa_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
   } else {
      assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
             desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

      nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
      nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2);

      nir_ssa_def *index = nir_imul(b, intrin->src[1].ssa, stride);
      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                               nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
   }
   nir_instr_remove(&intrin->instr);
}

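/* For acceleration structures, load the 64-bit descriptor contents from
 * memory. For buffers, keep the (set address, offset) pair and clear the
 * stride component.
 */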
static void
visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      nir_ssa_def *addr = convert_pointer_to_64_bit(
         b, state,
         nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
                  nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
      nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
   } else {
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                               nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
   }
   nir_instr_remove(&intrin->instr);
}

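/* Build a buffer descriptor for an inline uniform block: the 32-bit address in
 * rsrc is extended with the high address bits and given a maximum range, using
 * the 32-bit float format encoding appropriate for the GPU generation.
 */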
static nir_ssa_def *
load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
{
   uint32_t desc_type =
      S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
      S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
   if (state->gfx_level >= GFX11) {
      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
                   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
   } else if (state->gfx_level >= GFX10) {
      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
   } else {
      desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
   }

   return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)),
                   nir_imm_int(b, 0xffffffff), nir_imm_int(b, desc_type));
}

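/* Turn a lowered buffer pointer into something the buffer intrinsics can use:
 * an inline uniform block descriptor, the 32-bit descriptor address for
 * non-uniform access, or the buffer descriptor itself loaded through SMEM.
 */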
static nir_ssa_def *
load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc,
                       unsigned access)
{
   nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));

   /* If binding.success=false, then this is a variable pointer, which we don't support with
    * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK.
    */
   if (binding.success) {
      struct radv_descriptor_set_layout *layout =
         state->pipeline_layout->set[binding.desc_set].layout;
      if (layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
         rsrc = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
         return load_inline_buffer_descriptor(b, state, rsrc);
      }
   }

   if (access & ACCESS_NON_UNIFORM)
      return nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));

   nir_ssa_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
   return nir_load_smem_amd(b, 4, desc_set, nir_channel(b, rsrc, 1), .align_mul = 16);
}

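/* Lower get_ssbo_size to a load of the size field of the buffer descriptor:
 * a per-lane global load for non-uniform resources, or an SMEM load of the
 * whole descriptor otherwise.
 */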
static void
visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   nir_ssa_def *rsrc = intrin->src[0].ssa;

   nir_ssa_def *size;
   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM) {
      nir_ssa_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
      ptr = nir_iadd_imm(b, ptr, 8);
      ptr = convert_pointer_to_64_bit(b, state, ptr);
      size =
         nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
                               .align_mul = 16, .align_offset = 4);
   } else {
      /* load the entire descriptor so it can be CSE'd */
      nir_ssa_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
      nir_ssa_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
      size = nir_channel(b, desc, 2);
   }

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, size);
   nir_instr_remove(&intrin->instr);
}

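/* Build or load an image/sampler/buffer/FMASK descriptor for the given deref
 * chain. Immutable samplers are emitted as compile-time constants when
 * possible. Some descriptors are patched after loading: plane 2 of multi-plane
 * formats reuses the tail of the plane 1 descriptor, image descriptors may get
 * the WRITE_COMPRESS_ENABLE workaround, and tg4 samplers get TRUNC_COORD
 * cleared.
 */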
static nir_ssa_def *
get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref,
                 enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex,
                 bool write)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(var);
   unsigned desc_set = var->data.descriptor_set;
   unsigned binding_index = var->data.binding;
   bool indirect = nir_deref_instr_has_indirect(deref);

   struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
   struct radv_descriptor_set_binding_layout *binding = &layout->binding[binding_index];

   /* Handle immutable (compile-time) samplers (VkDescriptorSetLayoutBinding::pImmutableSamplers).
    * We can only do this for a constant array index, or if all samplers in the array are the same.
    */
   if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
       (!indirect || binding->immutable_samplers_equal)) {
      unsigned constant_index = 0;
      if (!binding->immutable_samplers_equal) {
         while (deref->deref_type != nir_deref_type_var) {
            assert(deref->deref_type == nir_deref_type_array);
            unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
            constant_index += nir_src_as_uint(deref->arr.index) * array_size;
            deref = nir_deref_instr_parent(deref);
         }
      }

      uint32_t dword0_mask = tex->op == nir_texop_tg4 ? C_008F30_TRUNC_COORD : 0xffffffffu;
      const uint32_t *samplers = radv_immutable_samplers(layout, binding);
      return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask,
                           samplers[constant_index * 4 + 1], samplers[constant_index * 4 + 2],
                           samplers[constant_index * 4 + 3]);
   }

   unsigned size = 8;
   unsigned offset = binding->offset;
   switch (desc_type) {
   case AC_DESC_IMAGE:
   case AC_DESC_PLANE_0:
      break;
   case AC_DESC_FMASK:
   case AC_DESC_PLANE_1:
      offset += 32;
      break;
   case AC_DESC_SAMPLER:
      size = 4;
      if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
         offset += radv_combined_image_descriptor_sampler_offset(binding);
      break;
   case AC_DESC_BUFFER:
      size = 4;
      break;
   case AC_DESC_PLANE_2:
      size = 4;
      offset += 64;
      break;
   }

   nir_ssa_def *index = NULL;
   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
      array_size *= binding->size;

      nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
      if (tmp != deref->arr.index.ssa)
         nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;

      if (index) {
         index = nir_iadd(b, tmp, index);
         nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
      } else {
         index = tmp;
      }

      deref = nir_deref_instr_parent(deref);
   }

   nir_ssa_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
   if (index && index_offset != index)
      nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true;

   if (non_uniform)
      return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);

   nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
   nir_ssa_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);

   /* 3-plane formats always have the same size and format for planes 1 and 2,
    * so we can reuse the tail of the plane 1 descriptor and store only the
    * first 16 bytes of the last plane. */
   if (desc_type == AC_DESC_PLANE_2) {
      nir_ssa_def *desc2 =
         get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);

      nir_ssa_def *comp[8];
      for (unsigned i = 0; i < 4; i++)
         comp[i] = nir_channel(b, desc, i);
      for (unsigned i = 4; i < 8; i++)
         comp[i] = nir_channel(b, desc2, i);

      return nir_vec(b, comp, 8);
   } else if (desc_type == AC_DESC_IMAGE && state->has_image_load_dcc_bug && !tex && !write) {
      nir_ssa_def *comp[8];
      for (unsigned i = 0; i < 8; i++)
         comp[i] = nir_channel(b, desc, i);

      /* WRITE_COMPRESS_ENABLE must be 0 for all image loads to work around a
       * hardware bug.
       */
      comp[6] = nir_iand_imm(b, comp[6], C_00A018_WRITE_COMPRESS_ENABLE);

      return nir_vec(b, comp, 8);
   } else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4) {
      nir_ssa_def *comp[4];
      for (unsigned i = 0; i < 4; i++)
         comp[i] = nir_channel(b, desc, i);

      /* We want to always use the linear filtering truncation behaviour for
       * nir_texop_tg4, even if the sampler uses nearest/point filtering.
       */
      comp[0] = nir_iand_imm(b, comp[0], C_008F30_TRUNC_COORD);

      return nir_vec(b, comp, 4);
   }

   return desc;
}

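/* Replace the image deref source of an image intrinsic with the loaded
 * image or buffer descriptor.
 */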
static void
update_image_intrinsic(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   const enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
   bool is_load = intrin->intrinsic == nir_intrinsic_image_deref_load ||
                  intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;

   nir_ssa_def *desc = get_sampler_desc(
      b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
      nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);
   nir_rewrite_image_intrinsic(intrin, desc, true);
}

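/* Dispatch the lowering for a single intrinsic: resource index/reindex,
 * descriptor loads, UBO/SSBO/image access and SSBO size queries.
 */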
static void
apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *rsrc;
   switch (intrin->intrinsic) {
   case nir_intrinsic_vulkan_resource_index:
      visit_vulkan_resource_index(b, state, intrin);
      break;
   case nir_intrinsic_vulkan_resource_reindex:
      visit_vulkan_resource_reindex(b, state, intrin);
      break;
   case nir_intrinsic_load_vulkan_descriptor:
      visit_load_vulkan_descriptor(b, state, intrin);
      break;
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      rsrc = load_buffer_descriptor(b, state, intrin->src[0].ssa, nir_intrinsic_access(intrin));
      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[0], rsrc);
      break;
   case nir_intrinsic_store_ssbo:
      rsrc = load_buffer_descriptor(b, state, intrin->src[1].ssa, nir_intrinsic_access(intrin));
      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[1], rsrc);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(b, state, intrin);
      break;
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      update_image_intrinsic(b, state, intrin);
      break;
   default:
      break;
   }
}

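/* Lower the texture/sampler deref sources of a texture instruction to
 * descriptor handles, applying the pre-GFX8 anisotropic filtering workaround
 * to the sampler when needed.
 */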
static void
apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   int plane = -1;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_plane:
         plane = nir_src_as_int(tex->src[i].src);
         break;
      default:
         break;
      }
   }

   nir_ssa_def *image = NULL;
   nir_ssa_def *sampler = NULL;
   if (plane >= 0) {
      assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
      assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane,
                               tex->texture_non_uniform, tex, false);
   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER,
                               tex->texture_non_uniform, tex, false);
   } else if (tex->op == nir_texop_fragment_mask_fetch_amd) {
      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK,
                               tex->texture_non_uniform, tex, false);
   } else {
      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE,
                               tex->texture_non_uniform, tex, false);
   }

   if (sampler_deref_instr) {
      sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER,
                                 tex->sampler_non_uniform, tex, false);

      if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT &&
          state->gfx_level < GFX8) {
         /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
          *
          * GFX6-GFX7:
          *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
          *   filtering manually. The driver sets img7 to a mask clearing
          *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
          *     s_and_b32 samp0, samp0, img7
          *
          * GFX8:
          *   The ANISO_OVERRIDE sampler field enables this fix in TA.
          */
         /* TODO: This is unnecessary for combined image+sampler.
          * We can do this when updating the desc set. */
         nir_ssa_def *comp[4];
         for (unsigned i = 0; i < 4; i++)
            comp[i] = nir_channel(b, sampler, i);
         comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7));

         sampler = nir_vec(b, comp, 4);
      }
   }

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_deref:
         tex->src[i].src_type = nir_tex_src_texture_handle;
         nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, image);
         break;
      case nir_tex_src_sampler_deref:
         tex->src[i].src_type = nir_tex_src_sampler_handle;
         nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, sampler);
         break;
      default:
         break;
      }
   }
}

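/* Entry point: walk all instructions and lower descriptor-related intrinsics
 * and texture ops according to the pipeline layout.
 */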
void
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
                               const struct radv_pipeline_layout *layout,
                               const struct radv_shader_info *info,
                               const struct radv_shader_args *args)
{
   apply_layout_state state = {
      .gfx_level = device->physical_device->rad_info.gfx_level,
      .address32_hi = device->physical_device->rad_info.address32_hi,
      .disable_aniso_single_level = device->instance->disable_aniso_single_level,
      .has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug,
      .args = args,
      .info = info,
      .pipeline_layout = layout,
   };

   nir_builder b;

   nir_foreach_function (function, shader) {
      if (!function->impl)
         continue;

      nir_builder_init(&b, function->impl);

      /* Iterate in reverse so load_ubo lowering can look at
       * the vulkan_resource_index to tell if it's an inline
       * ubo.
       */
      nir_foreach_block_reverse (block, function->impl) {
         nir_foreach_instr_reverse_safe (instr, block) {
            if (instr->type == nir_instr_type_tex)
               apply_layout_to_tex(&b, &state, nir_instr_as_tex(instr));
            else if (instr->type == nir_instr_type_intrinsic)
               apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr));
         }
      }

      nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
   }
}