/*
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "ac_nir.h"
#include "radv_constants.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_args.h"

typedef struct {
   enum amd_gfx_level gfx_level;
   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_pipeline_key *pl_key;
   bool use_llvm;
} lower_abi_state;

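/* Loads the 4-dword descriptor of the given hardware ring.
 * Each ring occupies 16 bytes in the ring offsets buffer, whose address is
 * passed to the shader as a pair of SGPRs (task shaders use a separate
 * argument for it).
 */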
static nir_ssa_def *
load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
{
   struct ac_arg arg =
      b->shader->info.stage == MESA_SHADER_TASK ?
      s->args->task_ring_offsets :
      s->args->ring_offsets;

   nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
   ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
   return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
}

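/* Returns a boolean that is true when any bit of "mask" is set in the
 * NGG culling settings SGPR.
 */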
static nir_ssa_def *
nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
   return nir_test_mask(b, settings, mask);
}

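/* Lowers each RADV ABI intrinsic to a load of the corresponding shader
 * argument (SGPR/VGPR), a ring descriptor load, or a compile-time constant.
 */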
static nir_ssa_def *
lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
{
   lower_abi_state *s = (lower_abi_state *) state;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   gl_shader_stage stage = b->shader->info.stage;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ring_tess_factors_amd:
      return load_ring(b, RING_HS_TESS_FACTOR, s);

   case nir_intrinsic_load_ring_tess_factors_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);

   case nir_intrinsic_load_ring_tess_offchip_amd:
      return load_ring(b, RING_HS_TESS_OFFCHIP, s);

   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);

   case nir_intrinsic_load_tcs_num_patches_amd:
      return nir_imm_int(b, s->info->num_tess_patches);

   case nir_intrinsic_load_ring_esgs_amd:
      return load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);

   case nir_intrinsic_load_ring_es2gs_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);

   case nir_intrinsic_load_tess_rel_patch_id_amd:
      if (stage == MESA_SHADER_TESS_CTRL) {
         return nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0));
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Setting an upper bound like this will actually make it possible
          * to optimize some multiplications (in address calculations) so that
          * constant additions can be added to the const offset in memory load instructions.
          */
         nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
         nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
         nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1));
         return arg;
      } else {
         unreachable("invalid tessellation shader stage");
      }

   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL)
         return nir_imm_int(b, s->pl_key->tcs.tess_input_vertices);
      else if (stage == MESA_SHADER_TESS_EVAL)
         return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
      else
         unreachable("invalid tessellation shader stage");

   case nir_intrinsic_load_gs_vertex_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);

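   /* The gs_tg_info SGPR packs workgroup-wide NGG information:
    * bits [12:20] hold the number of input vertices and bits [22:30] the
    * number of input primitives, extracted with ubfe below.
    */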
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
                         nir_imm_int(b, 12), nir_imm_int(b, 9));

   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
                         nir_imm_int(b, 22), nir_imm_int(b, 9));

   case nir_intrinsic_load_packed_passthrough_primitive_amd:
      /* NGG passthrough mode: the HW already packs the primitive export value to a single register. */
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);

   case nir_intrinsic_load_shader_query_enabled_amd:
      return nir_ieq_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state), 1);

   case nir_intrinsic_load_cull_any_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives, s);

   case nir_intrinsic_load_cull_front_face_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_front_face, s);

   case nir_intrinsic_load_cull_back_face_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_back_face, s);

   case nir_intrinsic_load_cull_ccw_amd:
      return nggc_bool_setting(b, radv_nggc_face_is_ccw, s);

   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_small_primitives, s);

   case nir_intrinsic_load_cull_small_prim_precision_amd: {
      /* To save space, only the exponent is stored in the high 8 bits.
       * We calculate the precision from those 8 bits:
       * exponent = nggc_settings >> 24
       * precision = 1.0 * 2 ^ exponent
       */
      nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      nir_ssa_def *exponent = nir_ishr_imm(b, settings, 24u);
      return nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
   }

   case nir_intrinsic_load_viewport_x_scale:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]);

   case nir_intrinsic_load_viewport_x_offset:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]);

   case nir_intrinsic_load_viewport_y_scale:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]);

   case nir_intrinsic_load_viewport_y_offset:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]);

   case nir_intrinsic_load_ring_task_draw_amd:
      return load_ring(b, RING_TS_DRAW, s);

   case nir_intrinsic_load_ring_task_payload_amd:
      return load_ring(b, RING_TS_PAYLOAD, s);

   case nir_intrinsic_load_ring_mesh_scratch_amd:
      return load_ring(b, RING_MS_SCRATCH, s);

   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
      /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
      return nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
                          RADV_MESH_SCRATCH_ENTRY_BYTES);

   case nir_intrinsic_load_task_ring_entry_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);

   case nir_intrinsic_load_task_ib_addr:
      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);

   case nir_intrinsic_load_task_ib_stride:
      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);

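   /* Per-vertex stride of the LS outputs / HS inputs:
    * each linked I/O slot is a vec4 (16 bytes).
    */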
   case nir_intrinsic_load_lshs_vertex_stride_amd: {
      unsigned io_num = stage == MESA_SHADER_VERTEX ?
         s->info->vs.num_linked_outputs :
         s->info->tcs.num_linked_inputs;
      return nir_imm_int(b, io_num * 16);
   }

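   /* Offset of the per-patch outputs within the HS output memory:
    * they are stored after the per-vertex outputs of all patches.
    */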
   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      unsigned num_patches = s->info->num_tess_patches;
      unsigned out_vertices_per_patch = b->shader->info.tess.tcs_vertices_out;
      unsigned num_tcs_outputs = stage == MESA_SHADER_TESS_CTRL ?
         s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs;
      int per_vertex_output_patch_size = out_vertices_per_patch * num_tcs_outputs * 16u;
      return nir_imm_int(b, num_patches * per_vertex_output_patch_size);
   }

   default:
      unreachable("invalid NIR RADV ABI intrinsic.");
   }
}

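/* Selects the intrinsics handled by this pass. The ring descriptor loads are
 * left alone when the LLVM backend is used, because they are lowered
 * elsewhere in that case.
 */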
static bool
filter_abi_instr(const nir_instr *instr, const void *state)
{
   lower_abi_state *s = (lower_abi_state *) state;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   return (intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_amd && !s->use_llvm) ||
          (intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_amd && !s->use_llvm) ||
          (intrin->intrinsic == nir_intrinsic_load_ring_esgs_amd && !s->use_llvm) ||
          intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_patch_vertices_in ||
          intrin->intrinsic == nir_intrinsic_load_tcs_num_patches_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_es2gs_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_tess_rel_patch_id_amd ||
          intrin->intrinsic == nir_intrinsic_load_gs_vertex_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_vertices_amd ||
          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_primitives_amd ||
          intrin->intrinsic == nir_intrinsic_load_packed_passthrough_primitive_amd ||
          intrin->intrinsic == nir_intrinsic_load_shader_query_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_any_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_front_face_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_back_face_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_ccw_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_small_primitives_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_small_prim_precision_amd ||
          intrin->intrinsic == nir_intrinsic_load_viewport_x_scale ||
          intrin->intrinsic == nir_intrinsic_load_viewport_x_offset ||
          intrin->intrinsic == nir_intrinsic_load_viewport_y_scale ||
          intrin->intrinsic == nir_intrinsic_load_viewport_y_offset ||
          intrin->intrinsic == nir_intrinsic_load_ring_task_draw_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_task_payload_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_task_ring_entry_amd ||
          intrin->intrinsic == nir_intrinsic_load_task_ib_addr ||
          intrin->intrinsic == nir_intrinsic_load_task_ib_stride ||
          intrin->intrinsic == nir_intrinsic_load_lshs_vertex_stride_amd ||
          intrin->intrinsic == nir_intrinsic_load_hs_out_patch_data_offset_amd;
}

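/* Entry point of the pass. Intended to run late in the NIR pipeline, after
 * the shader arguments (radv_shader_args) have been declared. A typical call
 * looks roughly like the following (illustrative only, the surrounding
 * variable names are assumptions):
 *
 *    radv_nir_lower_abi(stage->nir, gfx_level, &stage->info, &stage->args,
 *                       pl_key, use_llvm);
 */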
void
radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
                   const struct radv_shader_info *info, const struct radv_shader_args *args,
                   const struct radv_pipeline_key *pl_key, bool use_llvm)
{
   lower_abi_state state = {
      .gfx_level = gfx_level,
      .info = info,
      .args = args,
      .pl_key = pl_key,
      .use_llvm = use_llvm,
   };

   nir_shader_lower_instructions(shader, filter_abi_instr, lower_abi_instr, &state);
}