/*
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "ac_nir.h"
#include "radv_constants.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_args.h"

typedef struct {
   enum amd_gfx_level gfx_level;
   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_pipeline_key *pl_key;
   bool use_llvm;
} lower_abi_state;

static nir_ssa_def *
load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
{
   struct ac_arg arg =
      b->shader->info.stage == MESA_SHADER_TASK ?
      s->args->task_ring_offsets :
      s->args->ring_offsets;

   nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
   ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
   return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
}

static nir_ssa_def *
nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
   return nir_test_mask(b, settings, mask);
}

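/* Lower a single RADV ABI intrinsic: replace it with a value read from the
 * shader arguments (user SGPRs / VGPRs), a ring buffer descriptor, or a
 * constant that is already known at compile time.
 */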
static nir_ssa_def *
lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
{
   lower_abi_state *s = (lower_abi_state *) state;
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   gl_shader_stage stage = b->shader->info.stage;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ring_tess_factors_amd:
      return load_ring(b, RING_HS_TESS_FACTOR, s);

   case nir_intrinsic_load_ring_tess_factors_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);

   case nir_intrinsic_load_ring_tess_offchip_amd:
      return load_ring(b, RING_HS_TESS_OFFCHIP, s);

   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);

   case nir_intrinsic_load_tcs_num_patches_amd:
      return nir_imm_int(b, s->info->num_tess_patches);

   case nir_intrinsic_load_ring_esgs_amd:
      return load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);

   case nir_intrinsic_load_ring_es2gs_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);

   case nir_intrinsic_load_tess_rel_patch_id_amd:
      if (stage == MESA_SHADER_TESS_CTRL) {
         return nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0));
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Setting an upper bound like this will actually make it possible
          * to optimize some multiplications (in address calculations) so that
          * constant additions can be added to the const offset in memory load instructions.
          */
         nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
         nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
         nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1));
         return arg;
      } else {
         unreachable("invalid tessellation shader stage");
      }

   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL)
         return nir_imm_int(b, s->pl_key->tcs.tess_input_vertices);
      else if (stage == MESA_SHADER_TESS_EVAL)
         return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
      else
         unreachable("invalid tessellation shader stage");

   case nir_intrinsic_load_gs_vertex_offset_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);

   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
                      nir_imm_int(b, 12), nir_imm_int(b, 9));

   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
                      nir_imm_int(b, 22), nir_imm_int(b, 9));

   case nir_intrinsic_load_packed_passthrough_primitive_amd:
      /* NGG passthrough mode: the HW already packs the primitive export value to a single register. */
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);

   case nir_intrinsic_load_shader_query_enabled_amd:
      return nir_ieq_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state), 1);

   case nir_intrinsic_load_cull_any_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives, s);

   case nir_intrinsic_load_cull_front_face_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_front_face, s);

   case nir_intrinsic_load_cull_back_face_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_back_face, s);

   case nir_intrinsic_load_cull_ccw_amd:
      return nggc_bool_setting(b, radv_nggc_face_is_ccw, s);

   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
      return nggc_bool_setting(b, radv_nggc_small_primitives, s);

   case nir_intrinsic_load_cull_small_prim_precision_amd: {
      /* To save space, only the exponent is stored in the high 8 bits.
       * We calculate the precision from those 8 bits:
       * exponent = nggc_settings >> 24
       * precision = 1.0 * 2 ^ exponent
       */
      nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      nir_ssa_def *exponent = nir_ishr_imm(b, settings, 24u);
      return nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
   }

   case nir_intrinsic_load_viewport_x_scale:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]);

   case nir_intrinsic_load_viewport_x_offset:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]);

   case nir_intrinsic_load_viewport_y_scale:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]);

   case nir_intrinsic_load_viewport_y_offset:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]);

   case nir_intrinsic_load_ring_task_draw_amd:
      return load_ring(b, RING_TS_DRAW, s);

   case nir_intrinsic_load_ring_task_payload_amd:
      return load_ring(b, RING_TS_PAYLOAD, s);

   case nir_intrinsic_load_ring_mesh_scratch_amd:
      return load_ring(b, RING_MS_SCRATCH, s);

   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
      /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
      return nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
                          RADV_MESH_SCRATCH_ENTRY_BYTES);

   case nir_intrinsic_load_task_ring_entry_amd:
      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);

   case nir_intrinsic_load_task_ib_addr:
      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);

   case nir_intrinsic_load_task_ib_stride:
      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);

   case nir_intrinsic_load_lshs_vertex_stride_amd: {
      unsigned io_num = stage == MESA_SHADER_VERTEX ?
                        s->info->vs.num_linked_outputs :
                        s->info->tcs.num_linked_inputs;
      return nir_imm_int(b, io_num * 16);
   }

   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      unsigned num_patches = s->info->num_tess_patches;
      unsigned out_vertices_per_patch = b->shader->info.tess.tcs_vertices_out;
      unsigned num_tcs_outputs = stage == MESA_SHADER_TESS_CTRL ?
                                 s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs;
      int per_vertex_output_patch_size = out_vertices_per_patch * num_tcs_outputs * 16u;
      return nir_imm_int(b, num_patches * per_vertex_output_patch_size);
   }

   default:
      unreachable("invalid NIR RADV ABI intrinsic.");
   }
}

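/* Filter callback for nir_shader_lower_instructions:
 * matches the ABI intrinsics handled by lower_abi_instr. The ring buffer
 * descriptor intrinsics are only matched when not compiling with LLVM.
 */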
static bool
filter_abi_instr(const nir_instr *instr,
                 UNUSED const void *state)
{
   lower_abi_state *s = (lower_abi_state *) state;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   return (intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_amd && !s->use_llvm) ||
          (intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_amd && !s->use_llvm) ||
          (intrin->intrinsic == nir_intrinsic_load_ring_esgs_amd && !s->use_llvm) ||
          intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_patch_vertices_in ||
          intrin->intrinsic == nir_intrinsic_load_tcs_num_patches_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_es2gs_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_tess_rel_patch_id_amd ||
          intrin->intrinsic == nir_intrinsic_load_gs_vertex_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_vertices_amd ||
          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_primitives_amd ||
          intrin->intrinsic == nir_intrinsic_load_packed_passthrough_primitive_amd ||
          intrin->intrinsic == nir_intrinsic_load_shader_query_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_any_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_front_face_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_back_face_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_ccw_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_small_primitives_enabled_amd ||
          intrin->intrinsic == nir_intrinsic_load_cull_small_prim_precision_amd ||
          intrin->intrinsic == nir_intrinsic_load_viewport_x_scale ||
          intrin->intrinsic == nir_intrinsic_load_viewport_x_offset ||
          intrin->intrinsic == nir_intrinsic_load_viewport_y_scale ||
          intrin->intrinsic == nir_intrinsic_load_viewport_y_offset ||
          intrin->intrinsic == nir_intrinsic_load_ring_task_draw_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_task_payload_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_amd ||
          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_offset_amd ||
          intrin->intrinsic == nir_intrinsic_load_task_ring_entry_amd ||
          intrin->intrinsic == nir_intrinsic_load_task_ib_addr ||
          intrin->intrinsic == nir_intrinsic_load_task_ib_stride ||
          intrin->intrinsic == nir_intrinsic_load_lshs_vertex_stride_amd ||
          intrin->intrinsic == nir_intrinsic_load_hs_out_patch_data_offset_amd;
}

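/* Entry point: lower RADV-specific ABI intrinsics in the given shader to
 * loads from the shader arguments, ring buffer descriptors, or compile-time
 * constants.
 */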
void
radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
                   const struct radv_shader_info *info, const struct radv_shader_args *args,
                   const struct radv_pipeline_key *pl_key, bool use_llvm)
{
   lower_abi_state state = {
      .gfx_level = gfx_level,
      .info = info,
      .args = args,
      .pl_key = pl_key,
      .use_llvm = use_llvm,
   };

   nir_shader_lower_instructions(shader, filter_abi_instr, lower_abi_instr, &state);
}