1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2022 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci#include "ac_nir.h" 27bf215546Sopenharmony_ci#include "radv_constants.h" 28bf215546Sopenharmony_ci#include "radv_private.h" 29bf215546Sopenharmony_ci#include "radv_shader.h" 30bf215546Sopenharmony_ci#include "radv_shader_args.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_citypedef struct { 33bf215546Sopenharmony_ci enum amd_gfx_level gfx_level; 34bf215546Sopenharmony_ci const struct radv_shader_args *args; 35bf215546Sopenharmony_ci const struct radv_shader_info *info; 36bf215546Sopenharmony_ci const struct radv_pipeline_key *pl_key; 37bf215546Sopenharmony_ci bool use_llvm; 38bf215546Sopenharmony_ci} lower_abi_state; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic nir_ssa_def * 41bf215546Sopenharmony_ciload_ring(nir_builder *b, unsigned ring, lower_abi_state *s) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci struct ac_arg arg = 44bf215546Sopenharmony_ci b->shader->info.stage == MESA_SHADER_TASK ? 45bf215546Sopenharmony_ci s->args->task_ring_offsets : 46bf215546Sopenharmony_ci s->args->ring_offsets; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg); 49bf215546Sopenharmony_ci ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1)); 50bf215546Sopenharmony_ci return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u); 51bf215546Sopenharmony_ci} 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cistatic nir_ssa_def * 54bf215546Sopenharmony_cinggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s) 55bf215546Sopenharmony_ci{ 56bf215546Sopenharmony_ci nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings); 57bf215546Sopenharmony_ci return nir_test_mask(b, settings, mask); 58bf215546Sopenharmony_ci} 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cistatic nir_ssa_def * 61bf215546Sopenharmony_cilower_abi_instr(nir_builder *b, nir_instr *instr, void *state) 62bf215546Sopenharmony_ci{ 63bf215546Sopenharmony_ci lower_abi_state *s = (lower_abi_state *) state; 64bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 65bf215546Sopenharmony_ci gl_shader_stage stage = b->shader->info.stage; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci switch (intrin->intrinsic) { 68bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_factors_amd: 69bf215546Sopenharmony_ci return load_ring(b, RING_HS_TESS_FACTOR, s); 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_factors_offset_amd: 72bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset); 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_offchip_amd: 75bf215546Sopenharmony_ci return load_ring(b, RING_HS_TESS_OFFCHIP, s); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_offchip_offset_amd: 78bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset); 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci case nir_intrinsic_load_tcs_num_patches_amd: 81bf215546Sopenharmony_ci return nir_imm_int(b, s->info->num_tess_patches); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci case nir_intrinsic_load_ring_esgs_amd: 84bf215546Sopenharmony_ci return load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s); 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci case nir_intrinsic_load_ring_es2gs_offset_amd: 87bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci case nir_intrinsic_load_tess_rel_patch_id_amd: 90bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_CTRL) { 91bf215546Sopenharmony_ci return nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0)); 92bf215546Sopenharmony_ci } else if (stage == MESA_SHADER_TESS_EVAL) { 93bf215546Sopenharmony_ci /* Setting an upper bound like this will actually make it possible 94bf215546Sopenharmony_ci * to optimize some multiplications (in address calculations) so that 95bf215546Sopenharmony_ci * constant additions can be added to the const offset in memory load instructions. 96bf215546Sopenharmony_ci */ 97bf215546Sopenharmony_ci nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id); 98bf215546Sopenharmony_ci nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr); 99bf215546Sopenharmony_ci nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1)); 100bf215546Sopenharmony_ci return arg; 101bf215546Sopenharmony_ci } else { 102bf215546Sopenharmony_ci unreachable("invalid tessellation shader stage"); 103bf215546Sopenharmony_ci } 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci case nir_intrinsic_load_patch_vertices_in: 106bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_CTRL) 107bf215546Sopenharmony_ci return nir_imm_int(b, s->pl_key->tcs.tess_input_vertices); 108bf215546Sopenharmony_ci else if (stage == MESA_SHADER_TESS_EVAL) 109bf215546Sopenharmony_ci return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out); 110bf215546Sopenharmony_ci else 111bf215546Sopenharmony_ci unreachable("invalid tessellation shader stage"); 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci case nir_intrinsic_load_gs_vertex_offset_amd: 114bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_num_input_vertices_amd: 117bf215546Sopenharmony_ci return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 118bf215546Sopenharmony_ci nir_imm_int(b, 12), nir_imm_int(b, 9)); 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_num_input_primitives_amd: 121bf215546Sopenharmony_ci return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 122bf215546Sopenharmony_ci nir_imm_int(b, 22), nir_imm_int(b, 9)); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci case nir_intrinsic_load_packed_passthrough_primitive_amd: 125bf215546Sopenharmony_ci /* NGG passthrough mode: the HW already packs the primitive export value to a single register. */ 126bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci case nir_intrinsic_load_shader_query_enabled_amd: 129bf215546Sopenharmony_ci return nir_ieq_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state), 1); 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci case nir_intrinsic_load_cull_any_enabled_amd: 132bf215546Sopenharmony_ci return nggc_bool_setting(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives, s); 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci case nir_intrinsic_load_cull_front_face_enabled_amd: 135bf215546Sopenharmony_ci return nggc_bool_setting(b, radv_nggc_front_face, s); 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci case nir_intrinsic_load_cull_back_face_enabled_amd: 138bf215546Sopenharmony_ci return nggc_bool_setting(b, radv_nggc_back_face, s); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci case nir_intrinsic_load_cull_ccw_amd: 141bf215546Sopenharmony_ci return nggc_bool_setting(b, radv_nggc_face_is_ccw, s); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci case nir_intrinsic_load_cull_small_primitives_enabled_amd: 144bf215546Sopenharmony_ci return nggc_bool_setting(b, radv_nggc_small_primitives, s); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci case nir_intrinsic_load_cull_small_prim_precision_amd: { 147bf215546Sopenharmony_ci /* To save space, only the exponent is stored in the high 8 bits. 148bf215546Sopenharmony_ci * We calculate the precision from those 8 bits: 149bf215546Sopenharmony_ci * exponent = nggc_settings >> 24 150bf215546Sopenharmony_ci * precision = 1.0 * 2 ^ exponent 151bf215546Sopenharmony_ci */ 152bf215546Sopenharmony_ci nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings); 153bf215546Sopenharmony_ci nir_ssa_def *exponent = nir_ishr_imm(b, settings, 24u); 154bf215546Sopenharmony_ci return nir_ldexp(b, nir_imm_float(b, 1.0f), exponent); 155bf215546Sopenharmony_ci } 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_x_scale: 158bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]); 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_x_offset: 161bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_y_scale: 164bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]); 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_y_offset: 167bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci case nir_intrinsic_load_ring_task_draw_amd: 170bf215546Sopenharmony_ci return load_ring(b, RING_TS_DRAW, s); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci case nir_intrinsic_load_ring_task_payload_amd: 173bf215546Sopenharmony_ci return load_ring(b, RING_TS_PAYLOAD, s); 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci case nir_intrinsic_load_ring_mesh_scratch_amd: 176bf215546Sopenharmony_ci return load_ring(b, RING_MS_SCRATCH, s); 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci case nir_intrinsic_load_ring_mesh_scratch_offset_amd: 179bf215546Sopenharmony_ci /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */ 180bf215546Sopenharmony_ci return nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff), 181bf215546Sopenharmony_ci RADV_MESH_SCRATCH_ENTRY_BYTES); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci case nir_intrinsic_load_task_ring_entry_amd: 184bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci case nir_intrinsic_load_task_ib_addr: 187bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci case nir_intrinsic_load_task_ib_stride: 190bf215546Sopenharmony_ci return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride); 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci case nir_intrinsic_load_lshs_vertex_stride_amd: { 193bf215546Sopenharmony_ci unsigned io_num = stage == MESA_SHADER_VERTEX ? 194bf215546Sopenharmony_ci s->info->vs.num_linked_outputs : 195bf215546Sopenharmony_ci s->info->tcs.num_linked_inputs; 196bf215546Sopenharmony_ci return nir_imm_int(b, io_num * 16); 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci case nir_intrinsic_load_hs_out_patch_data_offset_amd: { 200bf215546Sopenharmony_ci unsigned num_patches = s->info->num_tess_patches; 201bf215546Sopenharmony_ci unsigned out_vertices_per_patch = b->shader->info.tess.tcs_vertices_out; 202bf215546Sopenharmony_ci unsigned num_tcs_outputs = stage == MESA_SHADER_TESS_CTRL ? 203bf215546Sopenharmony_ci s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs; 204bf215546Sopenharmony_ci int per_vertex_output_patch_size = out_vertices_per_patch * num_tcs_outputs * 16u; 205bf215546Sopenharmony_ci return nir_imm_int(b, num_patches * per_vertex_output_patch_size); 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci default: 209bf215546Sopenharmony_ci unreachable("invalid NIR RADV ABI intrinsic."); 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci} 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_cistatic bool 214bf215546Sopenharmony_cifilter_abi_instr(const nir_instr *instr, 215bf215546Sopenharmony_ci UNUSED const void *state) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci lower_abi_state *s = (lower_abi_state *) state; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 220bf215546Sopenharmony_ci return false; 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 223bf215546Sopenharmony_ci return (intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_amd && !s->use_llvm) || 224bf215546Sopenharmony_ci (intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_amd && !s->use_llvm) || 225bf215546Sopenharmony_ci (intrin->intrinsic == nir_intrinsic_load_ring_esgs_amd && !s->use_llvm) || 226bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_offset_amd || 227bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_offset_amd || 228bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_patch_vertices_in || 229bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_tcs_num_patches_amd || 230bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ring_es2gs_offset_amd || 231bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_tess_rel_patch_id_amd || 232bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_gs_vertex_offset_amd || 233bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_vertices_amd || 234bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_primitives_amd || 235bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_packed_passthrough_primitive_amd || 236bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_shader_query_enabled_amd || 237bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_cull_any_enabled_amd || 238bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_cull_front_face_enabled_amd || 239bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_cull_back_face_enabled_amd || 240bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_cull_ccw_amd || 241bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_cull_small_primitives_enabled_amd || 242bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_cull_small_prim_precision_amd || 243bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_viewport_x_scale || 244bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_viewport_x_offset || 245bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_viewport_y_scale || 246bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_viewport_y_offset || 247bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ring_task_draw_amd || 248bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ring_task_payload_amd || 249bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_amd || 250bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_offset_amd || 251bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_task_ring_entry_amd || 252bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_task_ib_addr || 253bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_task_ib_stride || 254bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_lshs_vertex_stride_amd || 255bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_hs_out_patch_data_offset_amd; 256bf215546Sopenharmony_ci} 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_civoid 259bf215546Sopenharmony_ciradv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, 260bf215546Sopenharmony_ci const struct radv_shader_info *info, const struct radv_shader_args *args, 261bf215546Sopenharmony_ci const struct radv_pipeline_key *pl_key, bool use_llvm) 262bf215546Sopenharmony_ci{ 263bf215546Sopenharmony_ci lower_abi_state state = { 264bf215546Sopenharmony_ci .gfx_level = gfx_level, 265bf215546Sopenharmony_ci .info = info, 266bf215546Sopenharmony_ci .args = args, 267bf215546Sopenharmony_ci .pl_key = pl_key, 268bf215546Sopenharmony_ci .use_llvm = use_llvm, 269bf215546Sopenharmony_ci }; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci nir_shader_lower_instructions(shader, filter_abi_instr, lower_abi_instr, &state); 272bf215546Sopenharmony_ci} 273