/*
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir.h"
25bf215546Sopenharmony_ci#include "nir_builder.h"
26bf215546Sopenharmony_ci#include "ac_nir.h"
27bf215546Sopenharmony_ci#include "radv_constants.h"
28bf215546Sopenharmony_ci#include "radv_private.h"
29bf215546Sopenharmony_ci#include "radv_shader.h"
30bf215546Sopenharmony_ci#include "radv_shader_args.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_citypedef struct {
33bf215546Sopenharmony_ci   enum amd_gfx_level gfx_level;
34bf215546Sopenharmony_ci   const struct radv_shader_args *args;
35bf215546Sopenharmony_ci   const struct radv_shader_info *info;
36bf215546Sopenharmony_ci   const struct radv_pipeline_key *pl_key;
37bf215546Sopenharmony_ci   bool use_llvm;
38bf215546Sopenharmony_ci} lower_abi_state;
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic nir_ssa_def *
41bf215546Sopenharmony_ciload_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   struct ac_arg arg =
44bf215546Sopenharmony_ci      b->shader->info.stage == MESA_SHADER_TASK ?
45bf215546Sopenharmony_ci      s->args->task_ring_offsets :
46bf215546Sopenharmony_ci      s->args->ring_offsets;
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci   nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
49bf215546Sopenharmony_ci   ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
50bf215546Sopenharmony_ci   return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
51bf215546Sopenharmony_ci}
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_cistatic nir_ssa_def *
54bf215546Sopenharmony_cinggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
55bf215546Sopenharmony_ci{
56bf215546Sopenharmony_ci   nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
57bf215546Sopenharmony_ci   return nir_test_mask(b, settings, mask);
58bf215546Sopenharmony_ci}
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_cistatic nir_ssa_def *
61bf215546Sopenharmony_cilower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
62bf215546Sopenharmony_ci{
63bf215546Sopenharmony_ci   lower_abi_state *s = (lower_abi_state *) state;
64bf215546Sopenharmony_ci   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
65bf215546Sopenharmony_ci   gl_shader_stage stage = b->shader->info.stage;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   switch (intrin->intrinsic) {
68bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_factors_amd:
69bf215546Sopenharmony_ci      return load_ring(b, RING_HS_TESS_FACTOR, s);
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_factors_offset_amd:
72bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_offchip_amd:
75bf215546Sopenharmony_ci      return load_ring(b, RING_HS_TESS_OFFCHIP, s);
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
78bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   case nir_intrinsic_load_tcs_num_patches_amd:
81bf215546Sopenharmony_ci      return nir_imm_int(b, s->info->num_tess_patches);
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_esgs_amd:
84bf215546Sopenharmony_ci      return load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_es2gs_offset_amd:
87bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_rel_patch_id_amd:
90bf215546Sopenharmony_ci      if (stage == MESA_SHADER_TESS_CTRL) {
91bf215546Sopenharmony_ci         return nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0));
92bf215546Sopenharmony_ci      } else if (stage == MESA_SHADER_TESS_EVAL) {
93bf215546Sopenharmony_ci         /* Setting an upper bound like this will actually make it possible
94bf215546Sopenharmony_ci          * to optimize some multiplications (in address calculations) so that
95bf215546Sopenharmony_ci          * constant additions can be added to the const offset in memory load instructions.
96bf215546Sopenharmony_ci          */
97bf215546Sopenharmony_ci         nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
98bf215546Sopenharmony_ci         nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
99bf215546Sopenharmony_ci         nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1));
100bf215546Sopenharmony_ci         return arg;
101bf215546Sopenharmony_ci      } else {
102bf215546Sopenharmony_ci         unreachable("invalid tessellation shader stage");
103bf215546Sopenharmony_ci      }
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   case nir_intrinsic_load_patch_vertices_in:
106bf215546Sopenharmony_ci      if (stage == MESA_SHADER_TESS_CTRL)
107bf215546Sopenharmony_ci         return nir_imm_int(b, s->pl_key->tcs.tess_input_vertices);
108bf215546Sopenharmony_ci      else if (stage == MESA_SHADER_TESS_EVAL)
109bf215546Sopenharmony_ci         return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
110bf215546Sopenharmony_ci      else
111bf215546Sopenharmony_ci         unreachable("invalid tessellation shader stage");
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   case nir_intrinsic_load_gs_vertex_offset_amd:
114bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
117bf215546Sopenharmony_ci      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
118bf215546Sopenharmony_ci                         nir_imm_int(b, 12), nir_imm_int(b, 9));
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
121bf215546Sopenharmony_ci      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
122bf215546Sopenharmony_ci                         nir_imm_int(b, 22), nir_imm_int(b, 9));
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   case nir_intrinsic_load_packed_passthrough_primitive_amd:
125bf215546Sopenharmony_ci      /* NGG passthrough mode: the HW already packs the primitive export value to a single register. */
126bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   case nir_intrinsic_load_shader_query_enabled_amd:
129bf215546Sopenharmony_ci      return nir_ieq_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state), 1);
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_any_enabled_amd:
132bf215546Sopenharmony_ci      return nggc_bool_setting(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives, s);
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_front_face_enabled_amd:
135bf215546Sopenharmony_ci      return nggc_bool_setting(b, radv_nggc_front_face, s);
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_back_face_enabled_amd:
138bf215546Sopenharmony_ci      return nggc_bool_setting(b, radv_nggc_back_face, s);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_ccw_amd:
141bf215546Sopenharmony_ci      return nggc_bool_setting(b, radv_nggc_face_is_ccw, s);
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
144bf215546Sopenharmony_ci      return nggc_bool_setting(b, radv_nggc_small_primitives, s);
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_small_prim_precision_amd: {
147bf215546Sopenharmony_ci      /* To save space, only the exponent is stored in the high 8 bits.
148bf215546Sopenharmony_ci       * We calculate the precision from those 8 bits:
149bf215546Sopenharmony_ci       * exponent = nggc_settings >> 24
150bf215546Sopenharmony_ci       * precision = 1.0 * 2 ^ exponent
151bf215546Sopenharmony_ci       */
152bf215546Sopenharmony_ci      nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
153bf215546Sopenharmony_ci      nir_ssa_def *exponent = nir_ishr_imm(b, settings, 24u);
154bf215546Sopenharmony_ci      return nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
155bf215546Sopenharmony_ci   }
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_x_scale:
158bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]);
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_x_offset:
161bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]);
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_y_scale:
164bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]);
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_y_offset:
167bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]);
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_task_draw_amd:
170bf215546Sopenharmony_ci      return load_ring(b, RING_TS_DRAW, s);
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_task_payload_amd:
173bf215546Sopenharmony_ci      return load_ring(b, RING_TS_PAYLOAD, s);
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_mesh_scratch_amd:
176bf215546Sopenharmony_ci      return load_ring(b, RING_MS_SCRATCH, s);
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
179bf215546Sopenharmony_ci      /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
180bf215546Sopenharmony_ci      return nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
181bf215546Sopenharmony_ci                                          RADV_MESH_SCRATCH_ENTRY_BYTES);
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci   case nir_intrinsic_load_task_ring_entry_amd:
184bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   case nir_intrinsic_load_task_ib_addr:
187bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   case nir_intrinsic_load_task_ib_stride:
190bf215546Sopenharmony_ci      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   case nir_intrinsic_load_lshs_vertex_stride_amd: {
193bf215546Sopenharmony_ci      unsigned io_num = stage == MESA_SHADER_VERTEX ?
194bf215546Sopenharmony_ci         s->info->vs.num_linked_outputs :
195bf215546Sopenharmony_ci         s->info->tcs.num_linked_inputs;
196bf215546Sopenharmony_ci      return nir_imm_int(b, io_num * 16);
197bf215546Sopenharmony_ci   }
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
200bf215546Sopenharmony_ci      unsigned num_patches = s->info->num_tess_patches;
201bf215546Sopenharmony_ci      unsigned out_vertices_per_patch = b->shader->info.tess.tcs_vertices_out;
202bf215546Sopenharmony_ci      unsigned num_tcs_outputs = stage == MESA_SHADER_TESS_CTRL ?
203bf215546Sopenharmony_ci         s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs;
204bf215546Sopenharmony_ci      int per_vertex_output_patch_size = out_vertices_per_patch * num_tcs_outputs * 16u;
205bf215546Sopenharmony_ci      return nir_imm_int(b, num_patches * per_vertex_output_patch_size);
206bf215546Sopenharmony_ci   }
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   default:
209bf215546Sopenharmony_ci      unreachable("invalid NIR RADV ABI intrinsic.");
210bf215546Sopenharmony_ci   }
211bf215546Sopenharmony_ci}
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_cistatic bool
214bf215546Sopenharmony_cifilter_abi_instr(const nir_instr *instr,
215bf215546Sopenharmony_ci                 UNUSED const void *state)
216bf215546Sopenharmony_ci{
217bf215546Sopenharmony_ci   lower_abi_state *s = (lower_abi_state *) state;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
220bf215546Sopenharmony_ci      return false;
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
223bf215546Sopenharmony_ci   return (intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_amd && !s->use_llvm) ||
224bf215546Sopenharmony_ci          (intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_amd && !s->use_llvm) ||
225bf215546Sopenharmony_ci          (intrin->intrinsic == nir_intrinsic_load_ring_esgs_amd && !s->use_llvm) ||
226bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_offset_amd ||
227bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_offset_amd ||
228bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_patch_vertices_in ||
229bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_tcs_num_patches_amd ||
230bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_ring_es2gs_offset_amd ||
231bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_tess_rel_patch_id_amd ||
232bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_gs_vertex_offset_amd ||
233bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_vertices_amd ||
234bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_primitives_amd ||
235bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_packed_passthrough_primitive_amd ||
236bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_shader_query_enabled_amd ||
237bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_cull_any_enabled_amd ||
238bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_cull_front_face_enabled_amd ||
239bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_cull_back_face_enabled_amd ||
240bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_cull_ccw_amd ||
241bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_cull_small_primitives_enabled_amd ||
242bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_cull_small_prim_precision_amd ||
243bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_viewport_x_scale ||
244bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_viewport_x_offset ||
245bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_viewport_y_scale ||
246bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_viewport_y_offset ||
247bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_ring_task_draw_amd ||
248bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_ring_task_payload_amd ||
249bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_amd ||
250bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_offset_amd ||
251bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_task_ring_entry_amd ||
252bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_task_ib_addr ||
253bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_task_ib_stride ||
254bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_lshs_vertex_stride_amd ||
255bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_load_hs_out_patch_data_offset_amd;
256bf215546Sopenharmony_ci}
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_civoid
259bf215546Sopenharmony_ciradv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
260bf215546Sopenharmony_ci                   const struct radv_shader_info *info, const struct radv_shader_args *args,
261bf215546Sopenharmony_ci                   const struct radv_pipeline_key *pl_key, bool use_llvm)
262bf215546Sopenharmony_ci{
263bf215546Sopenharmony_ci   lower_abi_state state = {
264bf215546Sopenharmony_ci      .gfx_level = gfx_level,
265bf215546Sopenharmony_ci      .info = info,
266bf215546Sopenharmony_ci      .args = args,
267bf215546Sopenharmony_ci      .pl_key = pl_key,
268bf215546Sopenharmony_ci      .use_llvm = use_llvm,
269bf215546Sopenharmony_ci   };
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   nir_shader_lower_instructions(shader, filter_abi_instr, lower_abi_instr, &state);
272bf215546Sopenharmony_ci}
273