/*
 * Copyright © 2019 Valve Corporation.
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_shader_args.h"
#include "radv_private.h"
#include "radv_shader.h"

static void
set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
   ud_info->sgpr_idx = *sgpr_idx;
   ud_info->num_sgprs = num_sgprs;
   *sgpr_idx += num_sgprs;
}

static void
set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
   struct radv_userdata_info *ud_info = &args->user_sgprs_locs.shader_data[idx];
   assert(ud_info);

   set_loc(ud_info, sgpr_idx, num_sgprs);
}

static void
set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
   bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS &&
                             idx != AC_UD_CS_TASK_RING_OFFSETS && idx != AC_UD_CS_SBT_DESCRIPTORS &&
                             idx != AC_UD_CS_RAY_LAUNCH_SIZE_ADDR;

   set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}
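
/* Descriptor sets are always 32-bit pointers (a single user SGPR each);
 * descriptor_sets_enabled records which sets actually received a location.
 */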
static void
set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
   struct radv_userdata_locations *locs = &args->user_sgprs_locs;
   struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
   assert(ud_info);

   set_loc(ud_info, sgpr_idx, 1);

   locs->descriptor_sets_enabled |= 1u << idx;
}

struct user_sgpr_info {
   uint64_t inline_push_constant_mask;
   bool inlined_all_push_consts;
   bool indirect_all_descriptor_sets;
   uint8_t remaining_sgprs;
};

static uint8_t
count_vs_user_sgprs(const struct radv_shader_info *info)
{
   uint8_t count = 1; /* vertex offset */

   if (info->vs.vb_desc_usage_mask)
      count++;
   if (info->vs.needs_draw_id)
      count++;
   if (info->vs.needs_base_instance)
      count++;

   return count;
}

static uint8_t
count_ms_user_sgprs(const struct radv_shader_info *info)
{
   uint8_t count = 1 + 3; /* firstTask + num_work_groups[3] */

   if (info->vs.needs_draw_id)
      count++;
   if (info->cs.uses_task_rings)
      count++;

   return count;
}

static unsigned
count_ngg_sgprs(const struct radv_shader_info *info, bool has_ngg_query)
{
   unsigned count = 0;

   if (has_ngg_query)
      count += 1; /* ngg_query_state */
   if (info->has_ngg_culling)
      count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */

   return count;
}

static void
allocate_inline_push_consts(const struct radv_shader_info *info,
                            struct user_sgpr_info *user_sgpr_info)
{
   uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;

   if (!info->inline_push_constant_mask)
      return;

   uint64_t mask = info->inline_push_constant_mask;
   uint8_t num_push_consts = util_bitcount64(mask);

   /* Disable the default push constants path if all constants can be inlined and if shaders don't
    * use dynamic descriptors.
    */
   if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) &&
       info->can_inline_all_push_constants && !info->loads_dynamic_offsets) {
      user_sgpr_info->inlined_all_push_consts = true;
      remaining_sgprs++;
   } else {
      /* Clamp to the maximum number of allowed inlined push constants. */
      while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT)) {
         num_push_consts--;
         mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1);
      }
   }

   user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask);
   user_sgpr_info->inline_push_constant_mask = mask;
}

static void
allocate_user_sgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                    struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
                    gl_shader_stage previous_stage, bool needs_view_index, bool has_ngg_query,
                    struct user_sgpr_info *user_sgpr_info)
{
   uint8_t user_sgpr_count = 0;

   memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));

   /* 2 user sgprs will always be allocated for scratch/rings */
   user_sgpr_count += 2;

   if (stage == MESA_SHADER_TASK)
      user_sgpr_count += 2; /* task descriptors */

   /* prolog inputs */
   if (info->vs.has_prolog)
      user_sgpr_count += 2;

   switch (stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      if (info->cs.uses_sbt)
         user_sgpr_count += 2;
      if (info->cs.uses_grid_size)
         user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2;
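      /* 64-bit address of the ray launch size. */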
      if (info->cs.uses_ray_launch_size)
         user_sgpr_count += 2;
      if (info->vs.needs_draw_id)
         user_sgpr_count += 1;
      if (info->cs.uses_task_rings)
         user_sgpr_count += 4; /* ring_entry, 2x ib_addr, ib_stride */
      break;
   case MESA_SHADER_FRAGMENT:
      /* epilog continue PC */
      if (info->ps.has_epilog)
         user_sgpr_count += 1;
      break;
   case MESA_SHADER_VERTEX:
      if (!args->is_gs_copy_shader)
         user_sgpr_count += count_vs_user_sgprs(info);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (has_previous_stage) {
         if (previous_stage == MESA_SHADER_VERTEX)
            user_sgpr_count += count_vs_user_sgprs(info);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      break;
   case MESA_SHADER_GEOMETRY:
      if (has_previous_stage) {
         if (info->is_ngg)
            user_sgpr_count += count_ngg_sgprs(info, has_ngg_query);

         if (previous_stage == MESA_SHADER_VERTEX) {
            user_sgpr_count += count_vs_user_sgprs(info);
         } else if (previous_stage == MESA_SHADER_MESH) {
            user_sgpr_count += count_ms_user_sgprs(info);
         }
      }
      break;
   default:
      break;
   }

   if (needs_view_index)
      user_sgpr_count++;

   if (info->force_vrs_per_vertex)
      user_sgpr_count++;

   if (info->loads_push_constants)
      user_sgpr_count++;

   if (info->so.num_outputs)
      user_sgpr_count++;

   uint32_t available_sgprs =
      gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
   uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
   uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);

   if (remaining_sgprs < num_desc_set) {
      user_sgpr_info->indirect_all_descriptor_sets = true;
      user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
   } else {
      user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
   }

   allocate_inline_push_consts(info, user_sgpr_info);
}

static void
declare_global_input_sgprs(const struct radv_shader_info *info,
                           const struct user_sgpr_info *user_sgpr_info,
                           struct radv_shader_args *args)
{
   /* 1 for each descriptor set */
   if (!user_sgpr_info->indirect_all_descriptor_sets) {
      uint32_t mask = info->desc_set_used_mask;

      while (mask) {
         int i = u_bit_scan(&mask);

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
      }
   } else {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
   }

   if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
      /* 1 for push constants and dynamic descriptors */
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
   }

   for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
   }
   args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;

   if (info->so.num_outputs) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
   }
}

static void
declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
                                gl_shader_stage stage, bool has_previous_stage,
                                gl_shader_stage previous_stage)
{
   if (info->vs.has_prolog)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);

   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
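      /* The VS prolog relies on this argument order: vertex buffers, base
       * vertex, draw ID, then base instance (see set_vs_specific_input_locs()).
       */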
      if (info->vs.vb_desc_usage_mask) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
      }
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
      if (info->vs.needs_draw_id) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
      }
      if (info->vs.needs_base_instance) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
      }
   }
}

static void
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
                       struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   if (!args->is_gs_copy_shader) {
      if (info->vs.as_ls) {
         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else if (gfx_level >= GFX10) {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      } else {
         if (gfx_level >= GFX10) {
            if (info->is_ngg) {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            } else {
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
               ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            }
         } else {
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
            ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
         }
      }
   }

   if (info->vs.dynamic_inputs) {
      assert(info->vs.use_per_attribute_vb_descs);
      unsigned num_attributes = util_last_bit(info->vs.vb_desc_usage_mask);
      for (unsigned i = 0; i < num_attributes; i++)
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
      /* Ensure the main shader doesn't use fewer VGPRs than the prolog. The prolog requires one
       * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
       */
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
   }
}
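
/* Streamout is only supported from VS or TES here (see the assert below). */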
static void
declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
                        gl_shader_stage stage)
{
   int i;

   /* Streamout SGPRs. */
   if (info->so.num_outputs) {
      assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
   } else if (stage == MESA_SHADER_TESS_EVAL) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
   }

   /* A streamout buffer offset is loaded if the stride is non-zero. */
   for (i = 0; i < 4; i++) {
      if (!info->so.strides[i])
         continue;

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
   }
}

static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}

static void
declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
   ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
   if (info->vs.needs_draw_id) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
   }
   if (info->cs.uses_task_rings) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
   }
}

static void
declare_ms_input_vgprs(struct radv_shader_args *args)
{
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* instance_id */
}

static void
declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
   unsigned spi_ps_input = info->ps.spi_ps_input;

   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */

   if (args->remap_spi_ps_input) {
      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
       * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
       * VGPR arguments here.
       */
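      /* Walk the arguments in declaration order: keep non-VGPR args, mark
       * VGPRs absent from spi_ps_input as skipped, and compact the remaining
       * VGPRs into consecutive registers via their offsets.
       */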
      unsigned arg_count = 0;
      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
         if (args->ac.args[i].file != AC_ARG_VGPR) {
            arg_count++;
            continue;
         }

         if (!(spi_ps_input & (1 << vgpr_arg))) {
            args->ac.args[i].skip = true;
         } else {
            args->ac.args[i].offset = vgpr_reg;
            vgpr_reg += args->ac.args[i].size;
            arg_count++;
         }
         vgpr_arg++;
      }
   }

   if (info->ps.has_epilog) {
      /* FIXME: Ensure the main shader doesn't have fewer VGPRs than the epilog */
      for (unsigned i = 0; i < MAX_RTS; i++)
         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, NULL);
   }
}

static void
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
                  bool has_ngg_query)
{
   if (has_ngg_query)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_query_state);

   if (info->has_ngg_culling) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
   }
}

static void
set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
                      uint8_t *user_sgpr_idx)
{
   if (!user_sgpr_info->indirect_all_descriptor_sets) {
      for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
         if (args->descriptor_sets[i].used)
            set_loc_desc(args, i, user_sgpr_idx);
      }
   } else {
      set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
   }

   if (args->ac.push_constants.used) {
      set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
   }

   if (user_sgpr_info->inline_push_constant_mask) {
      set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
                     util_bitcount64(user_sgpr_info->inline_push_constant_mask));
   }

   if (args->streamout_buffers.used) {
      set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
   }
}

static void
set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
                           bool has_previous_stage, gl_shader_stage previous_stage,
                           uint8_t *user_sgpr_idx)
{
   if (args->prolog_inputs.used)
      set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);

   if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
                                    (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
      if (args->ac.vertex_buffers.used) {
         set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
      }

      unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
                        args->ac.start_instance.used;
      set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
   }
}

static void
set_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx)
{
   unsigned vs_num =
      args->ac.base_vertex.used + 3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
   set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);

   if (args->ac.task_ring_entry.used)
      set_loc_shader(args, AC_UD_TASK_RING_ENTRY, user_sgpr_idx, 1);
}
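
/* Declare all input SGPR/VGPR arguments for the given stage (NGG VS/TES are
 * handled through the GS path) and record the user SGPR locations that are
 * later used to emit user data.
 */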
void
radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipeline_key *key,
                         const struct radv_shader_info *info, gl_shader_stage stage,
                         bool has_previous_stage, gl_shader_stage previous_stage,
                         struct radv_shader_args *args)
{
   struct user_sgpr_info user_sgpr_info;
   bool needs_view_index = info->uses_view_index;
   bool has_ngg_query = stage == MESA_SHADER_GEOMETRY || key->primitives_generated_query;

   if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
      /* Handle all NGG shaders as GS to simplify the code here. */
      previous_stage = stage;
      stage = MESA_SHADER_GEOMETRY;
      has_previous_stage = true;
   }

   for (int i = 0; i < MAX_SETS; i++)
      args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
   for (int i = 0; i < AC_UD_MAX_UD; i++)
      args->user_sgprs_locs.shader_data[i].sgpr_idx = -1;

   allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, previous_stage,
                       needs_view_index, has_ngg_query, &user_sgpr_info);

   if (args->explicit_scratch_args) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
   }
   if (stage == MESA_SHADER_TASK) {
      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets);
   }

   /* To ensure prologs match the main VS, VS-specific input SGPRs have to be placed before other
    * SGPRs.
    */

   switch (stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      declare_global_input_sgprs(info, &user_sgpr_info, args);

      if (info->cs.uses_sbt) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.sbt_descriptors);
      }

      if (info->cs.uses_grid_size) {
         if (args->load_grid_size_from_user_sgpr)
            ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
         else
            ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.num_work_groups);
      }

      if (info->cs.uses_ray_launch_size) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr);
      }

      if (info->vs.needs_draw_id) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
      }

      if (info->cs.uses_task_rings) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride);
      }

      for (int i = 0; i < 3; i++) {
         if (info->cs.uses_block_id[i]) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
         }
      }

      if (info->cs.uses_local_invocation_idx) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
      }

      if (args->explicit_scratch_args && gfx_level < GFX11) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      if (gfx_level >= GFX11)
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
      else
         ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
      break;
   case MESA_SHADER_VERTEX:
      /* NGG is handled by the GS case */
      assert(!info->is_ngg);

      declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);

      declare_global_input_sgprs(info, &user_sgpr_info, args);

      if (needs_view_index) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
      }
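
      /* SGPR holding the forced per-vertex VRS rates. */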
      if (info->force_vrs_per_vertex) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
      }

      if (info->vs.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else if (info->vs.as_ls) {
         /* no extra parameters */
      } else {
         declare_streamout_sgprs(info, args, stage);
      }

      if (args->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }

      declare_vs_input_vgprs(gfx_level, info, args);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (has_previous_stage) {
         // First 6 system regs
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);

         if (gfx_level >= GFX11) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
         } else {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);

         declare_global_input_sgprs(info, &user_sgpr_info, args);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);

         declare_vs_input_vgprs(gfx_level, info, args);
      } else {
         declare_global_input_sgprs(info, &user_sgpr_info, args);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
         if (args->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
      }
      break;
   case MESA_SHADER_TESS_EVAL:
      /* NGG is handled by the GS case */
      assert(!info->is_ngg);

      declare_global_input_sgprs(info, &user_sgpr_info, args);

      if (needs_view_index)
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);

      if (info->tes.as_es) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
      } else {
         declare_streamout_sgprs(info, args, stage);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
      }
      if (args->explicit_scratch_args) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }
      declare_tes_input_vgprs(args);
      break;
   case MESA_SHADER_GEOMETRY:
      if (has_previous_stage) {
         // First 6 system regs
         if (info->is_ngg) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
         } else {
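            /* Legacy (non-NGG) GS reads the GS->VS ring offset instead. */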
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);

         if (gfx_level < GFX11) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown

         if (previous_stage == MESA_SHADER_VERTEX) {
            declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
         } else if (previous_stage == MESA_SHADER_MESH) {
            declare_ms_input_sgprs(info, args);
         }

         declare_global_input_sgprs(info, &user_sgpr_info, args);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         if (info->force_vrs_per_vertex) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
         }

         if (info->is_ngg) {
            declare_ngg_sgprs(info, args, has_ngg_query);
         }

         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);

         if (previous_stage == MESA_SHADER_VERTEX) {
            declare_vs_input_vgprs(gfx_level, info, args);
         } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
            declare_tes_input_vgprs(args);
         } else if (previous_stage == MESA_SHADER_MESH) {
            declare_ms_input_vgprs(args);
         }
      } else {
         declare_global_input_sgprs(info, &user_sgpr_info, args);

         if (needs_view_index) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
         }

         if (info->force_vrs_per_vertex) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates);
         }

         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
         if (args->explicit_scratch_args) {
            ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
         }
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      declare_global_input_sgprs(info, &user_sgpr_info, args);

      if (info->ps.has_epilog) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ps_epilog_pc);
      }

      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
      if (args->explicit_scratch_args && gfx_level < GFX11) {
         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
      }
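
      /* VGPR inputs; these may be compacted according to spi_ps_input. */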
      declare_ps_input_vgprs(info, args);
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   uint8_t user_sgpr_idx = 0;

   set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
   if (stage == MESA_SHADER_TASK) {
      set_loc_shader_ptr(args, AC_UD_CS_TASK_RING_OFFSETS, &user_sgpr_idx);
   }

   /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
    * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
   if (has_previous_stage)
      user_sgpr_idx = 0;

   if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
      set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
   else if (has_previous_stage && previous_stage == MESA_SHADER_MESH)
      set_ms_input_locs(args, &user_sgpr_idx);

   set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);

   switch (stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      if (args->ac.sbt_descriptors.used) {
         set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
      }
      if (args->ac.num_work_groups.used) {
         set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx,
                        args->load_grid_size_from_user_sgpr ? 3 : 2);
      }
      if (args->ac.ray_launch_size_addr.used) {
         set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx);
      }
      if (args->ac.draw_id.used) {
         set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1);
      }
      if (args->ac.task_ring_entry.used) {
         set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1);
      }
      if (args->task_ib_addr.used) {
         assert(args->task_ib_stride.used);
         set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3);
      }
      break;
   case MESA_SHADER_VERTEX:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      if (args->ac.force_vrs_rates.used)
         set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_CTRL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_TESS_EVAL:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
      break;
   case MESA_SHADER_GEOMETRY:
      if (args->ac.view_index.used)
         set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);

      if (args->ac.force_vrs_rates.used)
         set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1);

      if (args->ngg_query_state.used) {
         set_loc_shader(args, AC_UD_NGG_QUERY_STATE, &user_sgpr_idx, 1);
      }

      if (args->ngg_culling_settings.used) {
         set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
      }

      if (args->ngg_viewport_scale[0].used) {
         assert(args->ngg_viewport_scale[1].used &&
                args->ngg_viewport_translate[0].used &&
                args->ngg_viewport_translate[1].used);
         set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
      }
      break;
   case MESA_SHADER_FRAGMENT:
      if (args->ps_epilog_pc.used)
         set_loc_shader(args, AC_UD_PS_EPILOG_PC, &user_sgpr_idx, 1);
      break;
   default:
      unreachable("Shader stage not implemented");
   }

   args->num_user_sgprs = user_sgpr_idx;
}

void
radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_epilog_key *key,
                            struct radv_shader_args *args)
{
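   /* The epilog is a separate shader part: it gets its own ring offsets and,
    * before GFX11, its own scratch offset ahead of the color export inputs.
    */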
   unsigned num_inputs = 0;

   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
   if (gfx_level < GFX11)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);

   /* Declare VGPR arguments for color exports. */
   for (unsigned i = 0; i < MAX_RTS; i++) {
      unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf;

      if (col_format == V_028714_SPI_SHADER_ZERO)
         continue;

      ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[num_inputs]);
      num_inputs++;
   }
}