1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Valve Corporation. 3bf215546Sopenharmony_ci * Copyright © 2016 Red Hat. 4bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * based in part on anv driver which is: 7bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 8bf215546Sopenharmony_ci * 9bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 10bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 11bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 12bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 14bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 17bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 18bf215546Sopenharmony_ci * Software. 19bf215546Sopenharmony_ci * 20bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 23bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 25bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 26bf215546Sopenharmony_ci * IN THE SOFTWARE. 27bf215546Sopenharmony_ci */ 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "radv_shader_args.h" 30bf215546Sopenharmony_ci#include "radv_private.h" 31bf215546Sopenharmony_ci#include "radv_shader.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cistatic void 34bf215546Sopenharmony_ciset_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs) 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci ud_info->sgpr_idx = *sgpr_idx; 37bf215546Sopenharmony_ci ud_info->num_sgprs = num_sgprs; 38bf215546Sopenharmony_ci *sgpr_idx += num_sgprs; 39bf215546Sopenharmony_ci} 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistatic void 42bf215546Sopenharmony_ciset_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs) 43bf215546Sopenharmony_ci{ 44bf215546Sopenharmony_ci struct radv_userdata_info *ud_info = &args->user_sgprs_locs.shader_data[idx]; 45bf215546Sopenharmony_ci assert(ud_info); 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci set_loc(ud_info, sgpr_idx, num_sgprs); 48bf215546Sopenharmony_ci} 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cistatic void 51bf215546Sopenharmony_ciset_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) 52bf215546Sopenharmony_ci{ 53bf215546Sopenharmony_ci bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS && 54bf215546Sopenharmony_ci idx != AC_UD_CS_TASK_RING_OFFSETS && idx != AC_UD_CS_SBT_DESCRIPTORS && 55bf215546Sopenharmony_ci idx != AC_UD_CS_RAY_LAUNCH_SIZE_ADDR; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); 58bf215546Sopenharmony_ci} 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cistatic void 61bf215546Sopenharmony_ciset_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) 62bf215546Sopenharmony_ci{ 63bf215546Sopenharmony_ci struct radv_userdata_locations *locs = &args->user_sgprs_locs; 64bf215546Sopenharmony_ci struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; 65bf215546Sopenharmony_ci assert(ud_info); 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci set_loc(ud_info, sgpr_idx, 1); 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci locs->descriptor_sets_enabled |= 1u << idx; 70bf215546Sopenharmony_ci} 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_cistruct user_sgpr_info { 73bf215546Sopenharmony_ci uint64_t inline_push_constant_mask; 74bf215546Sopenharmony_ci bool inlined_all_push_consts; 75bf215546Sopenharmony_ci bool indirect_all_descriptor_sets; 76bf215546Sopenharmony_ci uint8_t remaining_sgprs; 77bf215546Sopenharmony_ci}; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cistatic uint8_t 80bf215546Sopenharmony_cicount_vs_user_sgprs(const struct radv_shader_info *info) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci uint8_t count = 1; /* vertex offset */ 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci if (info->vs.vb_desc_usage_mask) 85bf215546Sopenharmony_ci count++; 86bf215546Sopenharmony_ci if (info->vs.needs_draw_id) 87bf215546Sopenharmony_ci count++; 88bf215546Sopenharmony_ci if (info->vs.needs_base_instance) 89bf215546Sopenharmony_ci count++; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci return count; 92bf215546Sopenharmony_ci} 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic uint8_t 95bf215546Sopenharmony_cicount_ms_user_sgprs(const struct radv_shader_info *info) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci uint8_t count = 1 + 3; /* firstTask + num_work_groups[3] */ 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci if (info->vs.needs_draw_id) 100bf215546Sopenharmony_ci count++; 101bf215546Sopenharmony_ci if (info->cs.uses_task_rings) 102bf215546Sopenharmony_ci count++; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci return count; 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic unsigned 108bf215546Sopenharmony_cicount_ngg_sgprs(const struct radv_shader_info *info, bool has_ngg_query) 109bf215546Sopenharmony_ci{ 110bf215546Sopenharmony_ci unsigned count = 0; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci if (has_ngg_query) 113bf215546Sopenharmony_ci count += 1; /* ngg_query_state */ 114bf215546Sopenharmony_ci if (info->has_ngg_culling) 115bf215546Sopenharmony_ci count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */ 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci return count; 118bf215546Sopenharmony_ci} 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_cistatic void 121bf215546Sopenharmony_ciallocate_inline_push_consts(const struct radv_shader_info *info, 122bf215546Sopenharmony_ci struct user_sgpr_info *user_sgpr_info) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci if (!info->inline_push_constant_mask) 127bf215546Sopenharmony_ci return; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci uint64_t mask = info->inline_push_constant_mask; 130bf215546Sopenharmony_ci uint8_t num_push_consts = util_bitcount64(mask); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci /* Disable the default push constants path if all constants can be inlined and if shaders don't 133bf215546Sopenharmony_ci * use dynamic descriptors. 134bf215546Sopenharmony_ci */ 135bf215546Sopenharmony_ci if (num_push_consts <= MIN2(remaining_sgprs + 1, AC_MAX_INLINE_PUSH_CONSTS) && 136bf215546Sopenharmony_ci info->can_inline_all_push_constants && !info->loads_dynamic_offsets) { 137bf215546Sopenharmony_ci user_sgpr_info->inlined_all_push_consts = true; 138bf215546Sopenharmony_ci remaining_sgprs++; 139bf215546Sopenharmony_ci } else { 140bf215546Sopenharmony_ci /* Clamp to the maximum number of allowed inlined push constants. */ 141bf215546Sopenharmony_ci while (num_push_consts > MIN2(remaining_sgprs, AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT)) { 142bf215546Sopenharmony_ci num_push_consts--; 143bf215546Sopenharmony_ci mask &= ~BITFIELD64_BIT(util_last_bit64(mask) - 1); 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci } 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci user_sgpr_info->remaining_sgprs = remaining_sgprs - util_bitcount64(mask); 148bf215546Sopenharmony_ci user_sgpr_info->inline_push_constant_mask = mask; 149bf215546Sopenharmony_ci} 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_cistatic void 152bf215546Sopenharmony_ciallocate_user_sgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, 153bf215546Sopenharmony_ci struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage, 154bf215546Sopenharmony_ci gl_shader_stage previous_stage, bool needs_view_index, bool has_ngg_query, 155bf215546Sopenharmony_ci struct user_sgpr_info *user_sgpr_info) 156bf215546Sopenharmony_ci{ 157bf215546Sopenharmony_ci uint8_t user_sgpr_count = 0; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci /* 2 user sgprs will always be allocated for scratch/rings */ 162bf215546Sopenharmony_ci user_sgpr_count += 2; 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci if (stage == MESA_SHADER_TASK) 165bf215546Sopenharmony_ci user_sgpr_count += 2; /* task descriptors */ 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* prolog inputs */ 168bf215546Sopenharmony_ci if (info->vs.has_prolog) 169bf215546Sopenharmony_ci user_sgpr_count += 2; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci switch (stage) { 172bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 173bf215546Sopenharmony_ci case MESA_SHADER_TASK: 174bf215546Sopenharmony_ci if (info->cs.uses_sbt) 175bf215546Sopenharmony_ci user_sgpr_count += 2; 176bf215546Sopenharmony_ci if (info->cs.uses_grid_size) 177bf215546Sopenharmony_ci user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2; 178bf215546Sopenharmony_ci if (info->cs.uses_ray_launch_size) 179bf215546Sopenharmony_ci user_sgpr_count += 2; 180bf215546Sopenharmony_ci if (info->vs.needs_draw_id) 181bf215546Sopenharmony_ci user_sgpr_count += 1; 182bf215546Sopenharmony_ci if (info->cs.uses_task_rings) 183bf215546Sopenharmony_ci user_sgpr_count += 4; /* ring_entry, 2x ib_addr, ib_stride */ 184bf215546Sopenharmony_ci break; 185bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 186bf215546Sopenharmony_ci /* epilog continue PC */ 187bf215546Sopenharmony_ci if (info->ps.has_epilog) 188bf215546Sopenharmony_ci user_sgpr_count += 1; 189bf215546Sopenharmony_ci break; 190bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 191bf215546Sopenharmony_ci if (!args->is_gs_copy_shader) 192bf215546Sopenharmony_ci user_sgpr_count += count_vs_user_sgprs(info); 193bf215546Sopenharmony_ci break; 194bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 195bf215546Sopenharmony_ci if (has_previous_stage) { 196bf215546Sopenharmony_ci if (previous_stage == MESA_SHADER_VERTEX) 197bf215546Sopenharmony_ci user_sgpr_count += count_vs_user_sgprs(info); 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci break; 200bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 201bf215546Sopenharmony_ci break; 202bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 203bf215546Sopenharmony_ci if (has_previous_stage) { 204bf215546Sopenharmony_ci if (info->is_ngg) 205bf215546Sopenharmony_ci user_sgpr_count += count_ngg_sgprs(info, has_ngg_query); 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci if (previous_stage == MESA_SHADER_VERTEX) { 208bf215546Sopenharmony_ci user_sgpr_count += count_vs_user_sgprs(info); 209bf215546Sopenharmony_ci } else if (previous_stage == MESA_SHADER_MESH) { 210bf215546Sopenharmony_ci user_sgpr_count += count_ms_user_sgprs(info); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci break; 214bf215546Sopenharmony_ci default: 215bf215546Sopenharmony_ci break; 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci if (needs_view_index) 219bf215546Sopenharmony_ci user_sgpr_count++; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci if (info->force_vrs_per_vertex) 222bf215546Sopenharmony_ci user_sgpr_count++; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci if (info->loads_push_constants) 225bf215546Sopenharmony_ci user_sgpr_count++; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci if (info->so.num_outputs) 228bf215546Sopenharmony_ci user_sgpr_count++; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci uint32_t available_sgprs = 231bf215546Sopenharmony_ci gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16; 232bf215546Sopenharmony_ci uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; 233bf215546Sopenharmony_ci uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask); 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci if (remaining_sgprs < num_desc_set) { 236bf215546Sopenharmony_ci user_sgpr_info->indirect_all_descriptor_sets = true; 237bf215546Sopenharmony_ci user_sgpr_info->remaining_sgprs = remaining_sgprs - 1; 238bf215546Sopenharmony_ci } else { 239bf215546Sopenharmony_ci user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set; 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci allocate_inline_push_consts(info, user_sgpr_info); 243bf215546Sopenharmony_ci} 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_cistatic void 246bf215546Sopenharmony_cideclare_global_input_sgprs(const struct radv_shader_info *info, 247bf215546Sopenharmony_ci const struct user_sgpr_info *user_sgpr_info, 248bf215546Sopenharmony_ci struct radv_shader_args *args) 249bf215546Sopenharmony_ci{ 250bf215546Sopenharmony_ci /* 1 for each descriptor set */ 251bf215546Sopenharmony_ci if (!user_sgpr_info->indirect_all_descriptor_sets) { 252bf215546Sopenharmony_ci uint32_t mask = info->desc_set_used_mask; 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci while (mask) { 255bf215546Sopenharmony_ci int i = u_bit_scan(&mask); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]); 258bf215546Sopenharmony_ci } 259bf215546Sopenharmony_ci } else { 260bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]); 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) { 264bf215546Sopenharmony_ci /* 1 for push constants and dynamic descriptors */ 265bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants); 266bf215546Sopenharmony_ci } 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci for (unsigned i = 0; i < util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) { 269bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]); 270bf215546Sopenharmony_ci } 271bf215546Sopenharmony_ci args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask; 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci if (info->so.num_outputs) { 274bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers); 275bf215546Sopenharmony_ci } 276bf215546Sopenharmony_ci} 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_cistatic void 279bf215546Sopenharmony_cideclare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, 280bf215546Sopenharmony_ci gl_shader_stage stage, bool has_previous_stage, 281bf215546Sopenharmony_ci gl_shader_stage previous_stage) 282bf215546Sopenharmony_ci{ 283bf215546Sopenharmony_ci if (info->vs.has_prolog) 284bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || 287bf215546Sopenharmony_ci (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { 288bf215546Sopenharmony_ci if (info->vs.vb_desc_usage_mask) { 289bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers); 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); 292bf215546Sopenharmony_ci if (info->vs.needs_draw_id) { 293bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci if (info->vs.needs_base_instance) { 296bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance); 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_cistatic void 302bf215546Sopenharmony_cideclare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info, 303bf215546Sopenharmony_ci struct radv_shader_args *args) 304bf215546Sopenharmony_ci{ 305bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); 306bf215546Sopenharmony_ci if (!args->is_gs_copy_shader) { 307bf215546Sopenharmony_ci if (info->vs.as_ls) { 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci if (gfx_level >= GFX11) { 310bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */ 311bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */ 312bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); 313bf215546Sopenharmony_ci } else if (gfx_level >= GFX10) { 314bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id); 315bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ 316bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); 317bf215546Sopenharmony_ci } else { 318bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id); 319bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); 320bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci } else { 323bf215546Sopenharmony_ci if (gfx_level >= GFX10) { 324bf215546Sopenharmony_ci if (info->is_ngg) { 325bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ 326bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ 327bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); 328bf215546Sopenharmony_ci } else { 329bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ 330bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id); 331bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci } else { 334bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); 335bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id); 336bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci } 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci if (info->vs.dynamic_inputs) { 342bf215546Sopenharmony_ci assert(info->vs.use_per_attribute_vb_descs); 343bf215546Sopenharmony_ci unsigned num_attributes = util_last_bit(info->vs.vb_desc_usage_mask); 344bf215546Sopenharmony_ci for (unsigned i = 0; i < num_attributes; i++) 345bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]); 346bf215546Sopenharmony_ci /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one 347bf215546Sopenharmony_ci * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8. 348bf215546Sopenharmony_ci */ 349bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); 350bf215546Sopenharmony_ci } 351bf215546Sopenharmony_ci} 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_cistatic void 354bf215546Sopenharmony_cideclare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, 355bf215546Sopenharmony_ci gl_shader_stage stage) 356bf215546Sopenharmony_ci{ 357bf215546Sopenharmony_ci int i; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci /* Streamout SGPRs. */ 360bf215546Sopenharmony_ci if (info->so.num_outputs) { 361bf215546Sopenharmony_ci assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL); 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config); 364bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index); 365bf215546Sopenharmony_ci } else if (stage == MESA_SHADER_TESS_EVAL) { 366bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); 367bf215546Sopenharmony_ci } 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci /* A streamout buffer offset is loaded if the stride is non-zero. */ 370bf215546Sopenharmony_ci for (i = 0; i < 4; i++) { 371bf215546Sopenharmony_ci if (!info->so.strides[i]) 372bf215546Sopenharmony_ci continue; 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]); 375bf215546Sopenharmony_ci } 376bf215546Sopenharmony_ci} 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_cistatic void 379bf215546Sopenharmony_cideclare_tes_input_vgprs(struct radv_shader_args *args) 380bf215546Sopenharmony_ci{ 381bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u); 382bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v); 383bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id); 384bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id); 385bf215546Sopenharmony_ci} 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_cistatic void 388bf215546Sopenharmony_cideclare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args) 389bf215546Sopenharmony_ci{ 390bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); 391bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups); 392bf215546Sopenharmony_ci if (info->vs.needs_draw_id) { 393bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci if (info->cs.uses_task_rings) { 396bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry); 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cistatic void 401bf215546Sopenharmony_cideclare_ms_input_vgprs(struct radv_shader_args *args) 402bf215546Sopenharmony_ci{ 403bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); 404bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ 405bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ 406bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* instance_id */ 407bf215546Sopenharmony_ci} 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_cistatic void 410bf215546Sopenharmony_cideclare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args) 411bf215546Sopenharmony_ci{ 412bf215546Sopenharmony_ci unsigned spi_ps_input = info->ps.spi_ps_input; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); 415bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); 416bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid); 417bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model); 418bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample); 419bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center); 420bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid); 421bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */ 422bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]); 423bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]); 424bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]); 425bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]); 426bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face); 427bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary); 428bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage); 429bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */ 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci if (args->remap_spi_ps_input) { 432bf215546Sopenharmony_ci /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then 433bf215546Sopenharmony_ci * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the 434bf215546Sopenharmony_ci * VGPR arguments here. 435bf215546Sopenharmony_ci */ 436bf215546Sopenharmony_ci unsigned arg_count = 0; 437bf215546Sopenharmony_ci for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) { 438bf215546Sopenharmony_ci if (args->ac.args[i].file != AC_ARG_VGPR) { 439bf215546Sopenharmony_ci arg_count++; 440bf215546Sopenharmony_ci continue; 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci if (!(spi_ps_input & (1 << vgpr_arg))) { 444bf215546Sopenharmony_ci args->ac.args[i].skip = true; 445bf215546Sopenharmony_ci } else { 446bf215546Sopenharmony_ci args->ac.args[i].offset = vgpr_reg; 447bf215546Sopenharmony_ci vgpr_reg += args->ac.args[i].size; 448bf215546Sopenharmony_ci arg_count++; 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci vgpr_arg++; 451bf215546Sopenharmony_ci } 452bf215546Sopenharmony_ci } 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci if (info->ps.has_epilog) { 455bf215546Sopenharmony_ci /* FIXME: Ensure the main shader doesn't have less VGPRs than the epilog */ 456bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_RTS; i++) 457bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, NULL); 458bf215546Sopenharmony_ci } 459bf215546Sopenharmony_ci} 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_cistatic void 462bf215546Sopenharmony_cideclare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, 463bf215546Sopenharmony_ci bool has_ngg_query) 464bf215546Sopenharmony_ci{ 465bf215546Sopenharmony_ci if (has_ngg_query) 466bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_query_state); 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci if (info->has_ngg_culling) { 469bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings); 470bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]); 471bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]); 472bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]); 473bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]); 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci} 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_cistatic void 478bf215546Sopenharmony_ciset_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info, 479bf215546Sopenharmony_ci uint8_t *user_sgpr_idx) 480bf215546Sopenharmony_ci{ 481bf215546Sopenharmony_ci if (!user_sgpr_info->indirect_all_descriptor_sets) { 482bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) { 483bf215546Sopenharmony_ci if (args->descriptor_sets[i].used) 484bf215546Sopenharmony_ci set_loc_desc(args, i, user_sgpr_idx); 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci } else { 487bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx); 488bf215546Sopenharmony_ci } 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci if (args->ac.push_constants.used) { 491bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci if (user_sgpr_info->inline_push_constant_mask) { 495bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, 496bf215546Sopenharmony_ci util_bitcount64(user_sgpr_info->inline_push_constant_mask)); 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci if (args->streamout_buffers.used) { 500bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx); 501bf215546Sopenharmony_ci } 502bf215546Sopenharmony_ci} 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_cistatic void 505bf215546Sopenharmony_ciset_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage, 506bf215546Sopenharmony_ci bool has_previous_stage, gl_shader_stage previous_stage, 507bf215546Sopenharmony_ci uint8_t *user_sgpr_idx) 508bf215546Sopenharmony_ci{ 509bf215546Sopenharmony_ci if (args->prolog_inputs.used) 510bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2); 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || 513bf215546Sopenharmony_ci (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { 514bf215546Sopenharmony_ci if (args->ac.vertex_buffers.used) { 515bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx); 516bf215546Sopenharmony_ci } 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used + 519bf215546Sopenharmony_ci args->ac.start_instance.used; 520bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num); 521bf215546Sopenharmony_ci } 522bf215546Sopenharmony_ci} 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_cistatic void 525bf215546Sopenharmony_ciset_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx) 526bf215546Sopenharmony_ci{ 527bf215546Sopenharmony_ci unsigned vs_num = 528bf215546Sopenharmony_ci args->ac.base_vertex.used + 3 * args->ac.num_work_groups.used + args->ac.draw_id.used; 529bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num); 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci if (args->ac.task_ring_entry.used) 532bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_TASK_RING_ENTRY, user_sgpr_idx, 1); 533bf215546Sopenharmony_ci} 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_civoid 536bf215546Sopenharmony_ciradv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipeline_key *key, 537bf215546Sopenharmony_ci const struct radv_shader_info *info, gl_shader_stage stage, 538bf215546Sopenharmony_ci bool has_previous_stage, gl_shader_stage previous_stage, 539bf215546Sopenharmony_ci struct radv_shader_args *args) 540bf215546Sopenharmony_ci{ 541bf215546Sopenharmony_ci struct user_sgpr_info user_sgpr_info; 542bf215546Sopenharmony_ci bool needs_view_index = info->uses_view_index; 543bf215546Sopenharmony_ci bool has_ngg_query = stage == MESA_SHADER_GEOMETRY || key->primitives_generated_query; 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) { 546bf215546Sopenharmony_ci /* Handle all NGG shaders as GS to simplify the code here. */ 547bf215546Sopenharmony_ci previous_stage = stage; 548bf215546Sopenharmony_ci stage = MESA_SHADER_GEOMETRY; 549bf215546Sopenharmony_ci has_previous_stage = true; 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci for (int i = 0; i < MAX_SETS; i++) 553bf215546Sopenharmony_ci args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; 554bf215546Sopenharmony_ci for (int i = 0; i < AC_UD_MAX_UD; i++) 555bf215546Sopenharmony_ci args->user_sgprs_locs.shader_data[i].sgpr_idx = -1; 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, previous_stage, 558bf215546Sopenharmony_ci needs_view_index, has_ngg_query, &user_sgpr_info); 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci if (args->explicit_scratch_args) { 561bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets); 562bf215546Sopenharmony_ci } 563bf215546Sopenharmony_ci if (stage == MESA_SHADER_TASK) { 564bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets); 565bf215546Sopenharmony_ci } 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other 568bf215546Sopenharmony_ci * sgprs. 569bf215546Sopenharmony_ci */ 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci switch (stage) { 572bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 573bf215546Sopenharmony_ci case MESA_SHADER_TASK: 574bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci if (info->cs.uses_sbt) { 577bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.sbt_descriptors); 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci if (info->cs.uses_grid_size) { 581bf215546Sopenharmony_ci if (args->load_grid_size_from_user_sgpr) 582bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups); 583bf215546Sopenharmony_ci else 584bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.num_work_groups); 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci if (info->cs.uses_ray_launch_size) { 588bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr); 589bf215546Sopenharmony_ci } 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci if (info->vs.needs_draw_id) { 592bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci if (info->cs.uses_task_rings) { 596bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry); 597bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr); 598bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride); 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) { 602bf215546Sopenharmony_ci if (info->cs.uses_block_id[i]) { 603bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]); 604bf215546Sopenharmony_ci } 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci if (info->cs.uses_local_invocation_idx) { 608bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size); 609bf215546Sopenharmony_ci } 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci if (args->explicit_scratch_args && gfx_level < GFX11) { 612bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci if (gfx_level >= GFX11) 616bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids); 617bf215546Sopenharmony_ci else 618bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids); 619bf215546Sopenharmony_ci break; 620bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 621bf215546Sopenharmony_ci /* NGG is handled by the GS case */ 622bf215546Sopenharmony_ci assert(!info->is_ngg); 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage); 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci if (needs_view_index) { 629bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); 630bf215546Sopenharmony_ci } 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_ci if (info->force_vrs_per_vertex) { 633bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates); 634bf215546Sopenharmony_ci } 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci if (info->vs.as_es) { 637bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset); 638bf215546Sopenharmony_ci } else if (info->vs.as_ls) { 639bf215546Sopenharmony_ci /* no extra parameters */ 640bf215546Sopenharmony_ci } else { 641bf215546Sopenharmony_ci declare_streamout_sgprs(info, args, stage); 642bf215546Sopenharmony_ci } 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci if (args->explicit_scratch_args) { 645bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 646bf215546Sopenharmony_ci } 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci declare_vs_input_vgprs(gfx_level, info, args); 649bf215546Sopenharmony_ci break; 650bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 651bf215546Sopenharmony_ci if (has_previous_stage) { 652bf215546Sopenharmony_ci // First 6 system regs 653bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); 654bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info); 655bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci if (gfx_level >= GFX11) { 658bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id); 659bf215546Sopenharmony_ci } else { 660bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 661bf215546Sopenharmony_ci } 662bf215546Sopenharmony_ci 663bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown 664bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage); 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci if (needs_view_index) { 671bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); 672bf215546Sopenharmony_ci } 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); 675bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids); 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci declare_vs_input_vgprs(gfx_level, info, args); 678bf215546Sopenharmony_ci } else { 679bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 680bf215546Sopenharmony_ci 681bf215546Sopenharmony_ci if (needs_view_index) { 682bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); 683bf215546Sopenharmony_ci } 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); 686bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); 687bf215546Sopenharmony_ci if (args->explicit_scratch_args) { 688bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); 691bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids); 692bf215546Sopenharmony_ci } 693bf215546Sopenharmony_ci break; 694bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 695bf215546Sopenharmony_ci /* NGG is handled by the GS case */ 696bf215546Sopenharmony_ci assert(!info->is_ngg); 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci if (needs_view_index) 701bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci if (info->tes.as_es) { 704bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); 705bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); 706bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset); 707bf215546Sopenharmony_ci } else { 708bf215546Sopenharmony_ci declare_streamout_sgprs(info, args, stage); 709bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); 710bf215546Sopenharmony_ci } 711bf215546Sopenharmony_ci if (args->explicit_scratch_args) { 712bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 713bf215546Sopenharmony_ci } 714bf215546Sopenharmony_ci declare_tes_input_vgprs(args); 715bf215546Sopenharmony_ci break; 716bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 717bf215546Sopenharmony_ci if (has_previous_stage) { 718bf215546Sopenharmony_ci // First 6 system regs 719bf215546Sopenharmony_ci if (info->is_ngg) { 720bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info); 721bf215546Sopenharmony_ci } else { 722bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset); 723bf215546Sopenharmony_ci } 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info); 726bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci if (gfx_level < GFX11) { 729bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 730bf215546Sopenharmony_ci } 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown 733bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci if (previous_stage == MESA_SHADER_VERTEX) { 736bf215546Sopenharmony_ci declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage); 737bf215546Sopenharmony_ci } else if (previous_stage == MESA_SHADER_MESH) { 738bf215546Sopenharmony_ci declare_ms_input_sgprs(info, args); 739bf215546Sopenharmony_ci } 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci if (needs_view_index) { 744bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci if (info->force_vrs_per_vertex) { 748bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates); 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci if (info->is_ngg) { 752bf215546Sopenharmony_ci declare_ngg_sgprs(info, args, has_ngg_query); 753bf215546Sopenharmony_ci } 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); 756bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]); 757bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); 758bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id); 759bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]); 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci if (previous_stage == MESA_SHADER_VERTEX) { 762bf215546Sopenharmony_ci declare_vs_input_vgprs(gfx_level, info, args); 763bf215546Sopenharmony_ci } else if (previous_stage == MESA_SHADER_TESS_EVAL) { 764bf215546Sopenharmony_ci declare_tes_input_vgprs(args); 765bf215546Sopenharmony_ci } else if (previous_stage == MESA_SHADER_MESH) { 766bf215546Sopenharmony_ci declare_ms_input_vgprs(args); 767bf215546Sopenharmony_ci } 768bf215546Sopenharmony_ci } else { 769bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_ci if (needs_view_index) { 772bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci if (info->force_vrs_per_vertex) { 776bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.force_vrs_rates); 777bf215546Sopenharmony_ci } 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset); 780bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id); 781bf215546Sopenharmony_ci if (args->explicit_scratch_args) { 782bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); 785bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]); 786bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); 787bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]); 788bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]); 789bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]); 790bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]); 791bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id); 792bf215546Sopenharmony_ci } 793bf215546Sopenharmony_ci break; 794bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 795bf215546Sopenharmony_ci declare_global_input_sgprs(info, &user_sgpr_info, args); 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci if (info->ps.has_epilog) { 798bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ps_epilog_pc); 799bf215546Sopenharmony_ci } 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); 802bf215546Sopenharmony_ci if (args->explicit_scratch_args && gfx_level < GFX11) { 803bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 804bf215546Sopenharmony_ci } 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci declare_ps_input_vgprs(info, args); 807bf215546Sopenharmony_ci break; 808bf215546Sopenharmony_ci default: 809bf215546Sopenharmony_ci unreachable("Shader stage not implemented"); 810bf215546Sopenharmony_ci } 811bf215546Sopenharmony_ci 812bf215546Sopenharmony_ci uint8_t user_sgpr_idx = 0; 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx); 815bf215546Sopenharmony_ci if (stage == MESA_SHADER_TASK) { 816bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_CS_TASK_RING_OFFSETS, &user_sgpr_idx); 817bf215546Sopenharmony_ci } 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including 820bf215546Sopenharmony_ci * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ 821bf215546Sopenharmony_ci if (has_previous_stage) 822bf215546Sopenharmony_ci user_sgpr_idx = 0; 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX)) 825bf215546Sopenharmony_ci set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx); 826bf215546Sopenharmony_ci else if (has_previous_stage && previous_stage == MESA_SHADER_MESH) 827bf215546Sopenharmony_ci set_ms_input_locs(args, &user_sgpr_idx); 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx); 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci switch (stage) { 832bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 833bf215546Sopenharmony_ci case MESA_SHADER_TASK: 834bf215546Sopenharmony_ci if (args->ac.sbt_descriptors.used) { 835bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx); 836bf215546Sopenharmony_ci } 837bf215546Sopenharmony_ci if (args->ac.num_work_groups.used) { 838bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 839bf215546Sopenharmony_ci args->load_grid_size_from_user_sgpr ? 3 : 2); 840bf215546Sopenharmony_ci } 841bf215546Sopenharmony_ci if (args->ac.ray_launch_size_addr.used) { 842bf215546Sopenharmony_ci set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx); 843bf215546Sopenharmony_ci } 844bf215546Sopenharmony_ci if (args->ac.draw_id.used) { 845bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1); 846bf215546Sopenharmony_ci } 847bf215546Sopenharmony_ci if (args->ac.task_ring_entry.used) { 848bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1); 849bf215546Sopenharmony_ci } 850bf215546Sopenharmony_ci if (args->task_ib_addr.used) { 851bf215546Sopenharmony_ci assert(args->task_ib_stride.used); 852bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3); 853bf215546Sopenharmony_ci } 854bf215546Sopenharmony_ci break; 855bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 856bf215546Sopenharmony_ci if (args->ac.view_index.used) 857bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 858bf215546Sopenharmony_ci if (args->ac.force_vrs_rates.used) 859bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1); 860bf215546Sopenharmony_ci break; 861bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 862bf215546Sopenharmony_ci if (args->ac.view_index.used) 863bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 864bf215546Sopenharmony_ci break; 865bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 866bf215546Sopenharmony_ci if (args->ac.view_index.used) 867bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 868bf215546Sopenharmony_ci break; 869bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 870bf215546Sopenharmony_ci if (args->ac.view_index.used) 871bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci if (args->ac.force_vrs_rates.used) 874bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_FORCE_VRS_RATES, &user_sgpr_idx, 1); 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_ci if (args->ngg_query_state.used) { 877bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_NGG_QUERY_STATE, &user_sgpr_idx, 1); 878bf215546Sopenharmony_ci } 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci if (args->ngg_culling_settings.used) { 881bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1); 882bf215546Sopenharmony_ci } 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci if (args->ngg_viewport_scale[0].used) { 885bf215546Sopenharmony_ci assert(args->ngg_viewport_scale[1].used && 886bf215546Sopenharmony_ci args->ngg_viewport_translate[0].used && 887bf215546Sopenharmony_ci args->ngg_viewport_translate[1].used); 888bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4); 889bf215546Sopenharmony_ci } 890bf215546Sopenharmony_ci break; 891bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 892bf215546Sopenharmony_ci if (args->ps_epilog_pc.used) 893bf215546Sopenharmony_ci set_loc_shader(args, AC_UD_PS_EPILOG_PC, &user_sgpr_idx, 1); 894bf215546Sopenharmony_ci break; 895bf215546Sopenharmony_ci default: 896bf215546Sopenharmony_ci unreachable("Shader stage not implemented"); 897bf215546Sopenharmony_ci } 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci args->num_user_sgprs = user_sgpr_idx; 900bf215546Sopenharmony_ci} 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_civoid 903bf215546Sopenharmony_ciradv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_epilog_key *key, 904bf215546Sopenharmony_ci struct radv_shader_args *args) 905bf215546Sopenharmony_ci{ 906bf215546Sopenharmony_ci unsigned num_inputs = 0; 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets); 909bf215546Sopenharmony_ci if (gfx_level < GFX11) 910bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci /* Declare VGPR arguments for color exports. */ 913bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_RTS; i++) { 914bf215546Sopenharmony_ci unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf; 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci if (col_format == V_028714_SPI_SHADER_ZERO) 917bf215546Sopenharmony_ci continue; 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[num_inputs]); 920bf215546Sopenharmony_ci num_inputs++; 921bf215546Sopenharmony_ci } 922bf215546Sopenharmony_ci} 923