1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "aco_instruction_selection.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "common/ac_nir.h" 28bf215546Sopenharmony_ci#include "common/sid.h" 29bf215546Sopenharmony_ci#include "vulkan/radv_descriptor_set.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "nir_control_flow.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#include <vector> 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cinamespace aco { 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_cinamespace { 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cibool 40bf215546Sopenharmony_ciis_loop_header_block(nir_block* block) 41bf215546Sopenharmony_ci{ 42bf215546Sopenharmony_ci return block->cf_node.parent->type == nir_cf_node_loop && 43bf215546Sopenharmony_ci block == nir_loop_first_block(nir_cf_node_as_loop(block->cf_node.parent)); 44bf215546Sopenharmony_ci} 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci/* similar to nir_block_is_unreachable(), but does not require dominance information */ 47bf215546Sopenharmony_cibool 48bf215546Sopenharmony_ciis_block_reachable(nir_function_impl* impl, nir_block* known_reachable, nir_block* block) 49bf215546Sopenharmony_ci{ 50bf215546Sopenharmony_ci if (block == nir_start_block(impl) || block == known_reachable) 51bf215546Sopenharmony_ci return true; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* skip loop back-edges */ 54bf215546Sopenharmony_ci if (is_loop_header_block(block)) { 55bf215546Sopenharmony_ci nir_loop* loop = nir_cf_node_as_loop(block->cf_node.parent); 56bf215546Sopenharmony_ci nir_block* preheader = nir_block_cf_tree_prev(nir_loop_first_block(loop)); 57bf215546Sopenharmony_ci return is_block_reachable(impl, known_reachable, preheader); 58bf215546Sopenharmony_ci } 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci set_foreach (block->predecessors, entry) { 61bf215546Sopenharmony_ci if (is_block_reachable(impl, known_reachable, (nir_block*)entry->key)) 62bf215546Sopenharmony_ci return true; 63bf215546Sopenharmony_ci } 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci return false; 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci/* Check whether the given SSA def is only used by cross-lane instructions. */ 69bf215546Sopenharmony_cibool 70bf215546Sopenharmony_cionly_used_by_cross_lane_instrs(nir_ssa_def* ssa, bool follow_phis = true) 71bf215546Sopenharmony_ci{ 72bf215546Sopenharmony_ci nir_foreach_use (src, ssa) { 73bf215546Sopenharmony_ci switch (src->parent_instr->type) { 74bf215546Sopenharmony_ci case nir_instr_type_alu: { 75bf215546Sopenharmony_ci nir_alu_instr* alu = nir_instr_as_alu(src->parent_instr); 76bf215546Sopenharmony_ci if (alu->op != nir_op_unpack_64_2x32_split_x && alu->op != nir_op_unpack_64_2x32_split_y) 77bf215546Sopenharmony_ci return false; 78bf215546Sopenharmony_ci if (!only_used_by_cross_lane_instrs(&alu->dest.dest.ssa, follow_phis)) 79bf215546Sopenharmony_ci return false; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci continue; 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 84bf215546Sopenharmony_ci nir_intrinsic_instr* intrin = nir_instr_as_intrinsic(src->parent_instr); 85bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_read_invocation && 86bf215546Sopenharmony_ci intrin->intrinsic != nir_intrinsic_read_first_invocation && 87bf215546Sopenharmony_ci intrin->intrinsic != nir_intrinsic_lane_permute_16_amd) 88bf215546Sopenharmony_ci return false; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci continue; 91bf215546Sopenharmony_ci } 92bf215546Sopenharmony_ci case nir_instr_type_phi: { 93bf215546Sopenharmony_ci /* Don't follow more than 1 phis, this avoids infinite loops. */ 94bf215546Sopenharmony_ci if (!follow_phis) 95bf215546Sopenharmony_ci return false; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci nir_phi_instr* phi = nir_instr_as_phi(src->parent_instr); 98bf215546Sopenharmony_ci if (!only_used_by_cross_lane_instrs(&phi->dest.ssa, false)) 99bf215546Sopenharmony_ci return false; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci continue; 102bf215546Sopenharmony_ci } 103bf215546Sopenharmony_ci default: return false; 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci } 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci return true; 108bf215546Sopenharmony_ci} 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci/* If one side of a divergent IF ends in a branch and the other doesn't, we 111bf215546Sopenharmony_ci * might have to emit the contents of the side without the branch at the merge 112bf215546Sopenharmony_ci * block instead. This is so that we can use any SGPR live-out of the side 113bf215546Sopenharmony_ci * without the branch without creating a linear phi in the invert or merge block. */ 114bf215546Sopenharmony_cibool 115bf215546Sopenharmony_cisanitize_if(nir_function_impl* impl, nir_if* nif) 116bf215546Sopenharmony_ci{ 117bf215546Sopenharmony_ci // TODO: skip this if the condition is uniform and there are no divergent breaks/continues? 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci nir_block* then_block = nir_if_last_then_block(nif); 120bf215546Sopenharmony_ci nir_block* else_block = nir_if_last_else_block(nif); 121bf215546Sopenharmony_ci bool then_jump = nir_block_ends_in_jump(then_block) || 122bf215546Sopenharmony_ci !is_block_reachable(impl, nir_if_first_then_block(nif), then_block); 123bf215546Sopenharmony_ci bool else_jump = nir_block_ends_in_jump(else_block) || 124bf215546Sopenharmony_ci !is_block_reachable(impl, nir_if_first_else_block(nif), else_block); 125bf215546Sopenharmony_ci if (then_jump == else_jump) 126bf215546Sopenharmony_ci return false; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci /* If the continue from block is empty then return as there is nothing to 129bf215546Sopenharmony_ci * move. 130bf215546Sopenharmony_ci */ 131bf215546Sopenharmony_ci if (nir_cf_list_is_empty_block(else_jump ? &nif->then_list : &nif->else_list)) 132bf215546Sopenharmony_ci return false; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci /* Even though this if statement has a jump on one side, we may still have 135bf215546Sopenharmony_ci * phis afterwards. Single-source phis can be produced by loop unrolling 136bf215546Sopenharmony_ci * or dead control-flow passes and are perfectly legal. Run a quick phi 137bf215546Sopenharmony_ci * removal on the block after the if to clean up any such phis. 138bf215546Sopenharmony_ci */ 139bf215546Sopenharmony_ci nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node))); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci /* Finally, move the continue from branch after the if-statement. */ 142bf215546Sopenharmony_ci nir_block* last_continue_from_blk = else_jump ? then_block : else_block; 143bf215546Sopenharmony_ci nir_block* first_continue_from_blk = 144bf215546Sopenharmony_ci else_jump ? nir_if_first_then_block(nif) : nir_if_first_else_block(nif); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci nir_cf_list tmp; 147bf215546Sopenharmony_ci nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk), 148bf215546Sopenharmony_ci nir_after_block(last_continue_from_blk)); 149bf215546Sopenharmony_ci nir_cf_reinsert(&tmp, nir_after_cf_node(&nif->cf_node)); 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci return true; 152bf215546Sopenharmony_ci} 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_cibool 155bf215546Sopenharmony_cisanitize_cf_list(nir_function_impl* impl, struct exec_list* cf_list) 156bf215546Sopenharmony_ci{ 157bf215546Sopenharmony_ci bool progress = false; 158bf215546Sopenharmony_ci foreach_list_typed (nir_cf_node, cf_node, node, cf_list) { 159bf215546Sopenharmony_ci switch (cf_node->type) { 160bf215546Sopenharmony_ci case nir_cf_node_block: break; 161bf215546Sopenharmony_ci case nir_cf_node_if: { 162bf215546Sopenharmony_ci nir_if* nif = nir_cf_node_as_if(cf_node); 163bf215546Sopenharmony_ci progress |= sanitize_cf_list(impl, &nif->then_list); 164bf215546Sopenharmony_ci progress |= sanitize_cf_list(impl, &nif->else_list); 165bf215546Sopenharmony_ci progress |= sanitize_if(impl, nif); 166bf215546Sopenharmony_ci break; 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci case nir_cf_node_loop: { 169bf215546Sopenharmony_ci nir_loop* loop = nir_cf_node_as_loop(cf_node); 170bf215546Sopenharmony_ci progress |= sanitize_cf_list(impl, &loop->body); 171bf215546Sopenharmony_ci break; 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci case nir_cf_node_function: unreachable("Invalid cf type"); 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci return progress; 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_civoid 181bf215546Sopenharmony_ciapply_nuw_to_ssa(isel_context* ctx, nir_ssa_def* ssa) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci nir_ssa_scalar scalar; 184bf215546Sopenharmony_ci scalar.def = ssa; 185bf215546Sopenharmony_ci scalar.comp = 0; 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci if (!nir_ssa_scalar_is_alu(scalar) || nir_ssa_scalar_alu_op(scalar) != nir_op_iadd) 188bf215546Sopenharmony_ci return; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci nir_alu_instr* add = nir_instr_as_alu(ssa->parent_instr); 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (add->no_unsigned_wrap) 193bf215546Sopenharmony_ci return; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0); 196bf215546Sopenharmony_ci nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1); 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(src0)) { 199bf215546Sopenharmony_ci nir_ssa_scalar tmp = src0; 200bf215546Sopenharmony_ci src0 = src1; 201bf215546Sopenharmony_ci src1 = tmp; 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci uint32_t src1_ub = nir_unsigned_upper_bound(ctx->shader, ctx->range_ht, src1, &ctx->ub_config); 205bf215546Sopenharmony_ci add->no_unsigned_wrap = 206bf215546Sopenharmony_ci !nir_addition_might_overflow(ctx->shader, ctx->range_ht, src0, src1_ub, &ctx->ub_config); 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_civoid 210bf215546Sopenharmony_ciapply_nuw_to_offsets(isel_context* ctx, nir_function_impl* impl) 211bf215546Sopenharmony_ci{ 212bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 213bf215546Sopenharmony_ci nir_foreach_instr (instr, block) { 214bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 215bf215546Sopenharmony_ci continue; 216bf215546Sopenharmony_ci nir_intrinsic_instr* intrin = nir_instr_as_intrinsic(instr); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci switch (intrin->intrinsic) { 219bf215546Sopenharmony_ci case nir_intrinsic_load_constant: 220bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 221bf215546Sopenharmony_ci case nir_intrinsic_load_push_constant: 222bf215546Sopenharmony_ci if (!nir_src_is_divergent(intrin->src[0])) 223bf215546Sopenharmony_ci apply_nuw_to_ssa(ctx, intrin->src[0].ssa); 224bf215546Sopenharmony_ci break; 225bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 226bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 227bf215546Sopenharmony_ci if (!nir_src_is_divergent(intrin->src[1])) 228bf215546Sopenharmony_ci apply_nuw_to_ssa(ctx, intrin->src[1].ssa); 229bf215546Sopenharmony_ci break; 230bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: 231bf215546Sopenharmony_ci if (!nir_src_is_divergent(intrin->src[2])) 232bf215546Sopenharmony_ci apply_nuw_to_ssa(ctx, intrin->src[2].ssa); 233bf215546Sopenharmony_ci break; 234bf215546Sopenharmony_ci default: break; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci} 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ciRegClass 241bf215546Sopenharmony_ciget_reg_class(isel_context* ctx, RegType type, unsigned components, unsigned bitsize) 242bf215546Sopenharmony_ci{ 243bf215546Sopenharmony_ci if (bitsize == 1) 244bf215546Sopenharmony_ci return RegClass(RegType::sgpr, ctx->program->lane_mask.size() * components); 245bf215546Sopenharmony_ci else 246bf215546Sopenharmony_ci return RegClass::get(type, components * bitsize / 8u); 247bf215546Sopenharmony_ci} 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_civoid 250bf215546Sopenharmony_cisetup_vs_output_info(isel_context* ctx, nir_shader* nir, 251bf215546Sopenharmony_ci const aco_vp_output_info* outinfo) 252bf215546Sopenharmony_ci{ 253bf215546Sopenharmony_ci ctx->export_clip_dists = outinfo->export_clip_dists; 254bf215546Sopenharmony_ci ctx->num_clip_distances = util_bitcount(outinfo->clip_dist_mask); 255bf215546Sopenharmony_ci ctx->num_cull_distances = util_bitcount(outinfo->cull_dist_mask); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci assert(ctx->num_clip_distances + ctx->num_cull_distances <= 8); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci /* GFX10+ early rasterization: 260bf215546Sopenharmony_ci * When there are no param exports in an NGG (or legacy VS) shader, 261bf215546Sopenharmony_ci * RADV sets NO_PC_EXPORT=1, which means the HW will start clipping and rasterization 262bf215546Sopenharmony_ci * as soon as it encounters a DONE pos export. When this happens, PS waves can launch 263bf215546Sopenharmony_ci * before the NGG (or VS) waves finish. 264bf215546Sopenharmony_ci */ 265bf215546Sopenharmony_ci ctx->program->early_rast = ctx->program->gfx_level >= GFX10 && outinfo->param_exports == 0; 266bf215546Sopenharmony_ci} 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_civoid 269bf215546Sopenharmony_cisetup_vs_variables(isel_context* ctx, nir_shader* nir) 270bf215546Sopenharmony_ci{ 271bf215546Sopenharmony_ci if (ctx->stage == vertex_vs || ctx->stage == vertex_ngg) { 272bf215546Sopenharmony_ci setup_vs_output_info(ctx, nir, &ctx->program->info.vs.outinfo); 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci /* TODO: NGG streamout */ 275bf215546Sopenharmony_ci if (ctx->stage.hw == HWStage::NGG) 276bf215546Sopenharmony_ci assert(!ctx->program->info.so.num_outputs); 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci if (ctx->stage == vertex_ngg) { 280bf215546Sopenharmony_ci ctx->program->config->lds_size = 281bf215546Sopenharmony_ci DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule); 282bf215546Sopenharmony_ci assert((ctx->program->config->lds_size * ctx->program->dev.lds_encoding_granule) < 283bf215546Sopenharmony_ci (32 * 1024)); 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci} 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_civoid 288bf215546Sopenharmony_cisetup_gs_variables(isel_context* ctx, nir_shader* nir) 289bf215546Sopenharmony_ci{ 290bf215546Sopenharmony_ci if (ctx->stage == vertex_geometry_gs || ctx->stage == tess_eval_geometry_gs) { 291bf215546Sopenharmony_ci ctx->program->config->lds_size = 292bf215546Sopenharmony_ci ctx->program->info.gfx9_gs_ring_lds_size; /* Already in units of the alloc granularity */ 293bf215546Sopenharmony_ci } else if (ctx->stage == vertex_geometry_ngg || ctx->stage == tess_eval_geometry_ngg) { 294bf215546Sopenharmony_ci setup_vs_output_info(ctx, nir, &ctx->program->info.vs.outinfo); 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci ctx->program->config->lds_size = 297bf215546Sopenharmony_ci DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule); 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_civoid 302bf215546Sopenharmony_cisetup_tcs_info(isel_context* ctx, nir_shader* nir, nir_shader* vs) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci ctx->tcs_in_out_eq = ctx->program->info.vs.tcs_in_out_eq; 305bf215546Sopenharmony_ci ctx->tcs_temp_only_inputs = ctx->program->info.vs.tcs_temp_only_input_mask; 306bf215546Sopenharmony_ci ctx->tcs_num_patches = ctx->program->info.num_tess_patches; 307bf215546Sopenharmony_ci ctx->program->config->lds_size = ctx->program->info.tcs.num_lds_blocks; 308bf215546Sopenharmony_ci} 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_civoid 311bf215546Sopenharmony_cisetup_tes_variables(isel_context* ctx, nir_shader* nir) 312bf215546Sopenharmony_ci{ 313bf215546Sopenharmony_ci ctx->tcs_num_patches = ctx->program->info.num_tess_patches; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci if (ctx->stage == tess_eval_vs || ctx->stage == tess_eval_ngg) { 316bf215546Sopenharmony_ci setup_vs_output_info(ctx, nir, &ctx->program->info.tes.outinfo); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci /* TODO: NGG streamout */ 319bf215546Sopenharmony_ci if (ctx->stage.hw == HWStage::NGG) 320bf215546Sopenharmony_ci assert(!ctx->program->info.so.num_outputs); 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci if (ctx->stage == tess_eval_ngg) { 324bf215546Sopenharmony_ci ctx->program->config->lds_size = 325bf215546Sopenharmony_ci DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule); 326bf215546Sopenharmony_ci assert((ctx->program->config->lds_size * ctx->program->dev.lds_encoding_granule) < 327bf215546Sopenharmony_ci (32 * 1024)); 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci} 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_civoid 332bf215546Sopenharmony_cisetup_ms_variables(isel_context* ctx, nir_shader* nir) 333bf215546Sopenharmony_ci{ 334bf215546Sopenharmony_ci setup_vs_output_info(ctx, nir, &ctx->program->info.ms.outinfo); 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci ctx->program->config->lds_size = 337bf215546Sopenharmony_ci DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule); 338bf215546Sopenharmony_ci assert((ctx->program->config->lds_size * ctx->program->dev.lds_encoding_granule) < (32 * 1024)); 339bf215546Sopenharmony_ci} 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_civoid 342bf215546Sopenharmony_cisetup_variables(isel_context* ctx, nir_shader* nir) 343bf215546Sopenharmony_ci{ 344bf215546Sopenharmony_ci switch (nir->info.stage) { 345bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: { 346bf215546Sopenharmony_ci break; 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 349bf215546Sopenharmony_ci case MESA_SHADER_TASK: { 350bf215546Sopenharmony_ci ctx->program->config->lds_size = 351bf215546Sopenharmony_ci DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule); 352bf215546Sopenharmony_ci break; 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: { 355bf215546Sopenharmony_ci setup_vs_variables(ctx, nir); 356bf215546Sopenharmony_ci break; 357bf215546Sopenharmony_ci } 358bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: { 359bf215546Sopenharmony_ci setup_gs_variables(ctx, nir); 360bf215546Sopenharmony_ci break; 361bf215546Sopenharmony_ci } 362bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: { 363bf215546Sopenharmony_ci break; 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: { 366bf215546Sopenharmony_ci setup_tes_variables(ctx, nir); 367bf215546Sopenharmony_ci break; 368bf215546Sopenharmony_ci } 369bf215546Sopenharmony_ci case MESA_SHADER_MESH: { 370bf215546Sopenharmony_ci setup_ms_variables(ctx, nir); 371bf215546Sopenharmony_ci break; 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci default: unreachable("Unhandled shader stage."); 374bf215546Sopenharmony_ci } 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci /* Make sure we fit the available LDS space. */ 377bf215546Sopenharmony_ci assert((ctx->program->config->lds_size * ctx->program->dev.lds_encoding_granule) <= 378bf215546Sopenharmony_ci ctx->program->dev.lds_limit); 379bf215546Sopenharmony_ci} 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_civoid 382bf215546Sopenharmony_cisetup_nir(isel_context* ctx, nir_shader* nir) 383bf215546Sopenharmony_ci{ 384bf215546Sopenharmony_ci /* the variable setup has to be done before lower_io / CSE */ 385bf215546Sopenharmony_ci setup_variables(ctx, nir); 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci nir_convert_to_lcssa(nir, true, false); 388bf215546Sopenharmony_ci nir_lower_phis_to_scalar(nir, true); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci nir_function_impl* func = nir_shader_get_entrypoint(nir); 391bf215546Sopenharmony_ci nir_index_ssa_defs(func); 392bf215546Sopenharmony_ci} 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci} /* end namespace */ 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_civoid 397bf215546Sopenharmony_ciinit_context(isel_context* ctx, nir_shader* shader) 398bf215546Sopenharmony_ci{ 399bf215546Sopenharmony_ci nir_function_impl* impl = nir_shader_get_entrypoint(shader); 400bf215546Sopenharmony_ci ctx->shader = shader; 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci /* Init NIR range analysis. */ 403bf215546Sopenharmony_ci ctx->range_ht = _mesa_pointer_hash_table_create(NULL); 404bf215546Sopenharmony_ci ctx->ub_config.min_subgroup_size = 64; 405bf215546Sopenharmony_ci ctx->ub_config.max_subgroup_size = 64; 406bf215546Sopenharmony_ci if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->program->info.cs.subgroup_size) { 407bf215546Sopenharmony_ci ctx->ub_config.min_subgroup_size = ctx->program->info.cs.subgroup_size; 408bf215546Sopenharmony_ci ctx->ub_config.max_subgroup_size = ctx->program->info.cs.subgroup_size; 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci ctx->ub_config.max_workgroup_invocations = 2048; 411bf215546Sopenharmony_ci ctx->ub_config.max_workgroup_count[0] = 65535; 412bf215546Sopenharmony_ci ctx->ub_config.max_workgroup_count[1] = 65535; 413bf215546Sopenharmony_ci ctx->ub_config.max_workgroup_count[2] = 65535; 414bf215546Sopenharmony_ci ctx->ub_config.max_workgroup_size[0] = 2048; 415bf215546Sopenharmony_ci ctx->ub_config.max_workgroup_size[1] = 2048; 416bf215546Sopenharmony_ci ctx->ub_config.max_workgroup_size[2] = 2048; 417bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) { 418bf215546Sopenharmony_ci unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i]; 419bf215546Sopenharmony_ci unsigned dfmt = attrib_format & 0xf; 420bf215546Sopenharmony_ci unsigned nfmt = (attrib_format >> 4) & 0x7; 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci uint32_t max = UINT32_MAX; 423bf215546Sopenharmony_ci if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) { 424bf215546Sopenharmony_ci max = 0x3f800000u; 425bf215546Sopenharmony_ci } else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) { 426bf215546Sopenharmony_ci bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED; 427bf215546Sopenharmony_ci switch (dfmt) { 428bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_8: 429bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_8_8: 430bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: max = uscaled ? 0x437f0000u : UINT8_MAX; break; 431bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: 432bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: max = uscaled ? 0x447fc000u : 1023; break; 433bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_10_11_11: 434bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_11_11_10: max = uscaled ? 0x44ffe000u : 2047; break; 435bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_16: 436bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_16_16: 437bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: max = uscaled ? 0x477fff00u : UINT16_MAX; break; 438bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_32: 439bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_32_32: 440bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_32_32_32: 441bf215546Sopenharmony_ci case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: max = uscaled ? 0x4f800000u : UINT32_MAX; break; 442bf215546Sopenharmony_ci } 443bf215546Sopenharmony_ci } 444bf215546Sopenharmony_ci ctx->ub_config.vertex_attrib_max[i] = max; 445bf215546Sopenharmony_ci } 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci nir_divergence_analysis(shader); 448bf215546Sopenharmony_ci nir_opt_uniform_atomics(shader); 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci apply_nuw_to_offsets(ctx, impl); 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci /* sanitize control flow */ 453bf215546Sopenharmony_ci sanitize_cf_list(impl, &impl->body); 454bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_none); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci /* we'll need these for isel */ 457bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_block_index); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci if (!ctx->stage.has(SWStage::GSCopy) && ctx->options->dump_preoptir) { 460bf215546Sopenharmony_ci fprintf(stderr, "NIR shader before instruction selection:\n"); 461bf215546Sopenharmony_ci nir_print_shader(shader, stderr); 462bf215546Sopenharmony_ci } 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci ctx->first_temp_id = ctx->program->peekAllocationId(); 465bf215546Sopenharmony_ci ctx->program->allocateRange(impl->ssa_alloc); 466bf215546Sopenharmony_ci RegClass* regclasses = ctx->program->temp_rc.data() + ctx->first_temp_id; 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci std::unique_ptr<unsigned[]> nir_to_aco{new unsigned[impl->num_blocks]()}; 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci /* TODO: make this recursive to improve compile times */ 471bf215546Sopenharmony_ci bool done = false; 472bf215546Sopenharmony_ci while (!done) { 473bf215546Sopenharmony_ci done = true; 474bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 475bf215546Sopenharmony_ci nir_foreach_instr (instr, block) { 476bf215546Sopenharmony_ci switch (instr->type) { 477bf215546Sopenharmony_ci case nir_instr_type_alu: { 478bf215546Sopenharmony_ci nir_alu_instr* alu_instr = nir_instr_as_alu(instr); 479bf215546Sopenharmony_ci RegType type = 480bf215546Sopenharmony_ci nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr; 481bf215546Sopenharmony_ci switch (alu_instr->op) { 482bf215546Sopenharmony_ci case nir_op_fmul: 483bf215546Sopenharmony_ci case nir_op_fmulz: 484bf215546Sopenharmony_ci case nir_op_fadd: 485bf215546Sopenharmony_ci case nir_op_fsub: 486bf215546Sopenharmony_ci case nir_op_ffma: 487bf215546Sopenharmony_ci case nir_op_ffmaz: 488bf215546Sopenharmony_ci case nir_op_fmax: 489bf215546Sopenharmony_ci case nir_op_fmin: 490bf215546Sopenharmony_ci case nir_op_fneg: 491bf215546Sopenharmony_ci case nir_op_fabs: 492bf215546Sopenharmony_ci case nir_op_fsat: 493bf215546Sopenharmony_ci case nir_op_fsign: 494bf215546Sopenharmony_ci case nir_op_frcp: 495bf215546Sopenharmony_ci case nir_op_frsq: 496bf215546Sopenharmony_ci case nir_op_fsqrt: 497bf215546Sopenharmony_ci case nir_op_fexp2: 498bf215546Sopenharmony_ci case nir_op_flog2: 499bf215546Sopenharmony_ci case nir_op_ffract: 500bf215546Sopenharmony_ci case nir_op_ffloor: 501bf215546Sopenharmony_ci case nir_op_fceil: 502bf215546Sopenharmony_ci case nir_op_ftrunc: 503bf215546Sopenharmony_ci case nir_op_fround_even: 504bf215546Sopenharmony_ci case nir_op_fsin_amd: 505bf215546Sopenharmony_ci case nir_op_fcos_amd: 506bf215546Sopenharmony_ci case nir_op_f2f16: 507bf215546Sopenharmony_ci case nir_op_f2f16_rtz: 508bf215546Sopenharmony_ci case nir_op_f2f16_rtne: 509bf215546Sopenharmony_ci case nir_op_f2f32: 510bf215546Sopenharmony_ci case nir_op_f2f64: 511bf215546Sopenharmony_ci case nir_op_u2f16: 512bf215546Sopenharmony_ci case nir_op_u2f32: 513bf215546Sopenharmony_ci case nir_op_u2f64: 514bf215546Sopenharmony_ci case nir_op_i2f16: 515bf215546Sopenharmony_ci case nir_op_i2f32: 516bf215546Sopenharmony_ci case nir_op_i2f64: 517bf215546Sopenharmony_ci case nir_op_pack_half_2x16_split: 518bf215546Sopenharmony_ci case nir_op_pack_unorm_2x16: 519bf215546Sopenharmony_ci case nir_op_pack_snorm_2x16: 520bf215546Sopenharmony_ci case nir_op_pack_uint_2x16: 521bf215546Sopenharmony_ci case nir_op_pack_sint_2x16: 522bf215546Sopenharmony_ci case nir_op_unpack_half_2x16_split_x: 523bf215546Sopenharmony_ci case nir_op_unpack_half_2x16_split_y: 524bf215546Sopenharmony_ci case nir_op_fddx: 525bf215546Sopenharmony_ci case nir_op_fddy: 526bf215546Sopenharmony_ci case nir_op_fddx_fine: 527bf215546Sopenharmony_ci case nir_op_fddy_fine: 528bf215546Sopenharmony_ci case nir_op_fddx_coarse: 529bf215546Sopenharmony_ci case nir_op_fddy_coarse: 530bf215546Sopenharmony_ci case nir_op_fquantize2f16: 531bf215546Sopenharmony_ci case nir_op_ldexp: 532bf215546Sopenharmony_ci case nir_op_frexp_sig: 533bf215546Sopenharmony_ci case nir_op_frexp_exp: 534bf215546Sopenharmony_ci case nir_op_cube_face_index_amd: 535bf215546Sopenharmony_ci case nir_op_cube_face_coord_amd: 536bf215546Sopenharmony_ci case nir_op_sad_u8x4: 537bf215546Sopenharmony_ci case nir_op_udot_4x8_uadd: 538bf215546Sopenharmony_ci case nir_op_sdot_4x8_iadd: 539bf215546Sopenharmony_ci case nir_op_udot_4x8_uadd_sat: 540bf215546Sopenharmony_ci case nir_op_sdot_4x8_iadd_sat: 541bf215546Sopenharmony_ci case nir_op_udot_2x16_uadd: 542bf215546Sopenharmony_ci case nir_op_sdot_2x16_iadd: 543bf215546Sopenharmony_ci case nir_op_udot_2x16_uadd_sat: 544bf215546Sopenharmony_ci case nir_op_sdot_2x16_iadd_sat: type = RegType::vgpr; break; 545bf215546Sopenharmony_ci case nir_op_f2i16: 546bf215546Sopenharmony_ci case nir_op_f2u16: 547bf215546Sopenharmony_ci case nir_op_f2i32: 548bf215546Sopenharmony_ci case nir_op_f2u32: 549bf215546Sopenharmony_ci case nir_op_f2i64: 550bf215546Sopenharmony_ci case nir_op_f2u64: 551bf215546Sopenharmony_ci case nir_op_b2i8: 552bf215546Sopenharmony_ci case nir_op_b2i16: 553bf215546Sopenharmony_ci case nir_op_b2i32: 554bf215546Sopenharmony_ci case nir_op_b2i64: 555bf215546Sopenharmony_ci case nir_op_b2b32: 556bf215546Sopenharmony_ci case nir_op_b2f16: 557bf215546Sopenharmony_ci case nir_op_b2f32: 558bf215546Sopenharmony_ci case nir_op_mov: break; 559bf215546Sopenharmony_ci case nir_op_iabs: 560bf215546Sopenharmony_ci case nir_op_iadd: 561bf215546Sopenharmony_ci case nir_op_iadd_sat: 562bf215546Sopenharmony_ci case nir_op_uadd_sat: 563bf215546Sopenharmony_ci case nir_op_isub: 564bf215546Sopenharmony_ci case nir_op_isub_sat: 565bf215546Sopenharmony_ci case nir_op_usub_sat: 566bf215546Sopenharmony_ci case nir_op_imul: 567bf215546Sopenharmony_ci case nir_op_imin: 568bf215546Sopenharmony_ci case nir_op_imax: 569bf215546Sopenharmony_ci case nir_op_umin: 570bf215546Sopenharmony_ci case nir_op_umax: 571bf215546Sopenharmony_ci case nir_op_ishl: 572bf215546Sopenharmony_ci case nir_op_ishr: 573bf215546Sopenharmony_ci case nir_op_ushr: 574bf215546Sopenharmony_ci /* packed 16bit instructions have to be VGPR */ 575bf215546Sopenharmony_ci type = alu_instr->dest.dest.ssa.num_components == 2 ? RegType::vgpr : type; 576bf215546Sopenharmony_ci FALLTHROUGH; 577bf215546Sopenharmony_ci default: 578bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) { 579bf215546Sopenharmony_ci if (regclasses[alu_instr->src[i].src.ssa->index].type() == RegType::vgpr) 580bf215546Sopenharmony_ci type = RegType::vgpr; 581bf215546Sopenharmony_ci } 582bf215546Sopenharmony_ci break; 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci RegClass rc = get_reg_class(ctx, type, alu_instr->dest.dest.ssa.num_components, 586bf215546Sopenharmony_ci alu_instr->dest.dest.ssa.bit_size); 587bf215546Sopenharmony_ci regclasses[alu_instr->dest.dest.ssa.index] = rc; 588bf215546Sopenharmony_ci break; 589bf215546Sopenharmony_ci } 590bf215546Sopenharmony_ci case nir_instr_type_load_const: { 591bf215546Sopenharmony_ci unsigned num_components = nir_instr_as_load_const(instr)->def.num_components; 592bf215546Sopenharmony_ci unsigned bit_size = nir_instr_as_load_const(instr)->def.bit_size; 593bf215546Sopenharmony_ci RegClass rc = get_reg_class(ctx, RegType::sgpr, num_components, bit_size); 594bf215546Sopenharmony_ci regclasses[nir_instr_as_load_const(instr)->def.index] = rc; 595bf215546Sopenharmony_ci break; 596bf215546Sopenharmony_ci } 597bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 598bf215546Sopenharmony_ci nir_intrinsic_instr* intrinsic = nir_instr_as_intrinsic(instr); 599bf215546Sopenharmony_ci if (!nir_intrinsic_infos[intrinsic->intrinsic].has_dest) 600bf215546Sopenharmony_ci break; 601bf215546Sopenharmony_ci RegType type = RegType::sgpr; 602bf215546Sopenharmony_ci switch (intrinsic->intrinsic) { 603bf215546Sopenharmony_ci case nir_intrinsic_load_push_constant: 604bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_id: 605bf215546Sopenharmony_ci case nir_intrinsic_load_num_workgroups: 606bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_size_addr_amd: 607bf215546Sopenharmony_ci case nir_intrinsic_load_sbt_base_amd: 608bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_id: 609bf215546Sopenharmony_ci case nir_intrinsic_load_num_subgroups: 610bf215546Sopenharmony_ci case nir_intrinsic_load_first_vertex: 611bf215546Sopenharmony_ci case nir_intrinsic_load_base_instance: 612bf215546Sopenharmony_ci case nir_intrinsic_vote_all: 613bf215546Sopenharmony_ci case nir_intrinsic_vote_any: 614bf215546Sopenharmony_ci case nir_intrinsic_read_first_invocation: 615bf215546Sopenharmony_ci case nir_intrinsic_read_invocation: 616bf215546Sopenharmony_ci case nir_intrinsic_first_invocation: 617bf215546Sopenharmony_ci case nir_intrinsic_ballot: 618bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_samples: 619bf215546Sopenharmony_ci case nir_intrinsic_has_input_vertex_amd: 620bf215546Sopenharmony_ci case nir_intrinsic_has_input_primitive_amd: 621bf215546Sopenharmony_ci case nir_intrinsic_load_force_vrs_rates_amd: 622bf215546Sopenharmony_ci case nir_intrinsic_load_scalar_arg_amd: 623bf215546Sopenharmony_ci case nir_intrinsic_load_smem_amd: type = RegType::sgpr; break; 624bf215546Sopenharmony_ci case nir_intrinsic_load_sample_id: 625bf215546Sopenharmony_ci case nir_intrinsic_load_input: 626bf215546Sopenharmony_ci case nir_intrinsic_load_output: 627bf215546Sopenharmony_ci case nir_intrinsic_load_input_vertex: 628bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 629bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_output: 630bf215546Sopenharmony_ci case nir_intrinsic_load_vertex_id_zero_base: 631bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_sample: 632bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_pixel: 633bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_model: 634bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_centroid: 635bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_sample: 636bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_offset: 637bf215546Sopenharmony_ci case nir_intrinsic_load_interpolated_input: 638bf215546Sopenharmony_ci case nir_intrinsic_load_frag_coord: 639bf215546Sopenharmony_ci case nir_intrinsic_load_frag_shading_rate: 640bf215546Sopenharmony_ci case nir_intrinsic_load_sample_pos: 641bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_id: 642bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_index: 643bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_invocation: 644bf215546Sopenharmony_ci case nir_intrinsic_load_tess_coord: 645bf215546Sopenharmony_ci case nir_intrinsic_write_invocation_amd: 646bf215546Sopenharmony_ci case nir_intrinsic_mbcnt_amd: 647bf215546Sopenharmony_ci case nir_intrinsic_byte_permute_amd: 648bf215546Sopenharmony_ci case nir_intrinsic_lane_permute_16_amd: 649bf215546Sopenharmony_ci case nir_intrinsic_load_instance_id: 650bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add: 651bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin: 652bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin: 653bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax: 654bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax: 655bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and: 656bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or: 657bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor: 658bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange: 659bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap: 660bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmin: 661bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmax: 662bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_add_amd: 663bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imin_amd: 664bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umin_amd: 665bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imax_amd: 666bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umax_amd: 667bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_and_amd: 668bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_or_amd: 669bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_xor_amd: 670bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_exchange_amd: 671bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_comp_swap_amd: 672bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmin_amd: 673bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmax_amd: 674bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_add: 675bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_umin: 676bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_imin: 677bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_umax: 678bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_imax: 679bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_and: 680bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_or: 681bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_xor: 682bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_exchange: 683bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_comp_swap: 684bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_fmin: 685bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_fmax: 686bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_size: 687bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_add: 688bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_imin: 689bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_umin: 690bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_imax: 691bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_umax: 692bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_and: 693bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_or: 694bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_xor: 695bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_exchange: 696bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_comp_swap: 697bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fadd: 698bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fmin: 699bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fmax: 700bf215546Sopenharmony_ci case nir_intrinsic_load_scratch: 701bf215546Sopenharmony_ci case nir_intrinsic_load_invocation_id: 702bf215546Sopenharmony_ci case nir_intrinsic_load_primitive_id: 703bf215546Sopenharmony_ci case nir_intrinsic_load_buffer_amd: 704bf215546Sopenharmony_ci case nir_intrinsic_load_initial_edgeflags_amd: 705bf215546Sopenharmony_ci case nir_intrinsic_gds_atomic_add_amd: 706bf215546Sopenharmony_ci case nir_intrinsic_bvh64_intersect_ray_amd: 707bf215546Sopenharmony_ci case nir_intrinsic_load_vector_arg_amd: type = RegType::vgpr; break; 708bf215546Sopenharmony_ci case nir_intrinsic_load_shared: 709bf215546Sopenharmony_ci case nir_intrinsic_load_shared2_amd: 710bf215546Sopenharmony_ci /* When the result of these loads is only used by cross-lane instructions, 711bf215546Sopenharmony_ci * it is beneficial to use a VGPR destination. This is because this allows 712bf215546Sopenharmony_ci * to put the s_waitcnt further down, which decreases latency. 713bf215546Sopenharmony_ci */ 714bf215546Sopenharmony_ci if (only_used_by_cross_lane_instrs(&intrinsic->dest.ssa)) { 715bf215546Sopenharmony_ci type = RegType::vgpr; 716bf215546Sopenharmony_ci break; 717bf215546Sopenharmony_ci } 718bf215546Sopenharmony_ci FALLTHROUGH; 719bf215546Sopenharmony_ci case nir_intrinsic_shuffle: 720bf215546Sopenharmony_ci case nir_intrinsic_quad_broadcast: 721bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_horizontal: 722bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_vertical: 723bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_diagonal: 724bf215546Sopenharmony_ci case nir_intrinsic_quad_swizzle_amd: 725bf215546Sopenharmony_ci case nir_intrinsic_masked_swizzle_amd: 726bf215546Sopenharmony_ci case nir_intrinsic_inclusive_scan: 727bf215546Sopenharmony_ci case nir_intrinsic_exclusive_scan: 728bf215546Sopenharmony_ci case nir_intrinsic_reduce: 729bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 730bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 731bf215546Sopenharmony_ci case nir_intrinsic_load_global_amd: 732bf215546Sopenharmony_ci type = nir_dest_is_divergent(intrinsic->dest) ? RegType::vgpr : RegType::sgpr; 733bf215546Sopenharmony_ci break; 734bf215546Sopenharmony_ci case nir_intrinsic_load_view_index: 735bf215546Sopenharmony_ci type = ctx->stage == fragment_fs ? RegType::vgpr : RegType::sgpr; 736bf215546Sopenharmony_ci break; 737bf215546Sopenharmony_ci default: 738bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_intrinsic_infos[intrinsic->intrinsic].num_srcs; 739bf215546Sopenharmony_ci i++) { 740bf215546Sopenharmony_ci if (regclasses[intrinsic->src[i].ssa->index].type() == RegType::vgpr) 741bf215546Sopenharmony_ci type = RegType::vgpr; 742bf215546Sopenharmony_ci } 743bf215546Sopenharmony_ci break; 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci RegClass rc = get_reg_class(ctx, type, intrinsic->dest.ssa.num_components, 746bf215546Sopenharmony_ci intrinsic->dest.ssa.bit_size); 747bf215546Sopenharmony_ci regclasses[intrinsic->dest.ssa.index] = rc; 748bf215546Sopenharmony_ci break; 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci case nir_instr_type_tex: { 751bf215546Sopenharmony_ci nir_tex_instr* tex = nir_instr_as_tex(instr); 752bf215546Sopenharmony_ci RegType type = nir_dest_is_divergent(tex->dest) ? RegType::vgpr : RegType::sgpr; 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci if (tex->op == nir_texop_texture_samples) { 755bf215546Sopenharmony_ci assert(!tex->dest.ssa.divergent); 756bf215546Sopenharmony_ci } 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci RegClass rc = 759bf215546Sopenharmony_ci get_reg_class(ctx, type, tex->dest.ssa.num_components, tex->dest.ssa.bit_size); 760bf215546Sopenharmony_ci regclasses[tex->dest.ssa.index] = rc; 761bf215546Sopenharmony_ci break; 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci case nir_instr_type_parallel_copy: { 764bf215546Sopenharmony_ci nir_foreach_parallel_copy_entry (entry, nir_instr_as_parallel_copy(instr)) { 765bf215546Sopenharmony_ci regclasses[entry->dest.ssa.index] = regclasses[entry->src.ssa->index]; 766bf215546Sopenharmony_ci } 767bf215546Sopenharmony_ci break; 768bf215546Sopenharmony_ci } 769bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: { 770bf215546Sopenharmony_ci unsigned num_components = nir_instr_as_ssa_undef(instr)->def.num_components; 771bf215546Sopenharmony_ci unsigned bit_size = nir_instr_as_ssa_undef(instr)->def.bit_size; 772bf215546Sopenharmony_ci RegClass rc = get_reg_class(ctx, RegType::sgpr, num_components, bit_size); 773bf215546Sopenharmony_ci regclasses[nir_instr_as_ssa_undef(instr)->def.index] = rc; 774bf215546Sopenharmony_ci break; 775bf215546Sopenharmony_ci } 776bf215546Sopenharmony_ci case nir_instr_type_phi: { 777bf215546Sopenharmony_ci nir_phi_instr* phi = nir_instr_as_phi(instr); 778bf215546Sopenharmony_ci RegType type = RegType::sgpr; 779bf215546Sopenharmony_ci unsigned num_components = phi->dest.ssa.num_components; 780bf215546Sopenharmony_ci assert((phi->dest.ssa.bit_size != 1 || num_components == 1) && 781bf215546Sopenharmony_ci "Multiple components not supported on boolean phis."); 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci if (nir_dest_is_divergent(phi->dest)) { 784bf215546Sopenharmony_ci type = RegType::vgpr; 785bf215546Sopenharmony_ci } else { 786bf215546Sopenharmony_ci nir_foreach_phi_src (src, phi) { 787bf215546Sopenharmony_ci if (regclasses[src->src.ssa->index].type() == RegType::vgpr) 788bf215546Sopenharmony_ci type = RegType::vgpr; 789bf215546Sopenharmony_ci } 790bf215546Sopenharmony_ci } 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci RegClass rc = get_reg_class(ctx, type, num_components, phi->dest.ssa.bit_size); 793bf215546Sopenharmony_ci if (rc != regclasses[phi->dest.ssa.index]) 794bf215546Sopenharmony_ci done = false; 795bf215546Sopenharmony_ci regclasses[phi->dest.ssa.index] = rc; 796bf215546Sopenharmony_ci break; 797bf215546Sopenharmony_ci } 798bf215546Sopenharmony_ci default: break; 799bf215546Sopenharmony_ci } 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci } 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci ctx->program->config->spi_ps_input_ena = ctx->program->info.ps.spi_ps_input; 805bf215546Sopenharmony_ci ctx->program->config->spi_ps_input_addr = ctx->program->info.ps.spi_ps_input; 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci ctx->cf_info.nir_to_aco = std::move(nir_to_aco); 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci /* align and copy constant data */ 810bf215546Sopenharmony_ci while (ctx->program->constant_data.size() % 4u) 811bf215546Sopenharmony_ci ctx->program->constant_data.push_back(0); 812bf215546Sopenharmony_ci ctx->constant_data_offset = ctx->program->constant_data.size(); 813bf215546Sopenharmony_ci ctx->program->constant_data.insert(ctx->program->constant_data.end(), 814bf215546Sopenharmony_ci (uint8_t*)shader->constant_data, 815bf215546Sopenharmony_ci (uint8_t*)shader->constant_data + shader->constant_data_size); 816bf215546Sopenharmony_ci} 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_civoid 819bf215546Sopenharmony_cicleanup_context(isel_context* ctx) 820bf215546Sopenharmony_ci{ 821bf215546Sopenharmony_ci _mesa_hash_table_destroy(ctx->range_ht, NULL); 822bf215546Sopenharmony_ci} 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ciisel_context 825bf215546Sopenharmony_cisetup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders, 826bf215546Sopenharmony_ci ac_shader_config* config, const struct aco_compiler_options* options, 827bf215546Sopenharmony_ci const struct aco_shader_info* info, 828bf215546Sopenharmony_ci const struct radv_shader_args* args, bool is_gs_copy_shader, 829bf215546Sopenharmony_ci bool is_ps_epilog) 830bf215546Sopenharmony_ci{ 831bf215546Sopenharmony_ci SWStage sw_stage = SWStage::None; 832bf215546Sopenharmony_ci for (unsigned i = 0; i < shader_count; i++) { 833bf215546Sopenharmony_ci switch (shaders[i]->info.stage) { 834bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: sw_stage = sw_stage | SWStage::VS; break; 835bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: sw_stage = sw_stage | SWStage::TCS; break; 836bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: sw_stage = sw_stage | SWStage::TES; break; 837bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 838bf215546Sopenharmony_ci sw_stage = sw_stage | (is_gs_copy_shader ? SWStage::GSCopy : SWStage::GS); 839bf215546Sopenharmony_ci break; 840bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: sw_stage = sw_stage | SWStage::FS; break; 841bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: sw_stage = sw_stage | SWStage::CS; break; 842bf215546Sopenharmony_ci case MESA_SHADER_TASK: sw_stage = sw_stage | SWStage::TS; break; 843bf215546Sopenharmony_ci case MESA_SHADER_MESH: sw_stage = sw_stage | SWStage::MS; break; 844bf215546Sopenharmony_ci default: unreachable("Shader stage not implemented"); 845bf215546Sopenharmony_ci } 846bf215546Sopenharmony_ci } 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci if (is_ps_epilog) { 849bf215546Sopenharmony_ci assert(shader_count == 0 && !shaders); 850bf215546Sopenharmony_ci sw_stage = SWStage::FS; 851bf215546Sopenharmony_ci } 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci bool gfx9_plus = options->gfx_level >= GFX9; 854bf215546Sopenharmony_ci bool ngg = info->is_ngg && options->gfx_level >= GFX10; 855bf215546Sopenharmony_ci HWStage hw_stage{}; 856bf215546Sopenharmony_ci if (sw_stage == SWStage::VS && info->vs.as_es && !ngg) 857bf215546Sopenharmony_ci hw_stage = HWStage::ES; 858bf215546Sopenharmony_ci else if (sw_stage == SWStage::VS && !info->vs.as_ls && !ngg) 859bf215546Sopenharmony_ci hw_stage = HWStage::VS; 860bf215546Sopenharmony_ci else if (sw_stage == SWStage::VS && ngg) 861bf215546Sopenharmony_ci hw_stage = HWStage::NGG; /* GFX10/NGG: VS without GS uses the HW GS stage */ 862bf215546Sopenharmony_ci else if (sw_stage == SWStage::GS) 863bf215546Sopenharmony_ci hw_stage = HWStage::GS; 864bf215546Sopenharmony_ci else if (sw_stage == SWStage::FS) 865bf215546Sopenharmony_ci hw_stage = HWStage::FS; 866bf215546Sopenharmony_ci else if (sw_stage == SWStage::CS) 867bf215546Sopenharmony_ci hw_stage = HWStage::CS; 868bf215546Sopenharmony_ci else if (sw_stage == SWStage::GSCopy) 869bf215546Sopenharmony_ci hw_stage = HWStage::VS; 870bf215546Sopenharmony_ci else if (sw_stage == SWStage::TS) 871bf215546Sopenharmony_ci hw_stage = HWStage::CS; /* Task shaders are implemented with compute shaders. */ 872bf215546Sopenharmony_ci else if (sw_stage == SWStage::MS) 873bf215546Sopenharmony_ci hw_stage = HWStage::NGG; /* Mesh shaders only work on NGG and on GFX10.3+. */ 874bf215546Sopenharmony_ci else if (sw_stage == SWStage::VS_GS && gfx9_plus && !ngg) 875bf215546Sopenharmony_ci hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */ 876bf215546Sopenharmony_ci else if (sw_stage == SWStage::VS_GS && ngg) 877bf215546Sopenharmony_ci hw_stage = HWStage::NGG; /* GFX10+: VS+GS merged into an NGG GS */ 878bf215546Sopenharmony_ci else if (sw_stage == SWStage::VS && info->vs.as_ls) 879bf215546Sopenharmony_ci hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */ 880bf215546Sopenharmony_ci else if (sw_stage == SWStage::TCS) 881bf215546Sopenharmony_ci hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */ 882bf215546Sopenharmony_ci else if (sw_stage == SWStage::VS_TCS) 883bf215546Sopenharmony_ci hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */ 884bf215546Sopenharmony_ci else if (sw_stage == SWStage::TES && !info->tes.as_es && !ngg) 885bf215546Sopenharmony_ci hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */ 886bf215546Sopenharmony_ci else if (sw_stage == SWStage::TES && !info->tes.as_es && ngg) 887bf215546Sopenharmony_ci hw_stage = HWStage::NGG; /* GFX10/NGG: TES without GS */ 888bf215546Sopenharmony_ci else if (sw_stage == SWStage::TES && info->tes.as_es && !ngg) 889bf215546Sopenharmony_ci hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */ 890bf215546Sopenharmony_ci else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg) 891bf215546Sopenharmony_ci hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */ 892bf215546Sopenharmony_ci else if (sw_stage == SWStage::TES_GS && ngg) 893bf215546Sopenharmony_ci hw_stage = HWStage::NGG; /* GFX10+: TES+GS merged into an NGG GS */ 894bf215546Sopenharmony_ci else 895bf215546Sopenharmony_ci unreachable("Shader stage not implemented"); 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci init_program(program, Stage{hw_stage, sw_stage}, info, options->gfx_level, options->family, 898bf215546Sopenharmony_ci options->wgp_mode, config); 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci isel_context ctx = {}; 901bf215546Sopenharmony_ci ctx.program = program; 902bf215546Sopenharmony_ci ctx.args = args; 903bf215546Sopenharmony_ci ctx.options = options; 904bf215546Sopenharmony_ci ctx.stage = program->stage; 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci program->workgroup_size = program->info.workgroup_size; 907bf215546Sopenharmony_ci assert(program->workgroup_size); 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci /* Mesh shading only works on GFX10.3+. */ 910bf215546Sopenharmony_ci ASSERTED bool mesh_shading = ctx.stage.has(SWStage::TS) || ctx.stage.has(SWStage::MS); 911bf215546Sopenharmony_ci assert(!mesh_shading || ctx.program->gfx_level >= GFX10_3); 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_ci if (ctx.stage == tess_control_hs) 914bf215546Sopenharmony_ci setup_tcs_info(&ctx, shaders[0], NULL); 915bf215546Sopenharmony_ci else if (ctx.stage == vertex_tess_control_hs) 916bf215546Sopenharmony_ci setup_tcs_info(&ctx, shaders[1], shaders[0]); 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci calc_min_waves(program); 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci unsigned scratch_size = 0; 921bf215546Sopenharmony_ci if (program->stage == gs_copy_vs) { 922bf215546Sopenharmony_ci assert(shader_count == 1); 923bf215546Sopenharmony_ci setup_vs_output_info(&ctx, shaders[0], &program->info.vs.outinfo); 924bf215546Sopenharmony_ci } else { 925bf215546Sopenharmony_ci for (unsigned i = 0; i < shader_count; i++) { 926bf215546Sopenharmony_ci nir_shader* nir = shaders[i]; 927bf215546Sopenharmony_ci setup_nir(&ctx, nir); 928bf215546Sopenharmony_ci } 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci for (unsigned i = 0; i < shader_count; i++) 931bf215546Sopenharmony_ci scratch_size = std::max(scratch_size, shaders[i]->scratch_size); 932bf215546Sopenharmony_ci } 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024); 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci ctx.block = ctx.program->create_and_insert_block(); 937bf215546Sopenharmony_ci ctx.block->kind = block_kind_top_level; 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci return ctx; 940bf215546Sopenharmony_ci} 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci} // namespace aco 943