1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2021 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * All Rights Reserved. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "si_pipe.h" 26bf215546Sopenharmony_ci#include "util/mesa-sha1.h" 27bf215546Sopenharmony_ci#include "util/u_prim.h" 28bf215546Sopenharmony_ci#include "sid.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cistruct si_shader_profile { 32bf215546Sopenharmony_ci uint32_t sha1[SHA1_DIGEST_LENGTH32]; 33bf215546Sopenharmony_ci uint32_t options; 34bf215546Sopenharmony_ci}; 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_cistatic struct si_shader_profile profiles[] = 37bf215546Sopenharmony_ci{ 38bf215546Sopenharmony_ci { 39bf215546Sopenharmony_ci /* Plot3D */ 40bf215546Sopenharmony_ci {0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451}, 41bf215546Sopenharmony_ci SI_PROFILE_VS_NO_BINNING, 42bf215546Sopenharmony_ci }, 43bf215546Sopenharmony_ci { 44bf215546Sopenharmony_ci /* Viewperf/Energy isn't affected by the discard bug. */ 45bf215546Sopenharmony_ci {0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0}, 46bf215546Sopenharmony_ci SI_PROFILE_IGNORE_LLVM13_DISCARD_BUG, 47bf215546Sopenharmony_ci }, 48bf215546Sopenharmony_ci { 49bf215546Sopenharmony_ci /* Viewperf/Medical */ 50bf215546Sopenharmony_ci {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0}, 51bf215546Sopenharmony_ci SI_PROFILE_PS_NO_BINNING, 52bf215546Sopenharmony_ci }, 53bf215546Sopenharmony_ci { 54bf215546Sopenharmony_ci /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32, 55bf215546Sopenharmony_ci * probably due to interpolation performance. 56bf215546Sopenharmony_ci */ 57bf215546Sopenharmony_ci {0x29f0f4a0, 0x0672258d, 0x47ccdcfd, 0x31e67dcc, 0xdcb1fda8}, 58bf215546Sopenharmony_ci SI_PROFILE_WAVE64, 59bf215546Sopenharmony_ci }, 60bf215546Sopenharmony_ci { 61bf215546Sopenharmony_ci /* Viewperf/Creo */ 62bf215546Sopenharmony_ci {0x1f288a73, 0xba46cce5, 0xbf68e6c6, 0x58543651, 0xca3c8efd}, 63bf215546Sopenharmony_ci SI_PROFILE_CLAMP_DIV_BY_ZERO, 64bf215546Sopenharmony_ci }, 65bf215546Sopenharmony_ci}; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_cistatic unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) 68bf215546Sopenharmony_ci{ 69bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_store_output) 70bf215546Sopenharmony_ci return 0; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci unsigned writemask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin); 73bf215546Sopenharmony_ci unsigned location = nir_intrinsic_io_semantics(intrin).location; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci if (location == VARYING_SLOT_TESS_LEVEL_OUTER) 76bf215546Sopenharmony_ci return writemask << 4; 77bf215546Sopenharmony_ci else if (location == VARYING_SLOT_TESS_LEVEL_INNER) 78bf215546Sopenharmony_ci return writemask; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci return 0; 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask, 84bf215546Sopenharmony_ci unsigned *cond_block_tf_writemask, 85bf215546Sopenharmony_ci bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf) 86bf215546Sopenharmony_ci{ 87bf215546Sopenharmony_ci switch (cf_node->type) { 88bf215546Sopenharmony_ci case nir_cf_node_block: { 89bf215546Sopenharmony_ci nir_block *block = nir_cf_node_as_block(cf_node); 90bf215546Sopenharmony_ci nir_foreach_instr (instr, block) { 91bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 92bf215546Sopenharmony_ci continue; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 95bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_control_barrier) { 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci /* If we find a barrier in nested control flow put this in the 98bf215546Sopenharmony_ci * too hard basket. In GLSL this is not possible but it is in 99bf215546Sopenharmony_ci * SPIR-V. 100bf215546Sopenharmony_ci */ 101bf215546Sopenharmony_ci if (is_nested_cf) { 102bf215546Sopenharmony_ci *tessfactors_are_def_in_all_invocs = false; 103bf215546Sopenharmony_ci return; 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci /* The following case must be prevented: 107bf215546Sopenharmony_ci * gl_TessLevelInner = ...; 108bf215546Sopenharmony_ci * barrier(); 109bf215546Sopenharmony_ci * if (gl_InvocationID == 1) 110bf215546Sopenharmony_ci * gl_TessLevelInner = ...; 111bf215546Sopenharmony_ci * 112bf215546Sopenharmony_ci * If you consider disjoint code segments separated by barriers, each 113bf215546Sopenharmony_ci * such segment that writes tess factor channels should write the same 114bf215546Sopenharmony_ci * channels in all codepaths within that segment. 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_ci if (*upper_block_tf_writemask || *cond_block_tf_writemask) { 117bf215546Sopenharmony_ci /* Accumulate the result: */ 118bf215546Sopenharmony_ci *tessfactors_are_def_in_all_invocs &= 119bf215546Sopenharmony_ci !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask)); 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci /* Analyze the next code segment from scratch. */ 122bf215546Sopenharmony_ci *upper_block_tf_writemask = 0; 123bf215546Sopenharmony_ci *cond_block_tf_writemask = 0; 124bf215546Sopenharmony_ci } 125bf215546Sopenharmony_ci } else 126bf215546Sopenharmony_ci *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin); 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci break; 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci case nir_cf_node_if: { 132bf215546Sopenharmony_ci unsigned then_tessfactor_writemask = 0; 133bf215546Sopenharmony_ci unsigned else_tessfactor_writemask = 0; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(cf_node); 136bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) 137bf215546Sopenharmony_ci { 138bf215546Sopenharmony_ci scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask, 139bf215546Sopenharmony_ci tessfactors_are_def_in_all_invocs, true); 140bf215546Sopenharmony_ci } 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) 143bf215546Sopenharmony_ci { 144bf215546Sopenharmony_ci scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask, 145bf215546Sopenharmony_ci tessfactors_are_def_in_all_invocs, true); 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (then_tessfactor_writemask || else_tessfactor_writemask) { 149bf215546Sopenharmony_ci /* If both statements write the same tess factor channels, 150bf215546Sopenharmony_ci * we can say that the upper block writes them too. 151bf215546Sopenharmony_ci */ 152bf215546Sopenharmony_ci *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask; 153bf215546Sopenharmony_ci *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask; 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci break; 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci case nir_cf_node_loop: { 159bf215546Sopenharmony_ci nir_loop *loop = nir_cf_node_as_loop(cf_node); 160bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) 161bf215546Sopenharmony_ci { 162bf215546Sopenharmony_ci scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask, 163bf215546Sopenharmony_ci tessfactors_are_def_in_all_invocs, true); 164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci break; 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci default: 169bf215546Sopenharmony_ci unreachable("unknown cf node type"); 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_cistatic bool are_tessfactors_def_in_all_invocs(const struct nir_shader *nir) 174bf215546Sopenharmony_ci{ 175bf215546Sopenharmony_ci assert(nir->info.stage == MESA_SHADER_TESS_CTRL); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci /* The pass works as follows: 178bf215546Sopenharmony_ci * If all codepaths write tess factors, we can say that all 179bf215546Sopenharmony_ci * invocations define tess factors. 180bf215546Sopenharmony_ci * 181bf215546Sopenharmony_ci * Each tess factor channel is tracked separately. 182bf215546Sopenharmony_ci */ 183bf215546Sopenharmony_ci unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ 184bf215546Sopenharmony_ci unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci /* Initial value = true. Here the pass will accumulate results from 187bf215546Sopenharmony_ci * multiple segments surrounded by barriers. If tess factors aren't 188bf215546Sopenharmony_ci * written at all, it's a shader bug and we don't care if this will be 189bf215546Sopenharmony_ci * true. 190bf215546Sopenharmony_ci */ 191bf215546Sopenharmony_ci bool tessfactors_are_def_in_all_invocs = true; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci nir_foreach_function (function, nir) { 194bf215546Sopenharmony_ci if (function->impl) { 195bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, node, node, &function->impl->body) 196bf215546Sopenharmony_ci { 197bf215546Sopenharmony_ci scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask, 198bf215546Sopenharmony_ci &tessfactors_are_def_in_all_invocs, false); 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci } 201bf215546Sopenharmony_ci } 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci /* Accumulate the result for the last code segment separated by a 204bf215546Sopenharmony_ci * barrier. 205bf215546Sopenharmony_ci */ 206bf215546Sopenharmony_ci if (main_block_tf_writemask || cond_block_tf_writemask) { 207bf215546Sopenharmony_ci tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask); 208bf215546Sopenharmony_ci } 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci return tessfactors_are_def_in_all_invocs; 211bf215546Sopenharmony_ci} 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_cistatic const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type) 214bf215546Sopenharmony_ci{ 215bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->num_srcs; i++) { 216bf215546Sopenharmony_ci if (instr->src[i].src_type == type) 217bf215546Sopenharmony_ci return &instr->src[i].src; 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci return NULL; 220bf215546Sopenharmony_ci} 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_cistatic void scan_io_usage(const nir_shader *nir, struct si_shader_info *info, 223bf215546Sopenharmony_ci nir_intrinsic_instr *intr, bool is_input) 224bf215546Sopenharmony_ci{ 225bf215546Sopenharmony_ci unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */ 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_load_interpolated_input) { 228bf215546Sopenharmony_ci nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci if (baryc) { 231bf215546Sopenharmony_ci if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0) 232bf215546Sopenharmony_ci interp = nir_intrinsic_interp_mode(baryc); 233bf215546Sopenharmony_ci else 234bf215546Sopenharmony_ci unreachable("unknown barycentric intrinsic"); 235bf215546Sopenharmony_ci } else { 236bf215546Sopenharmony_ci unreachable("unknown barycentric expression"); 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci unsigned mask, bit_size; 241bf215546Sopenharmony_ci bool is_output_load; 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci if (nir_intrinsic_has_write_mask(intr)) { 244bf215546Sopenharmony_ci mask = nir_intrinsic_write_mask(intr); /* store */ 245bf215546Sopenharmony_ci bit_size = nir_src_bit_size(intr->src[0]); 246bf215546Sopenharmony_ci is_output_load = false; 247bf215546Sopenharmony_ci } else { 248bf215546Sopenharmony_ci mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */ 249bf215546Sopenharmony_ci bit_size = intr->dest.ssa.bit_size; 250bf215546Sopenharmony_ci is_output_load = !is_input; 251bf215546Sopenharmony_ci } 252bf215546Sopenharmony_ci assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered"); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs 255bf215546Sopenharmony_ci * where the mask is untyped. 256bf215546Sopenharmony_ci */ 257bf215546Sopenharmony_ci if (bit_size == 16 && !is_input) { 258bf215546Sopenharmony_ci unsigned new_mask = 0; 259bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 260bf215546Sopenharmony_ci if (mask & (1 << i)) 261bf215546Sopenharmony_ci new_mask |= 0x1 << (i / 2); 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci mask = new_mask; 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci mask <<= nir_intrinsic_component(intr); 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci nir_src offset = *nir_get_io_offset_src(intr); 269bf215546Sopenharmony_ci bool indirect = !nir_src_is_const(offset); 270bf215546Sopenharmony_ci if (!indirect) 271bf215546Sopenharmony_ci assert(nir_src_as_uint(offset) == 0); 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci unsigned semantic = 0; 274bf215546Sopenharmony_ci /* VS doesn't have semantics. */ 275bf215546Sopenharmony_ci if (nir->info.stage != MESA_SHADER_VERTEX || !is_input) 276bf215546Sopenharmony_ci semantic = nir_intrinsic_io_semantics(intr).location; 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) { 279bf215546Sopenharmony_ci /* Never use FRAG_RESULT_COLOR directly. */ 280bf215546Sopenharmony_ci if (semantic == FRAG_RESULT_COLOR) 281bf215546Sopenharmony_ci semantic = FRAG_RESULT_DATA0; 282bf215546Sopenharmony_ci semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index; 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci unsigned driver_location = nir_intrinsic_base(intr); 286bf215546Sopenharmony_ci unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci if (is_input) { 289bf215546Sopenharmony_ci assert(driver_location + num_slots <= ARRAY_SIZE(info->input)); 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci for (unsigned i = 0; i < num_slots; i++) { 292bf215546Sopenharmony_ci unsigned loc = driver_location + i; 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci info->input[loc].semantic = semantic + i; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci if (semantic == VARYING_SLOT_PRIMITIVE_ID) 297bf215546Sopenharmony_ci info->input[loc].interpolate = INTERP_MODE_FLAT; 298bf215546Sopenharmony_ci else 299bf215546Sopenharmony_ci info->input[loc].interpolate = interp; 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci if (mask) { 302bf215546Sopenharmony_ci info->input[loc].usage_mask |= mask; 303bf215546Sopenharmony_ci if (bit_size == 16) { 304bf215546Sopenharmony_ci if (nir_intrinsic_io_semantics(intr).high_16bits) 305bf215546Sopenharmony_ci info->input[loc].fp16_lo_hi_valid |= 0x2; 306bf215546Sopenharmony_ci else 307bf215546Sopenharmony_ci info->input[loc].fp16_lo_hi_valid |= 0x1; 308bf215546Sopenharmony_ci } 309bf215546Sopenharmony_ci info->num_inputs = MAX2(info->num_inputs, loc + 1); 310bf215546Sopenharmony_ci } 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci } else { 313bf215546Sopenharmony_ci /* Outputs. */ 314bf215546Sopenharmony_ci assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask)); 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci for (unsigned i = 0; i < num_slots; i++) { 317bf215546Sopenharmony_ci unsigned loc = driver_location + i; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci info->output_semantic[loc] = semantic + i; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci if (is_output_load) { 322bf215546Sopenharmony_ci /* Output loads have only a few things that we need to track. */ 323bf215546Sopenharmony_ci info->output_readmask[loc] |= mask; 324bf215546Sopenharmony_ci } else if (mask) { 325bf215546Sopenharmony_ci /* Output stores. */ 326bf215546Sopenharmony_ci unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams << 327bf215546Sopenharmony_ci (nir_intrinsic_component(intr) * 2); 328bf215546Sopenharmony_ci unsigned new_mask = mask & ~info->output_usagemask[loc]; 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci /* Iterate over all components. */ 331bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 332bf215546Sopenharmony_ci unsigned stream = (gs_streams >> (i * 2)) & 0x3; 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci if (new_mask & (1 << i)) { 335bf215546Sopenharmony_ci info->output_streams[loc] |= stream << (i * 2); 336bf215546Sopenharmony_ci info->num_stream_output_components[stream]++; 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci if (nir_intrinsic_has_io_xfb(intr)) { 340bf215546Sopenharmony_ci nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(intr) : 341bf215546Sopenharmony_ci nir_intrinsic_io_xfb2(intr); 342bf215546Sopenharmony_ci if (xfb.out[i % 2].num_components) { 343bf215546Sopenharmony_ci unsigned stream = (gs_streams >> (i * 2)) & 0x3; 344bf215546Sopenharmony_ci info->enabled_streamout_buffer_mask |= 345bf215546Sopenharmony_ci BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer); 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci } 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci if (nir_intrinsic_has_src_type(intr)) 351bf215546Sopenharmony_ci info->output_type[loc] = nir_intrinsic_src_type(intr); 352bf215546Sopenharmony_ci else if (nir_intrinsic_has_dest_type(intr)) 353bf215546Sopenharmony_ci info->output_type[loc] = nir_intrinsic_dest_type(intr); 354bf215546Sopenharmony_ci else 355bf215546Sopenharmony_ci info->output_type[loc] = nir_type_float32; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci info->output_usagemask[loc] |= mask; 358bf215546Sopenharmony_ci info->num_outputs = MAX2(info->num_outputs, loc + 1); 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT && 361bf215546Sopenharmony_ci semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) { 362bf215546Sopenharmony_ci unsigned index = semantic - FRAG_RESULT_DATA0; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci if (nir_intrinsic_src_type(intr) == nir_type_float16) 365bf215546Sopenharmony_ci info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2); 366bf215546Sopenharmony_ci else if (nir_intrinsic_src_type(intr) == nir_type_int16) 367bf215546Sopenharmony_ci info->output_color_types |= SI_TYPE_INT16 << (index * 2); 368bf215546Sopenharmony_ci else if (nir_intrinsic_src_type(intr) == nir_type_uint16) 369bf215546Sopenharmony_ci info->output_color_types |= SI_TYPE_UINT16 << (index * 2); 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci } 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci} 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_cistatic bool is_bindless_handle_indirect(nir_instr *src) 377bf215546Sopenharmony_ci{ 378bf215546Sopenharmony_ci /* Check if the bindless handle comes from indirect load_ubo. */ 379bf215546Sopenharmony_ci if (src->type == nir_instr_type_intrinsic && 380bf215546Sopenharmony_ci nir_instr_as_intrinsic(src)->intrinsic == nir_intrinsic_load_ubo) { 381bf215546Sopenharmony_ci if (!nir_src_is_const(nir_instr_as_intrinsic(src)->src[0])) 382bf215546Sopenharmony_ci return true; 383bf215546Sopenharmony_ci } else { 384bf215546Sopenharmony_ci /* Some other instruction. Return the worst-case result. */ 385bf215546Sopenharmony_ci return true; 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci return false; 388bf215546Sopenharmony_ci} 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci/* TODO: convert to nir_shader_instructions_pass */ 391bf215546Sopenharmony_cistatic void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info, 392bf215546Sopenharmony_ci nir_instr *instr) 393bf215546Sopenharmony_ci{ 394bf215546Sopenharmony_ci if (instr->type == nir_instr_type_tex) { 395bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(instr); 396bf215546Sopenharmony_ci const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci /* Gather the types of used VMEM instructions that return something. */ 399bf215546Sopenharmony_ci switch (tex->op) { 400bf215546Sopenharmony_ci case nir_texop_tex: 401bf215546Sopenharmony_ci case nir_texop_txb: 402bf215546Sopenharmony_ci case nir_texop_txl: 403bf215546Sopenharmony_ci case nir_texop_txd: 404bf215546Sopenharmony_ci case nir_texop_lod: 405bf215546Sopenharmony_ci case nir_texop_tg4: 406bf215546Sopenharmony_ci info->uses_vmem_sampler_or_bvh = true; 407bf215546Sopenharmony_ci break; 408bf215546Sopenharmony_ci default: 409bf215546Sopenharmony_ci info->uses_vmem_load_other = true; 410bf215546Sopenharmony_ci break; 411bf215546Sopenharmony_ci } 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci if (handle) { 414bf215546Sopenharmony_ci info->uses_bindless_samplers = true; 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci if (is_bindless_handle_indirect(handle->ssa->parent_instr)) 417bf215546Sopenharmony_ci info->uses_indirect_descriptor = true; 418bf215546Sopenharmony_ci } else { 419bf215546Sopenharmony_ci const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref))) 422bf215546Sopenharmony_ci info->uses_indirect_descriptor = true; 423bf215546Sopenharmony_ci } 424bf215546Sopenharmony_ci } else if (instr->type == nir_instr_type_intrinsic) { 425bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 426bf215546Sopenharmony_ci const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name; 427bf215546Sopenharmony_ci bool is_ssbo = strstr(intr_name, "ssbo"); 428bf215546Sopenharmony_ci bool is_image = strstr(intr_name, "image") == intr_name; 429bf215546Sopenharmony_ci bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name; 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci /* Gather the types of used VMEM instructions that return something. */ 432bf215546Sopenharmony_ci if (nir_intrinsic_infos[intr->intrinsic].has_dest) { 433bf215546Sopenharmony_ci switch (intr->intrinsic) { 434bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 435bf215546Sopenharmony_ci if (!nir_src_is_const(intr->src[1])) 436bf215546Sopenharmony_ci info->uses_vmem_load_other = true; 437bf215546Sopenharmony_ci break; 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci case nir_intrinsic_load_input: 440bf215546Sopenharmony_ci case nir_intrinsic_load_input_vertex: 441bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 442bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX || 443bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_TESS_EVAL) 444bf215546Sopenharmony_ci info->uses_vmem_load_other = true; 445bf215546Sopenharmony_ci break; 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci case nir_intrinsic_load_constant: 448bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */ 449bf215546Sopenharmony_ci case nir_intrinsic_load_buffer_amd: 450bf215546Sopenharmony_ci info->uses_vmem_load_other = true; 451bf215546Sopenharmony_ci break; 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci default: 454bf215546Sopenharmony_ci if (is_image || 455bf215546Sopenharmony_ci is_bindless_image || 456bf215546Sopenharmony_ci is_ssbo || 457bf215546Sopenharmony_ci (strstr(intr_name, "global") == intr_name || 458bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_load_global || 459bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_store_global) || 460bf215546Sopenharmony_ci strstr(intr_name, "scratch")) 461bf215546Sopenharmony_ci info->uses_vmem_load_other = true; 462bf215546Sopenharmony_ci break; 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci } 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci if (is_bindless_image) 467bf215546Sopenharmony_ci info->uses_bindless_images = true; 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci if (nir_intrinsic_writes_external_memory(intr)) 470bf215546Sopenharmony_ci info->num_memory_stores++; 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0]))) 473bf215546Sopenharmony_ci info->uses_indirect_descriptor = true; 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr)) 476bf215546Sopenharmony_ci info->uses_indirect_descriptor = true; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo && 479bf215546Sopenharmony_ci !nir_src_is_const(intr->src[0])) 480bf215546Sopenharmony_ci info->uses_indirect_descriptor = true; 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci switch (intr->intrinsic) { 483bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: 484bf215546Sopenharmony_ci if (!nir_src_is_const(intr->src[1])) 485bf215546Sopenharmony_ci info->uses_indirect_descriptor = true; 486bf215546Sopenharmony_ci break; 487bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 488bf215546Sopenharmony_ci if (!nir_src_is_const(intr->src[0])) 489bf215546Sopenharmony_ci info->uses_indirect_descriptor = true; 490bf215546Sopenharmony_ci break; 491bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_id: 492bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_id: { 493bf215546Sopenharmony_ci unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa); 494bf215546Sopenharmony_ci while (mask) { 495bf215546Sopenharmony_ci unsigned i = u_bit_scan(&mask); 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_load_workgroup_id) 498bf215546Sopenharmony_ci info->uses_block_id[i] = true; 499bf215546Sopenharmony_ci else 500bf215546Sopenharmony_ci info->uses_thread_id[i] = true; 501bf215546Sopenharmony_ci } 502bf215546Sopenharmony_ci break; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci case nir_intrinsic_load_color0: 505bf215546Sopenharmony_ci case nir_intrinsic_load_color1: { 506bf215546Sopenharmony_ci unsigned index = intr->intrinsic == nir_intrinsic_load_color1; 507bf215546Sopenharmony_ci uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa); 508bf215546Sopenharmony_ci info->colors_read |= mask << (index * 4); 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci switch (info->color_interpolate[index]) { 511bf215546Sopenharmony_ci case INTERP_MODE_SMOOTH: 512bf215546Sopenharmony_ci if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) 513bf215546Sopenharmony_ci info->uses_persp_sample = true; 514bf215546Sopenharmony_ci else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) 515bf215546Sopenharmony_ci info->uses_persp_centroid = true; 516bf215546Sopenharmony_ci else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) 517bf215546Sopenharmony_ci info->uses_persp_center = true; 518bf215546Sopenharmony_ci break; 519bf215546Sopenharmony_ci case INTERP_MODE_NOPERSPECTIVE: 520bf215546Sopenharmony_ci if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) 521bf215546Sopenharmony_ci info->uses_linear_sample = true; 522bf215546Sopenharmony_ci else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) 523bf215546Sopenharmony_ci info->uses_linear_centroid = true; 524bf215546Sopenharmony_ci else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) 525bf215546Sopenharmony_ci info->uses_linear_center = true; 526bf215546Sopenharmony_ci break; 527bf215546Sopenharmony_ci case INTERP_MODE_COLOR: 528bf215546Sopenharmony_ci /* We don't know the final value. This will be FLAT if flatshading is enabled 529bf215546Sopenharmony_ci * in the rasterizer state, otherwise it will be SMOOTH. 530bf215546Sopenharmony_ci */ 531bf215546Sopenharmony_ci info->uses_interp_color = true; 532bf215546Sopenharmony_ci if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE) 533bf215546Sopenharmony_ci info->uses_persp_sample_color = true; 534bf215546Sopenharmony_ci else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID) 535bf215546Sopenharmony_ci info->uses_persp_centroid_color = true; 536bf215546Sopenharmony_ci else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER) 537bf215546Sopenharmony_ci info->uses_persp_center_color = true; 538bf215546Sopenharmony_ci break; 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci break; 541bf215546Sopenharmony_ci } 542bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_offset: /* uses center */ 543bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_sample: /* uses center */ 544bf215546Sopenharmony_ci if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT) 545bf215546Sopenharmony_ci break; 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) { 548bf215546Sopenharmony_ci info->uses_linear_center = true; 549bf215546Sopenharmony_ci } else { 550bf215546Sopenharmony_ci info->uses_persp_center = true; 551bf215546Sopenharmony_ci } 552bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) 553bf215546Sopenharmony_ci info->uses_interp_at_sample = true; 554bf215546Sopenharmony_ci break; 555bf215546Sopenharmony_ci case nir_intrinsic_load_input: 556bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 557bf215546Sopenharmony_ci case nir_intrinsic_load_input_vertex: 558bf215546Sopenharmony_ci case nir_intrinsic_load_interpolated_input: 559bf215546Sopenharmony_ci scan_io_usage(nir, info, intr, true); 560bf215546Sopenharmony_ci break; 561bf215546Sopenharmony_ci case nir_intrinsic_load_output: 562bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_output: 563bf215546Sopenharmony_ci case nir_intrinsic_store_output: 564bf215546Sopenharmony_ci case nir_intrinsic_store_per_vertex_output: 565bf215546Sopenharmony_ci scan_io_usage(nir, info, intr, false); 566bf215546Sopenharmony_ci break; 567bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 568bf215546Sopenharmony_ci case nir_intrinsic_store_deref: 569bf215546Sopenharmony_ci /* These can only occur if there is indirect temp indexing. */ 570bf215546Sopenharmony_ci break; 571bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_centroid: 572bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_sample: 573bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_offset: 574bf215546Sopenharmony_ci unreachable("these opcodes should have been lowered"); 575bf215546Sopenharmony_ci break; 576bf215546Sopenharmony_ci default: 577bf215546Sopenharmony_ci break; 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci } 580bf215546Sopenharmony_ci} 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_civoid si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, 583bf215546Sopenharmony_ci struct si_shader_info *info) 584bf215546Sopenharmony_ci{ 585bf215546Sopenharmony_ci memset(info, 0, sizeof(*info)); 586bf215546Sopenharmony_ci info->base = nir->info; 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* Get options from shader profiles. */ 589bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(profiles); i++) { 590bf215546Sopenharmony_ci if (_mesa_printed_sha1_equal(info->base.source_sha1, profiles[i].sha1)) { 591bf215546Sopenharmony_ci info->options = profiles[i].options; 592bf215546Sopenharmony_ci break; 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci } 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT) { 597bf215546Sopenharmony_ci /* post_depth_coverage implies early_fragment_tests */ 598bf215546Sopenharmony_ci info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci info->color_interpolate[0] = nir->info.fs.color0_interp; 601bf215546Sopenharmony_ci info->color_interpolate[1] = nir->info.fs.color1_interp; 602bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 603bf215546Sopenharmony_ci if (info->color_interpolate[i] == INTERP_MODE_NONE) 604bf215546Sopenharmony_ci info->color_interpolate[i] = INTERP_MODE_COLOR; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : 608bf215546Sopenharmony_ci nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : 609bf215546Sopenharmony_ci TGSI_INTERPOLATE_LOC_CENTER; 610bf215546Sopenharmony_ci info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : 611bf215546Sopenharmony_ci nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : 612bf215546Sopenharmony_ci TGSI_INTERPOLATE_LOC_CENTER; 613bf215546Sopenharmony_ci /* Set an invalid value. Will be determined at draw time if needed when the expected 614bf215546Sopenharmony_ci * conditions are met. 615bf215546Sopenharmony_ci */ 616bf215546Sopenharmony_ci info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff; 617bf215546Sopenharmony_ci } 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci info->constbuf0_num_slots = nir->num_uniforms; 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_TESS_CTRL) { 622bf215546Sopenharmony_ci info->tessfactors_are_def_in_all_invocs = are_tessfactors_def_in_all_invocs(nir); 623bf215546Sopenharmony_ci } 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci /* tess factors are loaded as input instead of system value */ 626bf215546Sopenharmony_ci info->reads_tess_factors = nir->info.patch_inputs_read & 627bf215546Sopenharmony_ci (BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) | 628bf215546Sopenharmony_ci BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER)); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); 631bf215546Sopenharmony_ci info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); 632bf215546Sopenharmony_ci info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX); 633bf215546Sopenharmony_ci info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); 634bf215546Sopenharmony_ci info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID); 635bf215546Sopenharmony_ci info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS); 636bf215546Sopenharmony_ci info->uses_subgroup_info = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) || 637bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) || 638bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS); 639bf215546Sopenharmony_ci info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE); 640bf215546Sopenharmony_ci info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); 641bf215546Sopenharmony_ci info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || 642bf215546Sopenharmony_ci nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID; 643bf215546Sopenharmony_ci info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); 644bf215546Sopenharmony_ci info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE); 645bf215546Sopenharmony_ci info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID); 646bf215546Sopenharmony_ci info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL); 647bf215546Sopenharmony_ci info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE); 648bf215546Sopenharmony_ci info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID); 649bf215546Sopenharmony_ci info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL); 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT) { 652bf215546Sopenharmony_ci info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH); 653bf215546Sopenharmony_ci info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); 654bf215546Sopenharmony_ci info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0; 657bf215546Sopenharmony_ci if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { 658bf215546Sopenharmony_ci info->color0_writes_all_cbufs = true; 659bf215546Sopenharmony_ci info->colors_written |= 0x1; 660bf215546Sopenharmony_ci } 661bf215546Sopenharmony_ci if (nir->info.fs.color_is_dual_source) 662bf215546Sopenharmony_ci info->colors_written |= 0x2; 663bf215546Sopenharmony_ci } else { 664bf215546Sopenharmony_ci info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID; 665bf215546Sopenharmony_ci info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT; 666bf215546Sopenharmony_ci info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER; 667bf215546Sopenharmony_ci info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ; 668bf215546Sopenharmony_ci info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX; 669bf215546Sopenharmony_ci info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE; 670bf215546Sopenharmony_ci info->writes_position = nir->info.outputs_written & VARYING_BIT_POS; 671bf215546Sopenharmony_ci } 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir); 674bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 675bf215546Sopenharmony_ci nir_foreach_instr (instr, block) 676bf215546Sopenharmony_ci scan_instruction(nir, info, instr); 677bf215546Sopenharmony_ci } 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { 680bf215546Sopenharmony_ci /* Add the PrimitiveID output, but don't increment num_outputs. 681bf215546Sopenharmony_ci * The driver inserts PrimitiveID only when it's used by the pixel shader, 682bf215546Sopenharmony_ci * and si_emit_spi_map uses this unconditionally when such a pixel shader is used. 683bf215546Sopenharmony_ci */ 684bf215546Sopenharmony_ci info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID; 685bf215546Sopenharmony_ci info->output_type[info->num_outputs] = nir_type_uint32; 686bf215546Sopenharmony_ci info->output_usagemask[info->num_outputs] = 0x1; 687bf215546Sopenharmony_ci } 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT) { 690bf215546Sopenharmony_ci info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid || 691bf215546Sopenharmony_ci info->uses_persp_sample || info->uses_linear_center || 692bf215546Sopenharmony_ci info->uses_linear_centroid || info->uses_linear_sample || 693bf215546Sopenharmony_ci info->uses_interp_at_sample || nir->info.writes_memory || 694bf215546Sopenharmony_ci nir->info.fs.uses_fbfetch_output || 695bf215546Sopenharmony_ci nir->info.fs.needs_quad_helper_invocations || 696bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || 697bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) || 698bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || 699bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) || 700bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) || 701bf215546Sopenharmony_ci BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION)); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci info->uses_vmem_load_other |= info->base.fs.uses_fbfetch_output; 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci /* Add both front and back color inputs. */ 706bf215546Sopenharmony_ci unsigned num_inputs_with_colors = info->num_inputs; 707bf215546Sopenharmony_ci for (unsigned back = 0; back < 2; back++) { 708bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 709bf215546Sopenharmony_ci if ((info->colors_read >> (i * 4)) & 0xf) { 710bf215546Sopenharmony_ci unsigned index = num_inputs_with_colors; 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_ci info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i; 713bf215546Sopenharmony_ci info->input[index].interpolate = info->color_interpolate[i]; 714bf215546Sopenharmony_ci info->input[index].usage_mask = info->colors_read >> (i * 4); 715bf215546Sopenharmony_ci num_inputs_with_colors++; 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci /* Back-face color don't increment num_inputs. si_emit_spi_map will use 718bf215546Sopenharmony_ci * back-face colors conditionally only when they are needed. 719bf215546Sopenharmony_ci */ 720bf215546Sopenharmony_ci if (!back) 721bf215546Sopenharmony_ci info->num_inputs = num_inputs_with_colors; 722bf215546Sopenharmony_ci } 723bf215546Sopenharmony_ci } 724bf215546Sopenharmony_ci } 725bf215546Sopenharmony_ci } 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_ci info->uses_vmem_load_other |= info->uses_indirect_descriptor; 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_ci /* Trim output read masks based on write masks. */ 730bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_outputs; i++) 731bf215546Sopenharmony_ci info->output_readmask[i] &= info->output_usagemask[i]; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir); 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX || 736bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_TESS_CTRL || 737bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_TESS_EVAL || 738bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_GEOMETRY) { 739bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_TESS_CTRL) { 740bf215546Sopenharmony_ci /* Always reserve space for these. */ 741bf215546Sopenharmony_ci info->patch_outputs_written |= 742bf215546Sopenharmony_ci (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) | 743bf215546Sopenharmony_ci (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER)); 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_outputs; i++) { 746bf215546Sopenharmony_ci unsigned semantic = info->output_semantic[i]; 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci if (semantic == VARYING_SLOT_TESS_LEVEL_INNER || 749bf215546Sopenharmony_ci semantic == VARYING_SLOT_TESS_LEVEL_OUTER || 750bf215546Sopenharmony_ci (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) { 751bf215546Sopenharmony_ci info->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic); 752bf215546Sopenharmony_ci } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) && 753bf215546Sopenharmony_ci semantic != VARYING_SLOT_EDGE) { 754bf215546Sopenharmony_ci info->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false); 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci /* Ignore outputs that are not passed from VS to PS. */ 757bf215546Sopenharmony_ci if (semantic != VARYING_SLOT_POS && 758bf215546Sopenharmony_ci semantic != VARYING_SLOT_PSIZ && 759bf215546Sopenharmony_ci semantic != VARYING_SLOT_CLIP_VERTEX) { 760bf215546Sopenharmony_ci info->outputs_written_before_ps |= 1ull 761bf215546Sopenharmony_ci << si_shader_io_get_unique_index(semantic, true); 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci } 765bf215546Sopenharmony_ci } 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX) { 768bf215546Sopenharmony_ci info->num_vs_inputs = 769bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0; 770bf215546Sopenharmony_ci unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level); 771bf215546Sopenharmony_ci info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs); 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci /* The prolog is a no-op if there are no inputs. */ 774bf215546Sopenharmony_ci info->vs_needs_prolog = info->num_inputs && !info->base.vs.blit_sgprs_amd; 775bf215546Sopenharmony_ci } 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX || 778bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_TESS_CTRL || 779bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_TESS_EVAL) { 780bf215546Sopenharmony_ci info->esgs_itemsize = util_last_bit64(info->outputs_written) * 16; 781bf215546Sopenharmony_ci info->lshs_vertex_stride = info->esgs_itemsize; 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci /* Add 1 dword to reduce LDS bank conflicts, so that each vertex 784bf215546Sopenharmony_ci * will start on a different bank. (except for the maximum 32*16). 785bf215546Sopenharmony_ci */ 786bf215546Sopenharmony_ci if (info->lshs_vertex_stride < 32 * 16) 787bf215546Sopenharmony_ci info->lshs_vertex_stride += 4; 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank 790bf215546Sopenharmony_ci * conflicts, i.e. each vertex will start at a different bank. 791bf215546Sopenharmony_ci */ 792bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX9) 793bf215546Sopenharmony_ci info->esgs_itemsize += 4; 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci assert(((info->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0); 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read & 798bf215546Sopenharmony_ci ~info->base.inputs_read_indirectly & 799bf215546Sopenharmony_ci info->base.inputs_read; 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_GEOMETRY) { 803bf215546Sopenharmony_ci info->gsvs_vertex_size = info->num_outputs * 16; 804bf215546Sopenharmony_ci info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out; 805bf215546Sopenharmony_ci info->gs_input_verts_per_prim = 806bf215546Sopenharmony_ci u_vertices_per_prim((enum pipe_prim_type)info->base.gs.input_primitive); 807bf215546Sopenharmony_ci } 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK : 810bf215546Sopenharmony_ci u_bit_consecutive(0, info->base.clip_distance_array_size); 811bf215546Sopenharmony_ci info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) << 812bf215546Sopenharmony_ci info->base.clip_distance_array_size; 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT) { 815bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_inputs; i++) { 816bf215546Sopenharmony_ci unsigned semantic = info->input[i].semantic; 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) && 819bf215546Sopenharmony_ci semantic != VARYING_SLOT_PNTC) { 820bf215546Sopenharmony_ci info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true); 821bf215546Sopenharmony_ci } 822bf215546Sopenharmony_ci } 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci for (unsigned i = 0; i < 8; i++) 825bf215546Sopenharmony_ci if (info->colors_written & (1 << i)) 826bf215546Sopenharmony_ci info->colors_written_4bit |= 0xf << (4 * i); 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_inputs; i++) { 829bf215546Sopenharmony_ci if (info->input[i].semantic == VARYING_SLOT_COL0) 830bf215546Sopenharmony_ci info->color_attr_index[0] = i; 831bf215546Sopenharmony_ci else if (info->input[i].semantic == VARYING_SLOT_COL1) 832bf215546Sopenharmony_ci info->color_attr_index[1] = i; 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci } 835bf215546Sopenharmony_ci} 836