/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "util/mesa-sha1.h"
#include "util/u_prim.h"
#include "sid.h"


struct si_shader_profile {
   uint32_t sha1[SHA1_DIGEST_LENGTH32];
   uint32_t options;
};

static struct si_shader_profile profiles[] =
{
   {
      /* Plot3D */
      {0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451},
      SI_PROFILE_VS_NO_BINNING,
   },
   {
      /* Viewperf/Energy isn't affected by the discard bug. */
      {0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0},
      SI_PROFILE_IGNORE_LLVM13_DISCARD_BUG,
   },
   {
      /* Viewperf/Medical */
      {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0},
      SI_PROFILE_PS_NO_BINNING,
   },
   {
      /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32,
       * probably due to interpolation performance.
       */
      {0x29f0f4a0, 0x0672258d, 0x47ccdcfd, 0x31e67dcc, 0xdcb1fda8},
      SI_PROFILE_WAVE64,
   },
   {
      /* Viewperf/Creo */
      {0x1f288a73, 0xba46cce5, 0xbf68e6c6, 0x58543651, 0xca3c8efd},
      SI_PROFILE_CLAMP_DIV_BY_ZERO,
   },
};

static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
{
   if (intrin->intrinsic != nir_intrinsic_store_output)
      return 0;

   unsigned writemask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);
   unsigned location = nir_intrinsic_io_semantics(intrin).location;

   if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
      return writemask << 4;
   else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
      return writemask;

   return 0;
}

static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
                           unsigned *cond_block_tf_writemask,
                           bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
{
   switch (cf_node->type) {
   case nir_cf_node_block: {
      nir_block *block = nir_cf_node_as_block(cf_node);
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == nir_intrinsic_control_barrier) {

            /* If we find a barrier in nested control flow put this in the
             * too hard basket. In GLSL this is not possible but it is in
             * SPIR-V.
             */
            if (is_nested_cf) {
               *tessfactors_are_def_in_all_invocs = false;
               return;
            }

            /* The following case must be prevented:
             *    gl_TessLevelInner = ...;
             *    barrier();
             *    if (gl_InvocationID == 1)
             *       gl_TessLevelInner = ...;
             *
             * If you consider disjoint code segments separated by barriers, each
             * such segment that writes tess factor channels should write the same
             * channels in all codepaths within that segment.
             */
            if (*upper_block_tf_writemask || *cond_block_tf_writemask) {
               /* Accumulate the result: */
               *tessfactors_are_def_in_all_invocs &=
                  !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));

               /* Analyze the next code segment from scratch. */
               *upper_block_tf_writemask = 0;
               *cond_block_tf_writemask = 0;
            }
         } else
            *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
      }

      break;
   }
   case nir_cf_node_if: {
      unsigned then_tessfactor_writemask = 0;
      unsigned else_tessfactor_writemask = 0;

      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
      {
         scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
      {
         scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      if (then_tessfactor_writemask || else_tessfactor_writemask) {
         /* If both statements write the same tess factor channels,
          * we can say that the upper block writes them too.
          */
         *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask;
         *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask;
      }

      break;
   }
   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
      {
         scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      break;
   }
   default:
      unreachable("unknown cf node type");
   }
}

static bool are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_TESS_CTRL);

   /* The pass works as follows:
    * If all codepaths write tess factors, we can say that all
    * invocations define tess factors.
    *
    * Each tess factor channel is tracked separately.
    */
   unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
   unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */

   /* Initial value = true. Here the pass will accumulate results from
    * multiple segments surrounded by barriers. If tess factors aren't
    * written at all, it's a shader bug and we don't care if this will be
    * true.
    */
   bool tessfactors_are_def_in_all_invocs = true;

   nir_foreach_function (function, nir) {
      if (function->impl) {
         foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
         {
            scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
                           &tessfactors_are_def_in_all_invocs, false);
         }
      }
   }

   /* Accumulate the result for the last code segment separated by a
    * barrier.
    */
205 */ 206 if (main_block_tf_writemask || cond_block_tf_writemask) { 207 tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask); 208 } 209 210 return tessfactors_are_def_in_all_invocs; 211} 212 213static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type) 214{ 215 for (unsigned i = 0; i < instr->num_srcs; i++) { 216 if (instr->src[i].src_type == type) 217 return &instr->src[i].src; 218 } 219 return NULL; 220} 221 222static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info, 223 nir_intrinsic_instr *intr, bool is_input) 224{ 225 unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */ 226 227 if (intr->intrinsic == nir_intrinsic_load_interpolated_input) { 228 nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr); 229 230 if (baryc) { 231 if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0) 232 interp = nir_intrinsic_interp_mode(baryc); 233 else 234 unreachable("unknown barycentric intrinsic"); 235 } else { 236 unreachable("unknown barycentric expression"); 237 } 238 } 239 240 unsigned mask, bit_size; 241 bool is_output_load; 242 243 if (nir_intrinsic_has_write_mask(intr)) { 244 mask = nir_intrinsic_write_mask(intr); /* store */ 245 bit_size = nir_src_bit_size(intr->src[0]); 246 is_output_load = false; 247 } else { 248 mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */ 249 bit_size = intr->dest.ssa.bit_size; 250 is_output_load = !is_input; 251 } 252 assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered"); 253 254 /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs 255 * where the mask is untyped. 256 */ 257 if (bit_size == 16 && !is_input) { 258 unsigned new_mask = 0; 259 for (unsigned i = 0; i < 4; i++) { 260 if (mask & (1 << i)) 261 new_mask |= 0x1 << (i / 2); 262 } 263 mask = new_mask; 264 } 265 266 mask <<= nir_intrinsic_component(intr); 267 268 nir_src offset = *nir_get_io_offset_src(intr); 269 bool indirect = !nir_src_is_const(offset); 270 if (!indirect) 271 assert(nir_src_as_uint(offset) == 0); 272 273 unsigned semantic = 0; 274 /* VS doesn't have semantics. */ 275 if (nir->info.stage != MESA_SHADER_VERTEX || !is_input) 276 semantic = nir_intrinsic_io_semantics(intr).location; 277 278 if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) { 279 /* Never use FRAG_RESULT_COLOR directly. */ 280 if (semantic == FRAG_RESULT_COLOR) 281 semantic = FRAG_RESULT_DATA0; 282 semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index; 283 } 284 285 unsigned driver_location = nir_intrinsic_base(intr); 286 unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1; 287 288 if (is_input) { 289 assert(driver_location + num_slots <= ARRAY_SIZE(info->input)); 290 291 for (unsigned i = 0; i < num_slots; i++) { 292 unsigned loc = driver_location + i; 293 294 info->input[loc].semantic = semantic + i; 295 296 if (semantic == VARYING_SLOT_PRIMITIVE_ID) 297 info->input[loc].interpolate = INTERP_MODE_FLAT; 298 else 299 info->input[loc].interpolate = interp; 300 301 if (mask) { 302 info->input[loc].usage_mask |= mask; 303 if (bit_size == 16) { 304 if (nir_intrinsic_io_semantics(intr).high_16bits) 305 info->input[loc].fp16_lo_hi_valid |= 0x2; 306 else 307 info->input[loc].fp16_lo_hi_valid |= 0x1; 308 } 309 info->num_inputs = MAX2(info->num_inputs, loc + 1); 310 } 311 } 312 } else { 313 /* Outputs. 
      assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));

      for (unsigned i = 0; i < num_slots; i++) {
         unsigned loc = driver_location + i;

         info->output_semantic[loc] = semantic + i;

         if (is_output_load) {
            /* Output loads have only a few things that we need to track. */
            info->output_readmask[loc] |= mask;
         } else if (mask) {
            /* Output stores. */
            unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
                                  (nir_intrinsic_component(intr) * 2);
            unsigned new_mask = mask & ~info->output_usagemask[loc];

            /* Iterate over all components. */
            for (unsigned i = 0; i < 4; i++) {
               unsigned stream = (gs_streams >> (i * 2)) & 0x3;

               if (new_mask & (1 << i)) {
                  info->output_streams[loc] |= stream << (i * 2);
                  info->num_stream_output_components[stream]++;
               }

               if (nir_intrinsic_has_io_xfb(intr)) {
                  nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(intr) :
                                           nir_intrinsic_io_xfb2(intr);
                  if (xfb.out[i % 2].num_components) {
                     unsigned stream = (gs_streams >> (i * 2)) & 0x3;
                     info->enabled_streamout_buffer_mask |=
                        BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer);
                  }
               }
            }

            if (nir_intrinsic_has_src_type(intr))
               info->output_type[loc] = nir_intrinsic_src_type(intr);
            else if (nir_intrinsic_has_dest_type(intr))
               info->output_type[loc] = nir_intrinsic_dest_type(intr);
            else
               info->output_type[loc] = nir_type_float32;

            info->output_usagemask[loc] |= mask;
            info->num_outputs = MAX2(info->num_outputs, loc + 1);

            if (nir->info.stage == MESA_SHADER_FRAGMENT &&
                semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
               unsigned index = semantic - FRAG_RESULT_DATA0;

               if (nir_intrinsic_src_type(intr) == nir_type_float16)
                  info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_int16)
                  info->output_color_types |= SI_TYPE_INT16 << (index * 2);
               else if (nir_intrinsic_src_type(intr) == nir_type_uint16)
                  info->output_color_types |= SI_TYPE_UINT16 << (index * 2);
            }
         }
      }
   }
}

static bool is_bindless_handle_indirect(nir_instr *src)
{
   /* Check if the bindless handle comes from indirect load_ubo. */
   if (src->type == nir_instr_type_intrinsic &&
       nir_instr_as_intrinsic(src)->intrinsic == nir_intrinsic_load_ubo) {
      if (!nir_src_is_const(nir_instr_as_intrinsic(src)->src[0]))
         return true;
   } else {
      /* Some other instruction. Return the worst-case result. */
      return true;
   }
   return false;
}

/* TODO: convert to nir_shader_instructions_pass */
static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
                             nir_instr *instr)
{
   if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle);

      /* Gather the types of used VMEM instructions that return something. */
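      /* Filtered/sampled texture ops go through the sampler path; texel
       * fetches and query ops fall through to the generic VMEM-load category.
       */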
      switch (tex->op) {
      case nir_texop_tex:
      case nir_texop_txb:
      case nir_texop_txl:
      case nir_texop_txd:
      case nir_texop_lod:
      case nir_texop_tg4:
         info->uses_vmem_sampler_or_bvh = true;
         break;
      default:
         info->uses_vmem_load_other = true;
         break;
      }

      if (handle) {
         info->uses_bindless_samplers = true;

         if (is_bindless_handle_indirect(handle->ssa->parent_instr))
            info->uses_indirect_descriptor = true;
      } else {
         const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref);

         if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref)))
            info->uses_indirect_descriptor = true;
      }
   } else if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name;
      bool is_ssbo = strstr(intr_name, "ssbo");
      bool is_image = strstr(intr_name, "image") == intr_name;
      bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name;

      /* Gather the types of used VMEM instructions that return something. */
      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
         switch (intr->intrinsic) {
         case nir_intrinsic_load_ubo:
            if (!nir_src_is_const(intr->src[1]))
               info->uses_vmem_load_other = true;
            break;

         case nir_intrinsic_load_input:
         case nir_intrinsic_load_input_vertex:
         case nir_intrinsic_load_per_vertex_input:
            if (nir->info.stage == MESA_SHADER_VERTEX ||
                nir->info.stage == MESA_SHADER_TESS_EVAL)
               info->uses_vmem_load_other = true;
            break;

         case nir_intrinsic_load_constant:
         case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */
         case nir_intrinsic_load_buffer_amd:
            info->uses_vmem_load_other = true;
            break;

         default:
            if (is_image ||
                is_bindless_image ||
                is_ssbo ||
                (strstr(intr_name, "global") == intr_name ||
                 intr->intrinsic == nir_intrinsic_load_global ||
                 intr->intrinsic == nir_intrinsic_store_global) ||
                strstr(intr_name, "scratch"))
               info->uses_vmem_load_other = true;
            break;
         }
      }

      if (is_bindless_image)
         info->uses_bindless_images = true;

      if (nir_intrinsic_writes_external_memory(intr))
         info->num_memory_stores++;

      if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0])))
         info->uses_indirect_descriptor = true;

      if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr))
         info->uses_indirect_descriptor = true;

      if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo &&
          !nir_src_is_const(intr->src[0]))
         info->uses_indirect_descriptor = true;

      switch (intr->intrinsic) {
      case nir_intrinsic_store_ssbo:
         if (!nir_src_is_const(intr->src[1]))
            info->uses_indirect_descriptor = true;
         break;
      case nir_intrinsic_load_ubo:
         if (!nir_src_is_const(intr->src[0]))
            info->uses_indirect_descriptor = true;
         break;
      case nir_intrinsic_load_local_invocation_id:
      case nir_intrinsic_load_workgroup_id: {
         unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa);
         while (mask) {
            unsigned i = u_bit_scan(&mask);

            if (intr->intrinsic == nir_intrinsic_load_workgroup_id)
               info->uses_block_id[i] = true;
            else
               info->uses_thread_id[i] = true;
         }
         break;
      }
      case nir_intrinsic_load_color0:
      case nir_intrinsic_load_color1: {
         unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
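         /* Each color input gets a 4-bit component mask in colors_read:
          * bits 0-3 for COLOR0, bits 4-7 for COLOR1.
          */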
         uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa);
         info->colors_read |= mask << (index * 4);

         switch (info->color_interpolate[index]) {
         case INTERP_MODE_SMOOTH:
            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
               info->uses_persp_sample = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
               info->uses_persp_centroid = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
               info->uses_persp_center = true;
            break;
         case INTERP_MODE_NOPERSPECTIVE:
            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
               info->uses_linear_sample = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
               info->uses_linear_centroid = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
               info->uses_linear_center = true;
            break;
         case INTERP_MODE_COLOR:
            /* We don't know the final value. This will be FLAT if flatshading is enabled
             * in the rasterizer state, otherwise it will be SMOOTH.
             */
            info->uses_interp_color = true;
            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
               info->uses_persp_sample_color = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
               info->uses_persp_centroid_color = true;
            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
               info->uses_persp_center_color = true;
            break;
         }
         break;
      }
      case nir_intrinsic_load_barycentric_at_offset: /* uses center */
      case nir_intrinsic_load_barycentric_at_sample: /* uses center */
         if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT)
            break;

         if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) {
            info->uses_linear_center = true;
         } else {
            info->uses_persp_center = true;
         }
         if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
            info->uses_interp_at_sample = true;
         break;
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_per_vertex_input:
      case nir_intrinsic_load_input_vertex:
      case nir_intrinsic_load_interpolated_input:
         scan_io_usage(nir, info, intr, true);
         break;
      case nir_intrinsic_load_output:
      case nir_intrinsic_load_per_vertex_output:
      case nir_intrinsic_store_output:
      case nir_intrinsic_store_per_vertex_output:
         scan_io_usage(nir, info, intr, false);
         break;
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* These can only occur if there is indirect temp indexing. */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         unreachable("these opcodes should have been lowered");
         break;
      default:
         break;
      }
   }
}

void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
                        struct si_shader_info *info)
{
   memset(info, 0, sizeof(*info));
   info->base = nir->info;

   /* Get options from shader profiles. */
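   /* A profile applies when the shader's source SHA-1 matches an entry in
    * the profiles[] table at the top of this file.
    */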
   for (unsigned i = 0; i < ARRAY_SIZE(profiles); i++) {
      if (_mesa_printed_sha1_equal(info->base.source_sha1, profiles[i].sha1)) {
         info->options = profiles[i].options;
         break;
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* post_depth_coverage implies early_fragment_tests */
      info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage;

      info->color_interpolate[0] = nir->info.fs.color0_interp;
      info->color_interpolate[1] = nir->info.fs.color1_interp;
      for (unsigned i = 0; i < 2; i++) {
         if (info->color_interpolate[i] == INTERP_MODE_NONE)
            info->color_interpolate[i] = INTERP_MODE_COLOR;
      }

      info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
                                       nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
                                                                      TGSI_INTERPOLATE_LOC_CENTER;
      info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
                                       nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
                                                                      TGSI_INTERPOLATE_LOC_CENTER;
      /* Set an invalid value. Will be determined at draw time if needed when the expected
       * conditions are met.
       */
      info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff;
   }

   info->constbuf0_num_slots = nir->num_uniforms;

   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      info->tessfactors_are_def_in_all_invocs = are_tessfactors_def_in_all_invocs(nir);
   }

   /* tess factors are loaded as input instead of system value */
   info->reads_tess_factors = nir->info.patch_inputs_read &
                              (BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) |
                               BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER));

   info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
   info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
   info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
   info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
   info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
   info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
   info->uses_subgroup_info = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
                              BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
                              BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
   info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
   info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
   info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
                       nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
   info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
   info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
   info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
   info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
   info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
   info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read,
                                           SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
   info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
      info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
      info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);

      info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0;
      if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
         info->color0_writes_all_cbufs = true;
         info->colors_written |= 0x1;
      }
      if (nir->info.fs.color_is_dual_source)
         info->colors_written |= 0x2;
   } else {
      info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID;
      info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
      info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER;
      info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ;
      info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX;
      info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE;
      info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
   }

   nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block)
         scan_instruction(nir, info, instr);
   }

   if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
      /* Add the PrimitiveID output, but don't increment num_outputs.
       * The driver inserts PrimitiveID only when it's used by the pixel shader,
       * and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
       */
      info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
      info->output_type[info->num_outputs] = nir_type_uint32;
      info->output_usagemask[info->num_outputs] = 0x1;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid ||
                                   info->uses_persp_sample || info->uses_linear_center ||
                                   info->uses_linear_centroid || info->uses_linear_sample ||
                                   info->uses_interp_at_sample || nir->info.writes_memory ||
                                   nir->info.fs.uses_fbfetch_output ||
                                   nir->info.fs.needs_quad_helper_invocations ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));

      info->uses_vmem_load_other |= info->base.fs.uses_fbfetch_output;

      /* Add both front and back color inputs. */
      unsigned num_inputs_with_colors = info->num_inputs;
      for (unsigned back = 0; back < 2; back++) {
         for (unsigned i = 0; i < 2; i++) {
            if ((info->colors_read >> (i * 4)) & 0xf) {
               unsigned index = num_inputs_with_colors;

               info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;
               info->input[index].interpolate = info->color_interpolate[i];
               info->input[index].usage_mask = info->colors_read >> (i * 4);
               num_inputs_with_colors++;

               /* Back-face colors don't increment num_inputs. si_emit_spi_map will use
                * back-face colors conditionally only when they are needed.
                */
               if (!back)
                  info->num_inputs = num_inputs_with_colors;
            }
         }
      }
   }

   info->uses_vmem_load_other |= info->uses_indirect_descriptor;

   /* Trim output read masks based on write masks. */
   for (unsigned i = 0; i < info->num_outputs; i++)
      info->output_readmask[i] &= info->output_usagemask[i];

   info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);

   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_CTRL ||
       nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
         /* Always reserve space for these. */
         info->patch_outputs_written |=
            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
      }
      for (unsigned i = 0; i < info->num_outputs; i++) {
         unsigned semantic = info->output_semantic[i];

         if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
             semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
             (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
            info->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
         } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
                    semantic != VARYING_SLOT_EDGE) {
            info->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);

            /* Ignore outputs that are not passed from VS to PS. */
            if (semantic != VARYING_SLOT_POS &&
                semantic != VARYING_SLOT_PSIZ &&
                semantic != VARYING_SLOT_CLIP_VERTEX) {
               info->outputs_written_before_ps |= 1ull
                                                  << si_shader_io_get_unique_index(semantic, true);
            }
         }
      }
   }

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      info->num_vs_inputs =
         nir->info.stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0;
      unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level);
      info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);

      /* The prolog is a no-op if there are no inputs. */
      info->vs_needs_prolog = info->num_inputs && !info->base.vs.blit_sgprs_amd;
   }

   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_CTRL ||
       nir->info.stage == MESA_SHADER_TESS_EVAL) {
      info->esgs_itemsize = util_last_bit64(info->outputs_written) * 16;
      info->lshs_vertex_stride = info->esgs_itemsize;

      /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
       * will start on a different bank (except for the maximum 32*16).
       */
      if (info->lshs_vertex_stride < 32 * 16)
         info->lshs_vertex_stride += 4;

      /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
       * conflicts, i.e. each vertex will start at a different bank.
       */
791 */ 792 if (sscreen->info.gfx_level >= GFX9) 793 info->esgs_itemsize += 4; 794 795 assert(((info->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0); 796 797 info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read & 798 ~info->base.inputs_read_indirectly & 799 info->base.inputs_read; 800 } 801 802 if (nir->info.stage == MESA_SHADER_GEOMETRY) { 803 info->gsvs_vertex_size = info->num_outputs * 16; 804 info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out; 805 info->gs_input_verts_per_prim = 806 u_vertices_per_prim((enum pipe_prim_type)info->base.gs.input_primitive); 807 } 808 809 info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK : 810 u_bit_consecutive(0, info->base.clip_distance_array_size); 811 info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) << 812 info->base.clip_distance_array_size; 813 814 if (nir->info.stage == MESA_SHADER_FRAGMENT) { 815 for (unsigned i = 0; i < info->num_inputs; i++) { 816 unsigned semantic = info->input[i].semantic; 817 818 if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) && 819 semantic != VARYING_SLOT_PNTC) { 820 info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true); 821 } 822 } 823 824 for (unsigned i = 0; i < 8; i++) 825 if (info->colors_written & (1 << i)) 826 info->colors_written_4bit |= 0xf << (4 * i); 827 828 for (unsigned i = 0; i < info->num_inputs; i++) { 829 if (info->input[i].semantic == VARYING_SLOT_COL0) 830 info->color_attr_index[0] = i; 831 else if (info->input[i].semantic == VARYING_SLOT_COL1) 832 info->color_attr_index[1] = i; 833 } 834 } 835} 836