1/* 2 * Copyright © 2017 Red Hat 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
*/

#include "nir/nir.h"
#include "nir/nir_xfb_info.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "ac_nir.h"

/* Record that the descriptor set holding this sampler/image variable is used
 * by the shader.
 */
static void
mark_sampler_desc(const nir_variable *var, struct radv_shader_info *info)
{
   info->desc_set_used_mask |= (1u << var->data.descriptor_set);
}

/* Accumulate the per-component usage mask of a vertex-shader input from a
 * load_input intrinsic.  All other stages are ignored here.
 */
static void
gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                                 struct radv_shader_info *info)
{
   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX: {
      unsigned idx = nir_intrinsic_io_semantics(instr).location;
      unsigned component = nir_intrinsic_component(instr);
      unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);

      /* Shift the components-read mask by the load's start component. */
      info->vs.input_usage_mask[idx] |= mask << component;
      break;
   }
   default:
      break;
   }
}

/* Accumulate per-slot output usage masks for VS/TES/GS from a store_output
 * intrinsic.  A 64-bit store occupies two 32-bit components per channel, so
 * its write mask is widened first; the mask is then split into 4-component
 * groups, one per output slot.
 */
static void
gather_intrinsic_store_output_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                                   struct radv_shader_info *info)
{
   unsigned idx = nir_intrinsic_base(instr);
   unsigned num_slots = nir_intrinsic_io_semantics(instr).num_slots;
   unsigned component = nir_intrinsic_component(instr);
   unsigned write_mask = nir_intrinsic_write_mask(instr);
   uint8_t *output_usage_mask = NULL;

   if (instr->src[0].ssa->bit_size == 64)
      write_mask = util_widen_mask(write_mask, 2);

   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      output_usage_mask = info->vs.output_usage_mask;
      break;
   case MESA_SHADER_TESS_EVAL:
      output_usage_mask = info->tes.output_usage_mask;
      break;
   case MESA_SHADER_GEOMETRY:
      output_usage_mask = info->gs.output_usage_mask;
      break;
   default:
      /* Other stages keep no output usage mask here. */
      break;
   }

   if (output_usage_mask) {
      for (unsigned i = 0; i < num_slots; i++) {
         output_usage_mask[idx + i] |= ((write_mask >> (i * 4)) & 0xf) << component;
      }
   }
}

/* Track push-constant usage.  Loads with a constant offset and a bit size of
 * at least 32 that fit inside MAX_PUSH_CONSTANTS_SIZE are recorded as
 * inlinable (one bit per dword in inline_push_constant_mask); any other load
 * disables inlining of all push constants.
 */
static void
gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                          struct radv_shader_info *info)
{
   info->loads_push_constants = true;

   if (nir_src_is_const(instr->src[0]) && instr->dest.ssa.bit_size >= 32) {
      /* Start/size are measured in 32-bit dwords. */
      uint32_t start = (nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0])) / 4u;
      uint32_t size = instr->num_components * (instr->dest.ssa.bit_size / 32u);

      if (start + size <= (MAX_PUSH_CONSTANTS_SIZE / 4u)) {
         info->inline_push_constant_mask |= u_bit_consecutive64(start, size);
         return;
      }
   }

   info->can_inline_all_push_constants = false;
}

/* Dispatch over the intrinsics the info pass cares about: barycentrics
 * (FS interpolation modes), compute IDs, frag coord / sample position reads,
 * push constants, descriptor usage, image access, VS inputs, outputs, and a
 * couple of AMD-specific intrinsics.
 */
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                      struct radv_shader_info *info)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset: {
      enum glsl_interp_mode mode = nir_intrinsic_interp_mode(instr);
      switch (mode) {
      case INTERP_MODE_SMOOTH:
      case INTERP_MODE_NONE:
         /* Perspective-correct interpolation: at_sample/at_offset count as
          * "center" reads here, same as pixel.
          */
         if (instr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
            info->ps.reads_persp_center = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_centroid)
            info->ps.reads_persp_centroid = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_sample)
            info->ps.reads_persp_sample = true;
         break;
      case INTERP_MODE_NOPERSPECTIVE:
         /* Same classification for linear (noperspective) interpolation. */
         if (instr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_sample ||
             instr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
            info->ps.reads_linear_center = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_centroid)
            info->ps.reads_linear_centroid = true;
         else if (instr->intrinsic == nir_intrinsic_load_barycentric_sample)
            info->ps.reads_linear_sample = true;
         break;
      default:
         break;
      }
      if (instr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
         info->ps.needs_sample_positions = true;
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_workgroup_id: {
      /* Mark only the x/y/z channels that are actually read. */
      unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
      while (mask) {
         unsigned i = u_bit_scan(&mask);

         if (instr->intrinsic == nir_intrinsic_load_workgroup_id)
            info->cs.uses_block_id[i] = true;
         else
            info->cs.uses_thread_id[i] = true;
      }
      break;
   }
   case nir_intrinsic_load_frag_coord:
      info->ps.reads_frag_coord_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
      break;
   case nir_intrinsic_load_sample_pos:
      info->ps.reads_sample_pos_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
      break;
   case nir_intrinsic_load_push_constant:
      gather_push_constant_info(nir, instr, info);
      break;
   case nir_intrinsic_vulkan_resource_index:
      info->desc_set_used_mask |= (1u << nir_intrinsic_desc_set(instr));
      break;
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples: {
      /* Any image access marks the descriptor set of the image variable. */
      nir_variable *var =
         nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
      mark_sampler_desc(var, info);
      break;
   }
   case nir_intrinsic_load_input:
      gather_intrinsic_load_input_info(nir, instr, info);
      break;
   case nir_intrinsic_store_output:
      gather_intrinsic_store_output_info(nir, instr, info);
      break;
   case nir_intrinsic_load_sbt_base_amd:
      info->cs.uses_sbt = true;
      break;
   case nir_intrinsic_load_force_vrs_rates_amd:
      info->force_vrs_per_vertex = true;
      break;
   default:
      break;
   }
}

/* Mark the descriptor sets referenced by a texture instruction's texture and
 * sampler derefs.
 */
static void
gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr, struct radv_shader_info *info)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_texture_deref:
         mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
         break;
      case nir_tex_src_sampler_deref:
         mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
         break;
      default:
         break;
      }
   }
}

/* Walk one basic block and gather info from the intrinsic and tex
 * instructions it contains.
 */
static void
gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info)
{
   nir_foreach_instr (instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic:
         gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
         break;
      case nir_instr_type_tex:
         gather_tex_info(nir, nir_instr_as_tex(instr), info);
         break;
      default:
         break;
      }
   }
}

/* Gather per-attribute VS input info: instanced attributes need instance id /
 * base instance, and the vertex-buffer descriptor usage mask is built either
 * per attribute or per binding depending on use_per_attribute_vb_descs.
 */
static void
gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
                          const struct radv_pipeline_key *key, struct radv_shader_info *info)
{
   unsigned attrib_count = glsl_count_attribute_slots(var->type, true);

   for (unsigned i = 0; i < attrib_count; ++i) {
      /* Attribute indices are relative to VERT_ATTRIB_GENERIC0. */
      unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;

      if (key->vs.instance_rate_inputs & (1u << attrib_index)) {
         info->vs.needs_instance_id = true;
         info->vs.needs_base_instance = true;
      }

      if (info->vs.use_per_attribute_vb_descs)
         info->vs.vb_desc_usage_mask |= 1u << attrib_index;
      else
         info->vs.vb_desc_usage_mask |= 1u << key->vs.vertex_attribute_bindings[attrib_index];
   }
}

/* Recursively mark FS input slots whose (scalar/vector/matrix) type is 16-bit
 * in float16_shaded_mask.  Arrays and structs recurse into their elements,
 * advancing the location by each element's slot count.
 */
static void
mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type, int location)
{
   if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
      unsigned attrib_count = glsl_count_attribute_slots(type, false);
      if (glsl_type_is_16bit(type)) {
         info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
      }
   } else if (glsl_type_is_array(type)) {
      unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
      for (unsigned i = 0; i < glsl_get_length(type); ++i) {
         mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
      }
   } else {
      assert(glsl_type_is_struct_or_ifc(type));
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
         location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
      }
   }
}
/* Gather FS input declaration info: clip/cull distance counts, 16-bit and
 * flat/explicit interpolation masks (by driver_location), and the per-vertex /
 * per-primitive input masks (by VARYING_SLOT_VAR0-relative location).
 */
static void
gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
                          struct radv_shader_info *info)
{
   unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
   int idx = var->data.location;

   switch (idx) {
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
      info->ps.num_input_clips_culls += attrib_count;
      break;
   default:
      break;
   }

   if (var->data.compact) {
      /* Compact (clip/cull) arrays: count components, round up to vec4 slots. */
      unsigned component_count = var->data.location_frac + glsl_get_length(var->type);
      attrib_count = (component_count + 3) / 4;
   } else {
      mark_16bit_ps_input(info, var->type, var->data.driver_location);
   }

   uint64_t mask = ((1ull << attrib_count) - 1);

   if (!var->data.per_primitive) {
      if (var->data.interpolation == INTERP_MODE_FLAT)
         info->ps.flat_shaded_mask |= mask << var->data.driver_location;
      else if (var->data.interpolation == INTERP_MODE_EXPLICIT)
         info->ps.explicit_shaded_mask |= mask << var->data.driver_location;
   }

   if (var->data.location >= VARYING_SLOT_VAR0) {
      if (var->data.per_primitive)
         info->ps.input_per_primitive_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
      else
         info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
   }
}

/* Dispatch input-declaration gathering by stage (only VS and FS have
 * per-declaration input info to collect).
 */
static void
gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
                       const struct radv_pipeline_key *key, struct radv_shader_info *info)
{
   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      gather_info_input_decl_vs(nir, var, key, info);
      break;
   case MESA_SHADER_FRAGMENT:
      gather_info_input_decl_ps(nir, var, info);
      break;
   default:
      break;
   }
}

/* Gather GS output declaration info: per-stream component counts and the
 * stream each output location belongs to.
 */
static void
gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var,
                           struct radv_shader_info *info)
{
   unsigned num_components = glsl_get_component_slots(var->type);
   unsigned stream = var->data.stream;
   unsigned idx = var->data.location;

   assert(stream < 4);

   info->gs.num_stream_output_components[stream] += num_components;
   info->gs.output_streams[idx] = stream;
}

/* Return the radv_vs_output_info of the stage if it is the last pre-rasterizer
 * stage (i.e. not compiled as LS/ES), or NULL otherwise.  GS always uses the
 * VS outinfo here — presumably for the GS copy-shader path; TODO confirm.
 */
static struct radv_vs_output_info *
get_vs_output_info(const nir_shader *nir, struct radv_shader_info *info)
{

   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      if (!info->vs.as_ls && !info->vs.as_es)
         return &info->vs.outinfo;
      break;
   case MESA_SHADER_GEOMETRY:
      return &info->vs.outinfo;
      break; /* NOTE(review): unreachable after the return above. */
   case MESA_SHADER_TESS_EVAL:
      if (!info->tes.as_es)
         return &info->tes.outinfo;
      break;
   case MESA_SHADER_MESH:
      return &info->ms.outinfo;
   default:
      break;
   }

   return NULL;
}

/* Dispatch output-declaration gathering by stage (currently only GS collects
 * anything; VS/TES cases are intentionally empty).
 */
static void
gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
                        struct radv_shader_info *info)
{
   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      break;
   case MESA_SHADER_GEOMETRY:
      gather_info_output_decl_gs(nir, var, info);
      break;
   case MESA_SHADER_TESS_EVAL:
      break;
   default:
      break;
   }
}

/* Copy transform-feedback (streamout) outputs, buffer strides, and the
 * enabled stream/buffer mask from the shader's nir_xfb_info, if any.
 */
static void
gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info)
{
   struct radv_streamout_info *so = &info->so;

   if (!nir->xfb_info)
      return;

   const nir_xfb_info *xfb = nir->xfb_info;
   assert(xfb->output_count <= MAX_SO_OUTPUTS);
   so->num_outputs = xfb->output_count;

   for (unsigned i = 0; i < xfb->output_count; i++) {
      struct radv_stream_output *output = &so->outputs[i];

      output->buffer = xfb->outputs[i].buffer;
      output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
      output->offset = xfb->outputs[i].offset;
      output->location = xfb->outputs[i].location;
      output->component_mask = xfb->outputs[i].component_mask;

      /* 4 buffer bits per stream. */
      so->enabled_stream_buffers_mask |= (1 << output->buffer) << (output->stream * 4);
   }

   for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
      /* Strides are stored in dwords. */
      so->strides[i] = xfb->buffers[i].stride / 4;
   }
}

/* Assign the next free param-export slot to the given varying, unless it
 * already has one.
 */
static void
assign_outinfo_param(struct radv_vs_output_info *outinfo, gl_varying_slot idx,
                     unsigned *total_param_exports)
{
   if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED)
      outinfo->vs_output_param_offset[idx] = (*total_param_exports)++;
}

/* Assign param-export slots for every varying in 'mask' that is exported as a
 * parameter: generic varyings (VAR0+), layer, primitive id, viewport, and
 * clip/cull distances when those are exported as params.
 */
static void
assign_outinfo_params(struct radv_vs_output_info *outinfo, uint64_t mask,
                      unsigned *total_param_exports)
{
   u_foreach_bit64(idx, mask) {
      if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER ||
          idx == VARYING_SLOT_PRIMITIVE_ID || idx == VARYING_SLOT_VIEWPORT ||
          ((idx == VARYING_SLOT_CLIP_DIST0 || idx == VARYING_SLOT_CLIP_DIST1) &&
           outinfo->export_clip_dists))
         assign_outinfo_param(outinfo, idx, total_param_exports);
   }
}

void
radv_nir_shader_info_init(struct radv_shader_info *info)
{
   /* Assume that shaders can inline all push constants by default;
    * gather_push_constant_info() clears this on the first load that cannot
    * be inlined.
    */
   info->can_inline_all_push_constants = true;
}

/* Main info-gathering pass: walks the NIR shader and fills radv_shader_info
 * with everything the backend needs (input/output usage, descriptor usage,
 * system values, streamout, export layout, per-stage properties).
 */
void
radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
                          const struct radv_pipeline_layout *layout,
                          const struct radv_pipeline_key *pipeline_key,
                          struct radv_shader_info *info)
{
   struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);

   /* Dynamic descriptor offsets are sourced from push constants. */
   if (layout && layout->dynamic_offset_count &&
       (layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
      info->loads_push_constants = true;
      info->loads_dynamic_offsets = true;
   }

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      if (pipeline_key->vs.dynamic_input_state && nir->info.inputs_read) {
         info->vs.has_prolog = true;
         info->vs.dynamic_inputs = true;
      }

      /* Use per-attribute vertex descriptors to prevent faults and
       * for correct bounds checking.
       */
      info->vs.use_per_attribute_vb_descs = device->robust_buffer_access || info->vs.dynamic_inputs;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      if (pipeline_key->ps.has_epilog) {
         info->ps.has_epilog = true;
      }
   }

   /* We have to ensure consistent input register assignments between the main shader and the
    * prolog.
    */
   info->vs.needs_instance_id |= info->vs.has_prolog;
   info->vs.needs_base_instance |= info->vs.has_prolog;
   info->vs.needs_draw_id |= info->vs.has_prolog;

   nir_foreach_shader_in_variable (variable, nir)
      gather_info_input_decl(nir, variable, pipeline_key, info);

   /* Walk every block of the (single) entry-point implementation. */
   nir_foreach_block (block, func->impl) {
      gather_info_block(nir, block, info);
   }

   nir_foreach_shader_out_variable(variable, nir) gather_info_output_decl(nir, variable, info);

   if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY)
      gather_xfb_info(nir, info);

   /* Compute the export layout if this stage is the last pre-raster stage. */
   struct radv_vs_output_info *outinfo = get_vs_output_info(nir, info);
   if (outinfo) {
      /* These slots are compiled into neither output-param nor position exports. */
      uint64_t special_mask = BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT) |
                              BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES) |
                              BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);
      uint64_t per_prim_mask =
         nir->info.outputs_written & nir->info.per_primitive_outputs & ~special_mask;
      uint64_t per_vtx_mask =
         nir->info.outputs_written & ~nir->info.per_primitive_outputs & ~special_mask;

      /* Mesh multiview is only lowered in ac_nir_lower_ngg, so we have to fake it here. */
      if (nir->info.stage == MESA_SHADER_MESH && pipeline_key->has_multiview_view_index) {
         per_prim_mask |= VARYING_BIT_LAYER;
         info->uses_view_index = true;
      }

      /* Per vertex outputs. */
      outinfo->writes_pointsize = per_vtx_mask & VARYING_BIT_PSIZ;
      outinfo->writes_viewport_index = per_vtx_mask & VARYING_BIT_VIEWPORT;
      outinfo->writes_layer = per_vtx_mask & VARYING_BIT_LAYER;
      outinfo->writes_primitive_shading_rate = per_vtx_mask & VARYING_BIT_PRIMITIVE_SHADING_RATE;

      /* Per primitive outputs. */
      outinfo->writes_viewport_index_per_primitive = per_prim_mask & VARYING_BIT_VIEWPORT;
      outinfo->writes_layer_per_primitive = per_prim_mask & VARYING_BIT_LAYER;
      outinfo->writes_primitive_shading_rate_per_primitive = per_prim_mask & VARYING_BIT_PRIMITIVE_SHADING_RATE;

      /* Clip/cull distances: cull bits follow the clip bits in one mask. */
      outinfo->clip_dist_mask = (1 << nir->info.clip_distance_array_size) - 1;
      outinfo->cull_dist_mask = (1 << nir->info.cull_distance_array_size) - 1;
      outinfo->cull_dist_mask <<= nir->info.clip_distance_array_size;

      /* Bit 0: position itself is always exported; bit 1: the misc vector
       * (psize/layer/viewport/vrs); bits 2-3: up to two clip/cull vec4s.
       */
      int pos_written = 0x1;

      if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
          outinfo->writes_primitive_shading_rate)
         pos_written |= 1 << 1;

      unsigned num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
      unsigned num_cull_distances = util_bitcount(outinfo->cull_dist_mask);

      if (num_clip_distances + num_cull_distances > 0)
         pos_written |= 1 << 2;
      if (num_clip_distances + num_cull_distances > 4)
         pos_written |= 1 << 3;

      outinfo->pos_exports = util_bitcount(pos_written);

      memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
             sizeof(outinfo->vs_output_param_offset));

      unsigned total_param_exports = 0;

      /* Per-vertex outputs */
      assign_outinfo_params(outinfo, per_vtx_mask, &total_param_exports);
      if (outinfo->writes_layer)
         assign_outinfo_param(outinfo, VARYING_SLOT_LAYER, &total_param_exports);
      if (outinfo->export_prim_id)
         assign_outinfo_param(outinfo, VARYING_SLOT_PRIMITIVE_ID, &total_param_exports);

      outinfo->param_exports = total_param_exports;

      /* Per-primitive outputs: the HW needs these to be last. */
      assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports);
      if (outinfo->writes_layer_per_primitive)
         assign_outinfo_param(outinfo, VARYING_SLOT_LAYER, &total_param_exports);
      if (outinfo->writes_viewport_index_per_primitive)
         assign_outinfo_param(outinfo, VARYING_SLOT_VIEWPORT, &total_param_exports);

      outinfo->prim_param_exports = total_param_exports - outinfo->param_exports;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Split the input count into per-vertex and per-primitive interpolants. */
      uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
      unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask);
      assert(num_per_primitive_inputs <= nir->num_inputs);

      info->ps.num_interp = nir->num_inputs - num_per_primitive_inputs;
      info->ps.num_prim_interp = num_per_primitive_inputs;
   }

   /* System values read by the shader. */
   info->vs.needs_draw_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
   info->vs.needs_base_instance |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
   info->vs.needs_instance_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
   info->uses_view_index |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VIEW_INDEX);
   info->uses_invocation_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
   info->uses_prim_id |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);

   /* Used by compute and mesh shaders. */
   info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
   info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
                                        BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
                                        BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
   switch (nir->info.stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_TASK:
      for (int i = 0; i < 3; ++i)
         info->cs.block_size[i] = nir->info.workgroup_size[i];
      info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD);

      /* Task shaders always need these for the I/O lowering even if
       * the API shader doesn't actually use them.
       */
      if (nir->info.stage == MESA_SHADER_TASK) {
         /* Needed to address the IB to read firstTask. */
         info->vs.needs_draw_id |=
            BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID);

         /* Needed to address the task draw/payload rings. */
         info->cs.uses_block_id[0] = true;
         info->cs.uses_block_id[1] = true;
         info->cs.uses_block_id[2] = true;
         info->cs.uses_grid_size = true;

         /* Needed for storing draw ready only on the 1st thread. */
         info->cs.uses_local_invocation_idx = true;
      }
      break;
   case MESA_SHADER_FRAGMENT:
      info->ps.can_discard = nir->info.fs.uses_discard;
      info->ps.early_fragment_test = nir->info.fs.early_fragment_tests;
      info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
      info->ps.depth_layout = nir->info.fs.depth_layout;
      info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading;
      info->ps.writes_memory = nir->info.writes_memory;
      info->ps.has_pcoord = nir->info.inputs_read & VARYING_BIT_PNTC;
      info->ps.prim_id_input = nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
      info->ps.layer_input = nir->info.inputs_read & VARYING_BIT_LAYER;
      info->ps.viewport_index_input = nir->info.inputs_read & VARYING_BIT_VIEWPORT;
      info->ps.writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
      info->ps.writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
      info->ps.writes_sample_mask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
      info->ps.reads_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
      info->ps.reads_sample_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
      info->ps.reads_frag_shading_rate = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE);
      info->ps.reads_front_face = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
      info->ps.reads_barycentric_model = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL);
      break;
   case MESA_SHADER_GEOMETRY:
      info->gs.vertices_in = nir->info.gs.vertices_in;
      info->gs.vertices_out = nir->info.gs.vertices_out;
      info->gs.output_prim = nir->info.gs.output_primitive;
      info->gs.invocations = nir->info.gs.invocations;
      /* Highest active stream index (0 if only the default stream is used). */
      info->gs.max_stream =
         nir->info.gs.active_stream_mask ? util_last_bit(nir->info.gs.active_stream_mask) - 1 : 0;
      break;
   case MESA_SHADER_TESS_EVAL:
      info->tes._primitive_mode = nir->info.tess._primitive_mode;
      info->tes.spacing = nir->info.tess.spacing;
      info->tes.ccw = nir->info.tess.ccw;
      info->tes.point_mode = nir->info.tess.point_mode;
      break;
   case MESA_SHADER_TESS_CTRL:
      info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
      break;
   case MESA_SHADER_VERTEX:
      break;
   case MESA_SHADER_MESH:
      info->ms.output_prim = nir->info.mesh.primitive_type;
      break;
   default:
      break;
   }

   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      /* One extra vec4 when clip+cull distances spill into a second slot;
       * 16 bytes per vec4 output.
       */
      unsigned add_clip =
         nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4;
      info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
      info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
   }

   /* Compute the ESGS item size for VS or TES as ES. */
   if ((nir->info.stage == MESA_SHADER_VERTEX && info->vs.as_es) ||
       (nir->info.stage == MESA_SHADER_TESS_EVAL && info->tes.as_es)) {
      struct radv_es_output_info *es_info =
         nir->info.stage == MESA_SHADER_VERTEX ? &info->vs.es_info : &info->tes.es_info;
      uint32_t num_outputs_written = nir->info.stage == MESA_SHADER_VERTEX
                                        ? info->vs.num_linked_outputs
                                        : info->tes.num_linked_outputs;
      es_info->esgs_itemsize = num_outputs_written * 16;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      bool uses_persp_or_linear_interp = info->ps.reads_persp_center ||
                                         info->ps.reads_persp_centroid ||
                                         info->ps.reads_persp_sample ||
                                         info->ps.reads_linear_center ||
                                         info->ps.reads_linear_centroid ||
                                         info->ps.reads_linear_sample;

      /* Flat shading is only allowed when nothing interpolated or
       * position/sample-dependent is read.
       */
      info->ps.allow_flat_shading =
         !(uses_persp_or_linear_interp || info->ps.needs_sample_positions ||
           info->ps.writes_memory || nir->info.fs.needs_quad_helper_invocations ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
           BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));

      info->ps.spi_ps_input = radv_compute_spi_ps_input(pipeline_key, info);
   }
}