/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_l3_config.h"
#include "common/intel_disasm.h"
#include "common/intel_sample_positions.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "compiler/brw_nir_rt.h"
#include "anv_nir.h"
#include "nir/nir_xfb_info.h"
#include "spirv/nir_spirv.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

/* Eventually, this will become part of anv_CreateShader. Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
static nir_shader *
anv_shader_stage_to_nir(struct anv_device *device,
                        const VkPipelineShaderStageCreateInfo *stage_info,
                        void *mem_ctx)
{
   const struct anv_physical_device *pdevice = device->physical;
   const struct anv_instance *instance = pdevice->instance;
   const struct brw_compiler *compiler = pdevice->compiler;
   gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[stage];

   const struct spirv_to_nir_options spirv_options = {
      .caps = {
         .demote_to_helper_invocation = true,
         .derivative_group = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float16 = pdevice->info.ver >= 8,
         .float32_atomic_add = pdevice->info.has_lsc,
         .float32_atomic_min_max = pdevice->info.ver >= 9,
         .float64 = pdevice->info.ver >= 8,
         .float64_atomic_min_max = pdevice->info.has_lsc,
         .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
         .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
         .geometry_streams = true,
         /* When using Vulkan 1.3 or KHR_format_feature_flags2 is enabled, the
          * read/write without format is per format, so just report true. It's
          * up to the application to check.
          */
         .image_read_without_format =
            instance->vk.app_info.api_version >= VK_API_VERSION_1_3 ||
            device->vk.enabled_extensions.KHR_format_feature_flags2,
         .image_write_without_format = true,
         .int8 = pdevice->info.ver >= 8,
         .int16 = pdevice->info.ver >= 8,
         .int64 = pdevice->info.ver >= 8,
         .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
         .integer_functions2 = pdevice->info.ver >= 8,
         .mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
         .post_depth_coverage = pdevice->info.ver >= 9,
         .runtime_descriptor_array = true,
         .float_controls = pdevice->info.ver >= 8,
         .ray_query = pdevice->info.has_ray_tracing,
         .ray_tracing = pdevice->info.has_ray_tracing,
         .shader_clock = true,
         .shader_viewport_index_layer = true,
         .stencil_export = pdevice->info.ver >= 9,
         .storage_8bit = pdevice->info.ver >= 8,
         .storage_16bit = pdevice->info.ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_uniform_control_flow = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = pdevice->info.ver >= 8,
         .variable_pointers = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .workgroup_memory_explicit_layout = true,
         .fragment_shading_rate = pdevice->info.ver >= 11,
      },
      .ubo_addr_format =
         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
      .ssbo_addr_format =
         anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,

      /* TODO: Consider changing this to an address format that has the NULL
       * pointer equal to 0. That might be a better format to play nice
       * with certain code / code generators.
       */
      .shared_addr_format = nir_address_format_32bit_offset,
   };

   nir_shader *nir;
   VkResult result =
      vk_pipeline_shader_stage_to_nir(&device->vk, stage_info,
                                      &spirv_options, nir_options,
                                      mem_ctx, &nir);
   if (result != VK_SUCCESS)
      return NULL;

   if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
      fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
              gl_shader_stage_name(stage));
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   const nir_opt_access_options opt_access_options = {
      .is_vulkan = true,
      .infer_non_readable = true,
   };
   NIR_PASS(_, nir, nir_opt_access, &opt_access_options);

   NIR_PASS(_, nir, nir_lower_frexp);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   brw_preprocess_nir(compiler, nir, NULL);

   return nir;
}

VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator)
{
   VkResult result;

   memset(pipeline, 0, sizeof(*pipeline));

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   pipeline->device = device;

   /* It's the job of the child class to provide actual backing storage for
    * the batch by setting batch.start, batch.next, and batch.end.
    */
   pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pipeline->batch.alloc);
   if (result != VK_SUCCESS)
      return result;

   pipeline->mem_ctx = ralloc_context(NULL);

   pipeline->type = type;
   pipeline->flags = flags;

   util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);

   return VK_SUCCESS;
}

void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator)
{
   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ?
                            pAllocator : &device->vk.alloc);
   ralloc_free(pipeline->mem_ctx);
   vk_object_base_finish(&pipeline->base);
}

void anv_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      struct anv_graphics_pipeline *gfx_pipeline =
         anv_pipeline_to_graphics(pipeline);

      for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
         if (gfx_pipeline->shaders[s])
            anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
      }
      break;
   }

   case ANV_PIPELINE_COMPUTE: {
      struct anv_compute_pipeline *compute_pipeline =
         anv_pipeline_to_compute(pipeline);

      if (compute_pipeline->cs)
         anv_shader_bin_unref(device, compute_pipeline->cs);

      break;
   }

   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);

      util_dynarray_foreach(&rt_pipeline->shaders,
                            struct anv_shader_bin *, shader) {
         anv_shader_bin_unref(device, *shader);
      }
      break;
   }

   default:
      unreachable("invalid pipeline type");
   }

   anv_pipeline_finish(pipeline, device, pAllocator);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
}

static void
populate_sampler_prog_key(const struct intel_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* Almost all multisampled textures are compressed. The only time when we
    * don't compress a multisampled texture is for 16x MSAA with a surface
    * width greater than 8k which is a bit of an edge case. Since the sampler
    * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
    * to tell the compiler to always assume compression.
    */
   key->compressed_multisample_layout_mask = ~0;

   /* SkyLake added support for 16x MSAA. With this came a new message for
    * reading from a 16x MSAA surface with compression. The new message was
    * needed because now the MCS data is 64 bits instead of 32 or lower as is
    * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
    * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
    * so we can just use it unconditionally. This may not be quite as
    * efficient but it saves us from recompiling.
    */
   if (devinfo->ver >= 9)
      key->msaa_16 = ~0;

   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < BRW_MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling.
       * (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}

static void
populate_base_prog_key(const struct anv_device *device,
                       bool robust_buffer_access,
                       struct brw_base_prog_key *key)
{
   key->robust_buffer_access = robust_buffer_access;
   key->limit_trig_input_range =
      device->physical->instance->limit_trig_input_range;

   populate_sampler_prog_key(&device->info, &key->tex);
}

static void
populate_vs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

static void
populate_tcs_prog_key(const struct anv_device *device,
                      bool robust_buffer_access,
                      unsigned input_vertices,
                      struct brw_tcs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);

   key->input_vertices = input_vertices;
}

static void
populate_tes_prog_key(const struct anv_device *device,
                      bool robust_buffer_access,
                      struct brw_tes_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_gs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static bool
pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
                          const BITSET_WORD *dynamic,
                          const struct vk_multisample_state *ms,
                          const struct vk_fragment_shading_rate_state *fsr)
{
   /* The Vulkan 1.2.199 spec says:
    *
    *    "If any of the following conditions are met, Cxy' must be set to
    *    {1,1}:
    *
    *     * If Sample Shading is enabled.
    *     * [...]"
    *
    * And "sample shading" is defined as follows:
    *
    *    "Sample shading is enabled for a graphics pipeline:
    *
    *     * If the interface of the fragment shader entry point of the
    *       graphics pipeline includes an input variable decorated with
    *       SampleId or SamplePosition. In this case minSampleShadingFactor
    *       takes the value 1.0.
    *
    *     * Else if the sampleShadingEnable member of the
    *       VkPipelineMultisampleStateCreateInfo structure specified when
    *       creating the graphics pipeline is set to VK_TRUE. In this case
    *       minSampleShadingFactor takes the value of
    *       VkPipelineMultisampleStateCreateInfo::minSampleShading.
    *
    *    Otherwise, sample shading is considered disabled."
    *
    * The first bullet above is handled by the back-end compiler because those
    * inputs both force per-sample dispatch. The second bullet is handled
    * here. Note that this sample shading being enabled has nothing to do
    * with minSampleShading.
    */
   if (ms != NULL && ms->sample_shading_enable)
      return false;

   /* Not dynamic & pipeline has a 1x1 fragment shading rate with no
    * possibility for any element of the pipeline to change the value.
    */
   if (!BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR) &&
       fsr->fragment_size.width <= 1 &&
       fsr->fragment_size.height <= 1 &&
       fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
       fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
      return false;

   return true;
}

static void
populate_task_prog_key(const struct anv_device *device,
                       bool robust_buffer_access,
                       struct brw_task_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_mesh_prog_key(const struct anv_device *device,
                       bool robust_buffer_access,
                       struct brw_mesh_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                     bool robust_buffer_access,
                     const BITSET_WORD *dynamic,
                     const struct vk_multisample_state *ms,
                     const struct vk_fragment_shading_rate_state *fsr,
                     const struct vk_render_pass_state *rp,
                     struct brw_wm_prog_key *key)
{
   const struct anv_device *device = pipeline->base.device;

   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);

   /* We set this to 0 here and set it to the actual value before we call
    * brw_compile_fs.
    */
   key->input_slots_valid = 0;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->ignore_sample_mask_out = false;

   assert(rp->color_attachment_count <= MAX_RTS);
   /* Consider all inputs as valid until we look at the NIR variables. */
   key->color_outputs_valid = (1u << rp->color_attachment_count) - 1;
   key->nr_color_regions = rp->color_attachment_count;

   /* To reduce possible shader recompilations we would need to know if
    * there is a SampleMask output variable to compute if we should emit
    * code to work around the issue that hardware disables alpha to coverage
    * when there is SampleMask output.
    */
   key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable;

   /* Vulkan doesn't support fixed-function alpha test */
   key->alpha_test_replicate_alpha = false;

   if (ms != NULL) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      if (ms->rasterization_samples > 1) {
         key->persample_interp = ms->sample_shading_enable &&
            (ms->min_sample_shading * ms->rasterization_samples) > 1;
         key->multisample_fbo = true;
      }

      if (device->physical->instance->sample_mask_out_opengl_behaviour)
         key->ignore_sample_mask_out = !key->multisample_fbo;
   }

   key->coarse_pixel =
      !key->persample_interp &&
      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
      pipeline_has_coarse_pixel(pipeline, dynamic, ms, fsr);
}

static void
populate_cs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_bs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_bs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

struct anv_pipeline_stage {
   gl_shader_stage stage;

   const VkPipelineShaderStageCreateInfo *info;

   unsigned char shader_sha1[20];

   union brw_any_prog_key key;

   struct {
      gl_shader_stage stage;
      unsigned char sha1[20];
   } cache_key;

   nir_shader *nir;

   struct anv_pipeline_binding surface_to_descriptor[256];
   struct anv_pipeline_binding sampler_to_descriptor[256];
   struct anv_pipeline_bind_map bind_map;

   union brw_any_prog_data prog_data;

   uint32_t num_stats;
   struct brw_compile_stats stats[3];
   char *disasm[3];

   VkPipelineCreationFeedback feedback;

   const unsigned *code;

   struct anv_shader_bin *bin;
};

static void
anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
                           struct anv_pipeline_layout *layout,
                           struct anv_pipeline_stage *stages,
                           unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, &pipeline->view_mask,
                     sizeof(pipeline->view_mask));

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      if (stages[s].info) {
         _mesa_sha1_update(&ctx, stages[s].shader_sha1,
                           sizeof(stages[s].shader_sha1));
         _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
      }
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
                          struct anv_pipeline_layout *layout,
                          struct anv_pipeline_stage *stage,
                          unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const struct anv_device *device = pipeline->base.device;

   const bool rba = device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   const bool afs = device->physical->instance->assume_full_subgroups;
   _mesa_sha1_update(&ctx, &afs, sizeof(afs));

   _mesa_sha1_update(&ctx, stage->shader_sha1,
                     sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
                                     struct anv_pipeline_layout *layout,
                                     struct anv_pipeline_stage *stage,
                                     unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
                                              struct anv_pipeline_layout *layout,
                                              struct anv_pipeline_stage *intersection,
                                              struct anv_pipeline_stage *any_hit,
                                              unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
                           struct vk_pipeline_cache *cache,
                           void *mem_ctx,
                           struct anv_pipeline_stage *stage)
{
   const struct brw_compiler *compiler =
      pipeline->device->physical->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[stage->stage];
   nir_shader *nir;

   nir = anv_device_search_for_nir(pipeline->device, cache,
                                   nir_options,
                                   stage->shader_sha1,
                                   mem_ctx);
   if (nir) {
      assert(nir->info.stage == stage->stage);
      return nir;
   }

   nir = anv_shader_stage_to_nir(pipeline->device, stage->info, mem_ctx);
   if (nir) {
      anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
      return nir;
   }

   return NULL;
}

static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length,
   *align = comp_size * (length == 3 ?
                         4 : length);
}

static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                       void *mem_ctx,
                       struct anv_pipeline_stage *stage,
                       struct anv_pipeline_layout *layout)
{
   const struct anv_physical_device *pdevice = pipeline->device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;

   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_lower_wpos_center);
      NIR_PASS(_, nir, nir_lower_input_attachments,
               &(nir_input_attachment_options) {
                   .use_fragcoord_sysval = true,
                   .use_layer_id_sysval = true,
               });
   }

   NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout);

   if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
      NIR_PASS(_, nir, anv_nir_lower_multiview,
               anv_pipeline_to_graphics(pipeline));
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS(_, nir, brw_nir_lower_storage_image, compiler->devinfo);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
            nir_address_format_64bit_global);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
            nir_address_format_32bit_offset);

   NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
              pdevice, pipeline->device->robust_buffer_access,
              layout, &stage->bind_map);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
            anv_nir_ubo_addr_format(pdevice,
                                    pipeline->device->robust_buffer_access));
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
            anv_nir_ssbo_addr_format(pdevice,
                                     pipeline->device->robust_buffer_access));

   /* First run copy-prop to get rid of all of the vec() that address
    * calculations often create and then constant-fold so that, when we
    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
    */
   NIR_PASS(_, nir, nir_copy_prop);
   NIR_PASS(_, nir, nir_opt_constant_folding);

   NIR_PASS(_, nir, anv_nir_lower_ubo_loads);

   /* We don't support non-uniform UBOs and non-uniform SSBO access is
    * handled naturally by falling back to A64 messages.
    */
   NIR_PASS(_, nir, nir_lower_non_uniform_access,
            &(nir_lower_non_uniform_access_options) {
               .types = nir_lower_non_uniform_texture_access |
                        nir_lower_non_uniform_image_access,
               .callback = NULL,
            });

   NIR_PASS_V(nir, anv_nir_compute_push_layout,
              pdevice, pipeline->device->robust_buffer_access,
              prog_data, &stage->bind_map, mem_ctx);

   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
                  nir_var_mem_shared, shared_type_info);
      }

      NIR_PASS(_, nir, nir_lower_explicit_io,
               nir_var_mem_shared, nir_address_format_32bit_offset);

      if (nir->info.zero_initialize_shared_memory &&
          nir->info.shared_size > 0) {
         /* The effective Shared Local Memory size is at least 1024 bytes and
          * is always rounded to a power of two, so it is OK to align the size
          * used by the shader to chunk_size -- which does simplify the logic.
          */
         const unsigned chunk_size = 16;
         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
         assert(shared_size <=
                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));

         NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);
      }
   }

   if (gl_shader_stage_is_compute(nir->info.stage) ||
       gl_shader_stage_is_mesh(nir->info.stage))
      NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics);

   stage->nir = nir;
}

static void
anv_pipeline_link_vs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *vs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_graphics_pipeline *pipeline,
                        struct anv_pipeline_stage *vs_stage)
{
   /* When using Primitive Replication for multiview, each view gets its own
    * position slot.
    */
   uint32_t pos_slots = pipeline->use_primitive_replication ?
      MAX2(1, util_bitcount(pipeline->view_mask)) : 1;

   brw_compute_vue_map(compiler->devinfo,
                       &vs_stage->prog_data.vs.base.vue_map,
                       vs_stage->nir->info.outputs_written,
                       vs_stage->nir->info.separate_shader,
                       pos_slots);

   vs_stage->num_stats = 1;

   struct brw_compile_vs_params params = {
      .nir = vs_stage->nir,
      .key = &vs_stage->key.vs,
      .prog_data = &vs_stage->prog_data.vs,
      .stats = vs_stage->stats,
      .log_data = pipeline->base.device,
   };

   vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both. Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == 0 ||
          tes_info->tess._primitive_mode == 0 ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

static void
anv_pipeline_link_tcs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tcs_stage,
                      struct anv_pipeline_stage *tes_stage)
{
   assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);

   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);

   nir_lower_patch_vertices(tes_stage->nir,
                            tcs_stage->nir->info.tess.tcs_vertices_out,
                            NULL);

   /* Copy TCS info into the TES info */
   merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);

   /* Whacking the key after cache lookup is a bit sketchy, but all of
    * this comes from the SPIR-V, which is part of the hash used for the
    * pipeline cache. So it should be safe.
    */
   tcs_stage->key.tcs._tes_primitive_mode =
      tes_stage->nir->info.tess._primitive_mode;
   tcs_stage->key.tcs.quads_workaround =
      compiler->devinfo->ver < 9 &&
      tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
      tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
}

static void
anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tcs_stage,
                         struct anv_pipeline_stage *prev_stage)
{
   tcs_stage->key.tcs.outputs_written =
      tcs_stage->nir->info.outputs_written;
   tcs_stage->key.tcs.patch_outputs_written =
      tcs_stage->nir->info.patch_outputs_written;

   tcs_stage->num_stats = 1;

   struct brw_compile_tcs_params params = {
      .nir = tcs_stage->nir,
      .key = &tcs_stage->key.tcs,
      .prog_data = &tcs_stage->prog_data.tcs,
      .stats = tcs_stage->stats,
      .log_data = device,
   };

   tcs_stage->code = brw_compile_tcs(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_tes(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tes_stage,
                      struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_tes(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tes_stage,
                         struct anv_pipeline_stage *tcs_stage)
{
   tes_stage->key.tes.inputs_read =
      tcs_stage->nir->info.outputs_written;
   tes_stage->key.tes.patch_inputs_read =
      tcs_stage->nir->info.patch_outputs_written;

   tes_stage->num_stats = 1;

   struct brw_compile_tes_params params = {
      .nir = tes_stage->nir,
      .key = &tes_stage->key.tes,
      .prog_data = &tes_stage->prog_data.tes,
      .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
      .stats = tes_stage->stats,
      .log_data = device,
   };

   tes_stage->code = brw_compile_tes(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_gs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *gs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_gs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *gs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   brw_compute_vue_map(compiler->devinfo,
                       &gs_stage->prog_data.gs.base.vue_map,
                       gs_stage->nir->info.outputs_written,
                       gs_stage->nir->info.separate_shader, 1);

   gs_stage->num_stats = 1;

   struct brw_compile_gs_params params = {
      .nir = gs_stage->nir,
      .key = &gs_stage->key.gs,
      .prog_data = &gs_stage->prog_data.gs,
      .stats = gs_stage->stats,
      .log_data = device,
   };

   gs_stage->code = brw_compile_gs(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_task(const struct brw_compiler *compiler,
                       struct anv_pipeline_stage *task_stage,
                       struct anv_pipeline_stage *next_stage)
{
   assert(next_stage);
   assert(next_stage->stage == MESA_SHADER_MESH);
   brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_task(const struct brw_compiler *compiler,
                          void *mem_ctx,
                          struct anv_device *device,
                          struct anv_pipeline_stage *task_stage)
{
   task_stage->num_stats = 1;

   struct brw_compile_task_params params = {
      .nir = task_stage->nir,
      .key = &task_stage->key.task,
      .prog_data = &task_stage->prog_data.task,
      .stats = task_stage->stats,
      .log_data = device,
   };

   task_stage->code = brw_compile_task(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_mesh(const struct brw_compiler *compiler,
                       struct anv_pipeline_stage *mesh_stage,
                       struct anv_pipeline_stage *next_stage)
{
   if (next_stage) {
      brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
   }
}

static void
anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
                          void *mem_ctx,
                          struct anv_device *device,
                          struct anv_pipeline_stage *mesh_stage,
                          struct anv_pipeline_stage *prev_stage)
{
   mesh_stage->num_stats = 1;

   struct brw_compile_mesh_params params = {
      .nir = mesh_stage->nir,
      .key = &mesh_stage->key.mesh,
      .prog_data = &mesh_stage->prog_data.mesh,
      .stats = mesh_stage->stats,
      .log_data = device,
   };

   if (prev_stage) {
      assert(prev_stage->stage == MESA_SHADER_TASK);
      params.tue_map = &prev_stage->prog_data.task.map;
   }

   mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *stage,
                     const struct vk_render_pass_state *rp)
{
   /* Initially the valid outputs value is set to all possible render targets
    * valid (see populate_wm_prog_key()), before we look at the shader
    * variables. Here we look at the output variables of the shader and compute
    * a correct number of render target outputs.
    */
   stage->key.wm.color_outputs_valid = 0;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
   }
   stage->key.wm.color_outputs_valid &=
      (1u << rp->color_attachment_count) - 1;
   stage->key.wm.nr_color_regions =
      util_last_bit(stage->key.wm.color_outputs_valid);

   unsigned num_rt_bindings;
   struct anv_pipeline_binding rt_bindings[MAX_RTS];
   if (stage->key.wm.nr_color_regions > 0) {
      assert(stage->key.wm.nr_color_regions <= MAX_RTS);
      for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
         if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = rt,
            };
         } else {
            /* Setup a null render target */
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = UINT32_MAX,
            };
         }
      }
      num_rt_bindings = stage->key.wm.nr_color_regions;
   } else {
      /* Setup a null render target */
      rt_bindings[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
         .index = UINT32_MAX,
      };
      num_rt_bindings = 1;
   }

   assert(num_rt_bindings <= MAX_RTS);
   assert(stage->bind_map.surface_count == 0);
   typed_memcpy(stage->bind_map.surface_to_descriptor,
                rt_bindings, num_rt_bindings);
   stage->bind_map.surface_count += num_rt_bindings;
}

static void
anv_pipeline_compile_fs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *fs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   /* TODO: we could set this to 0 based on the information in nir_shader, but
    * we need this before we call spirv_to_nir.
    */
   assert(prev_stage);

   struct brw_compile_fs_params params = {
      .nir = fs_stage->nir,
      .key = &fs_stage->key.wm,
      .prog_data = &fs_stage->prog_data.wm,

      .allow_spilling = true,
      .stats = fs_stage->stats,
      .log_data = device,
   };

   if (prev_stage->stage == MESA_SHADER_MESH) {
      params.mue_map = &prev_stage->prog_data.mesh.map;
      /* TODO(mesh): Slots valid, do we even use/rely on it?
       */
   } else {
      fs_stage->key.wm.input_slots_valid =
         prev_stage->prog_data.vue.vue_map.slots_valid;
   }

   fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);

   fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_32;
}

static void
anv_pipeline_add_executable(struct anv_pipeline *pipeline,
                            struct anv_pipeline_stage *stage,
                            struct brw_compile_stats *stats,
                            uint32_t code_offset)
{
   char *nir = NULL;
   if (stage->nir &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
   }

   char *disasm = NULL;
   if (stage->code &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      char *stream_data = NULL;
      size_t stream_size = 0;
      FILE *stream = open_memstream(&stream_data, &stream_size);

      uint32_t push_size = 0;
      for (unsigned i = 0; i < 4; i++)
         push_size += stage->bind_map.push_ranges[i].length;
      if (push_size > 0) {
         fprintf(stream, "Push constant ranges:\n");
         for (unsigned i = 0; i < 4; i++) {
            if (stage->bind_map.push_ranges[i].length == 0)
               continue;

            fprintf(stream, " RANGE%d (%dB): ", i,
                    stage->bind_map.push_ranges[i].length * 32);

            switch (stage->bind_map.push_ranges[i].set) {
            case ANV_DESCRIPTOR_SET_NULL:
               fprintf(stream, "NULL");
               break;

            case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
               fprintf(stream, "Vulkan push constants and API params");
               break;

            case ANV_DESCRIPTOR_SET_DESCRIPTORS:
               fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
               unreachable("gl_NumWorkgroups is never pushed");

            case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
               fprintf(stream, "Inline shader constant data (start=%dB)",
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
               unreachable("Color attachments can't be pushed");

            default:
               fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
                       stage->bind_map.push_ranges[i].set,
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;
            }
            fprintf(stream, "\n");
         }
         fprintf(stream, "\n");
      }

      /* Creating this is far cheaper than it looks. It's perfectly fine to
       * do it for every binary.
       */
      intel_disassemble(&pipeline->device->physical->compiler->isa,
                        stage->code, code_offset, stream);

      fclose(stream);

      /* Copy it to a ralloc'd thing */
      disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
      memcpy(disasm, stream_data, stream_size);
      disasm[stream_size] = 0;

      free(stream_data);
   }

   const struct anv_pipeline_executable exe = {
      .stage = stage->stage,
      .stats = *stats,
      .nir = nir,
      .disasm = disasm,
   };
   util_dynarray_append(&pipeline->executables,
                        struct anv_pipeline_executable, exe);
}

static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline,
                             struct anv_pipeline_stage *stage,
                             struct anv_shader_bin *bin)
{
   if (stage->stage == MESA_SHADER_FRAGMENT) {
      /* We pull the prog data and stats out of the anv_shader_bin because
       * the anv_pipeline_stage may not be fully populated if we successfully
       * looked up the shader in a cache.
       */
      const struct brw_wm_prog_data *wm_prog_data =
         (const struct brw_wm_prog_data *)bin->prog_data;
      struct brw_compile_stats *stats = bin->stats;

      if (wm_prog_data->dispatch_8) {
         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
      }

      if (wm_prog_data->dispatch_16) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_16);
      }

      if (wm_prog_data->dispatch_32) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_32);
      }
   } else {
      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
   }

   pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
}

static void
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
{
   /* TODO: Cache this pipeline-wide information. */

   if (anv_pipeline_is_primitive(pipeline)) {
      /* Primitive replication depends on information from all the shaders.
       * Recover this bit from the fact that we have more than one position slot
       * in the vertex shader when using it.
       */
      assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
      int pos_slots = 0;
      const struct brw_vue_prog_data *vue_prog_data =
         (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
      const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
      for (int i = 0; i < vue_map->num_slots; i++) {
         if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
            pos_slots++;
      }
      pipeline->use_primitive_replication = pos_slots > 1;
   }
}

static void
anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
                                const struct vk_graphics_pipeline_state *state,
                                struct anv_pipeline_stage *stages)
{
   for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      vk_pipeline_hash_shader_stage(stages[s].info, stages[s].shader_sha1);

      const struct anv_device *device = pipeline->base.device;
      switch (stages[s].stage) {
      case MESA_SHADER_VERTEX:
         populate_vs_prog_key(device,
                              pipeline->base.device->robust_buffer_access,
                              &stages[s].key.vs);
         break;
      case MESA_SHADER_TESS_CTRL:
         populate_tcs_prog_key(device,
                               pipeline->base.device->robust_buffer_access,
                               state->ts->patch_control_points,
                               &stages[s].key.tcs);
         break;
      case MESA_SHADER_TESS_EVAL:
         populate_tes_prog_key(device,
                               pipeline->base.device->robust_buffer_access,
                               &stages[s].key.tes);
         break;
      case MESA_SHADER_GEOMETRY:
         populate_gs_prog_key(device,
                              pipeline->base.device->robust_buffer_access,
                              &stages[s].key.gs);
         break;
      case MESA_SHADER_FRAGMENT: {
         populate_wm_prog_key(pipeline,
                              pipeline->base.device->robust_buffer_access,
                              state->dynamic, state->ms, state->fsr, state->rp,
                              &stages[s].key.wm);
         break;
      }
      case MESA_SHADER_TASK:
         populate_task_prog_key(device,
                                pipeline->base.device->robust_buffer_access,
                                &stages[s].key.task);
         break;
      case MESA_SHADER_MESH:
         populate_mesh_prog_key(device,
                                pipeline->base.device->robust_buffer_access,
                                &stages[s].key.mesh);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }

      stages[s].feedback.duration += os_time_get_nano() - stage_start;
      stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
   }

   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
          pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
}

static bool
anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline,
                                          struct vk_pipeline_cache *cache,
                                          struct anv_pipeline_stage *stages,
                                          VkPipelineCreationFeedbackEXT *pipeline_feedback)
{
   unsigned found = 0;
   unsigned cache_hits = 0;
   for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      struct anv_shader_bin *bin =
         anv_device_search_for_kernel(pipeline->base.device, cache,
                                      &stages[s].cache_key,
                                      sizeof(stages[s].cache_key), &cache_hit);
      if (bin) {
         found++;
         pipeline->shaders[s] = bin;
      }

      if (cache_hit) {
         cache_hits++;
         stages[s].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      }
      stages[s].feedback.duration += os_time_get_nano() - stage_start;
   }

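   /* `found` counts the stages for which the cache returned a shader binary;
    * comparing it against the number of active stages tells a full cache hit
    * apart from a partial hit (in which case we recompile everything) or a
    * complete miss.
    */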
   if (found == __builtin_popcount(pipeline->active_stages)) {
      if (cache_hits == found) {
         pipeline_feedback->flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      }
      /* We found all our shaders in the cache. We're done. */
      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
         if (!stages[s].info)
            continue;

         anv_pipeline_add_executables(&pipeline->base, &stages[s],
                                      pipeline->shaders[s]);
      }
      anv_pipeline_init_from_cached_graphics(pipeline);
      return true;
   } else if (found > 0) {
      /* We found some but not all of our shaders. This shouldn't happen most
       * of the time but it can if we have a partially populated pipeline
       * cache.
       */
      assert(found < __builtin_popcount(pipeline->active_stages));

      vk_perf(VK_LOG_OBJS(cache ? &cache->base :
                          &pipeline->base.device->vk.base),
              "Found a partial pipeline in the cache. This is "
              "most likely caused by an incomplete pipeline cache "
              "import or export");

      /* We're going to have to recompile anyway, so just throw away our
       * references to the shaders in the cache. We'll get them out of the
       * cache again as part of the compilation process.
       */
      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
         stages[s].feedback.flags = 0;
         if (pipeline->shaders[s]) {
            anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
            pipeline->shaders[s] = NULL;
         }
      }
   }

   return false;
}

static const gl_shader_stage graphics_shader_order[] = {
   MESA_SHADER_VERTEX,
   MESA_SHADER_TESS_CTRL,
   MESA_SHADER_TESS_EVAL,
   MESA_SHADER_GEOMETRY,

   MESA_SHADER_TASK,
   MESA_SHADER_MESH,

   MESA_SHADER_FRAGMENT,
};

static VkResult
anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline,
                               struct vk_pipeline_cache *cache,
                               struct anv_pipeline_stage *stages,
                               void *pipeline_ctx)
{
   for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      assert(stages[s].stage == s);
      assert(pipeline->shaders[s] == NULL);

      stages[s].bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stages[s].surface_to_descriptor,
         .sampler_to_descriptor = stages[s].sampler_to_descriptor
      };

      stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx,
                                                 &stages[s]);
      if (stages[s].nir == NULL) {
         return vk_error(pipeline, VK_ERROR_UNKNOWN);
      }

      stages[s].feedback.duration += os_time_get_nano() - stage_start;
   }

   return VK_SUCCESS;
}

static VkResult
anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
                              struct vk_pipeline_cache *cache,
                              const VkGraphicsPipelineCreateInfo *info,
                              const struct vk_graphics_pipeline_state *state)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
   VkResult result;

   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
   struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
   for (uint32_t i = 0; i <
        info->stageCount; i++) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
      stages[stage].stage = stage;
      stages[stage].info = &info->pStages[i];
   }

   anv_graphics_pipeline_init_keys(pipeline, state, stages);

   unsigned char sha1[20];
   anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);

   for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) {
      if (!stages[s].info)
         continue;

      stages[s].cache_key.stage = s;
      memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
   }

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
   if (!skip_cache_lookup) {
      bool found_all_shaders =
         anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
                                                   &pipeline_feedback);
      if (found_all_shaders)
         goto done;
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
      return VK_PIPELINE_COMPILE_REQUIRED;

   void *pipeline_ctx = ralloc_context(NULL);

   result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
                                           pipeline_ctx);
   if (result != VK_SUCCESS)
      goto fail;

   /* Walk backwards to link */
   struct anv_pipeline_stage *next_stage = NULL;
   for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_link_vs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_link_tes(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_link_gs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TASK:
         anv_pipeline_link_task(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_MESH:
         anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_link_fs(compiler, &stages[s], state->rp);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }

      next_stage = &stages[s];
   }

   if (pipeline->base.device->info.ver >= 12 &&
       pipeline->view_mask != 0) {
      /* For some pipelines HW Primitive Replication can be used instead of
       * instancing to implement Multiview. This depends on how viewIndex is
       * used in all the active shaders, so this check can't be done per
       * individual shaders.
       */
      nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
      for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
         shaders[s] = stages[s].nir;

      pipeline->use_primitive_replication =
         anv_check_for_primitive_replication(shaders, pipeline);
   } else {
      pipeline->use_primitive_replication = false;
   }

   struct anv_pipeline_stage *prev_stage = NULL;
   for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);

      if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
         prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
         stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
      }

      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   /* In the case the platform can write the primitive variable shading rate,
    * figure out the last geometry stage that should write the primitive
    * shading rate, and ensure it is marked as used there. The backend will
    * write a default value if the shader doesn't actually write it.
    *
    * We iterate backwards through the stages and stop at the first shader
    * that can set the value.
    */
   const struct intel_device_info *devinfo = &pipeline->base.device->info;
   if (devinfo->has_coarse_pixel_primitive_and_cb &&
       stages[MESA_SHADER_FRAGMENT].info &&
       stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel &&
       !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading &&
       stages[MESA_SHADER_MESH].info == NULL) {
      struct anv_pipeline_stage *last_psr = NULL;

      for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
         gl_shader_stage s =
            graphics_shader_order[ARRAY_SIZE(graphics_shader_order) - i - 1];

         if (!stages[s].info ||
             !gl_shader_stage_can_set_fragment_shading_rate(s))
            continue;

         last_psr = &stages[s];
         break;
      }

      assert(last_psr);
      last_psr->nir->info.outputs_written |= VARYING_BIT_PRIMITIVE_SHADING_RATE;
   }

   prev_stage = NULL;
   for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
                                 &stages[s]);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      case MESA_SHADER_TASK:
         anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
                                   &stages[s]);
         break;
      case MESA_SHADER_MESH:
         anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
                                   &stages[s], prev_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }
      if (stages[s].code == NULL) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_nir_validate_push_layout(&stages[s].prog_data.base,
                                   &stages[s].bind_map);

      struct anv_shader_bin *bin =
         anv_device_upload_kernel(pipeline->base.device, cache, s,
                                  &stages[s].cache_key,
                                  sizeof(stages[s].cache_key),
                                  stages[s].code,
                                  stages[s].prog_data.base.program_size,
                                  &stages[s].prog_data.base,
                                  brw_prog_data_size(s),
                                  stages[s].stats, stages[s].num_stats,
                                  stages[s].nir->xfb_info,
                                  &stages[s].bind_map);
      if (!bin) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);

      pipeline->shaders[s] = bin;
      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   ralloc_free(pipeline_ctx);

done:

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
      assert(stage_count == 0 || info->stageCount == stage_count);
      for (uint32_t i = 0; i < stage_count; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;

fail:
   ralloc_free(pipeline_ctx);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (pipeline->shaders[s])
         anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
   }

   return result;
}

static VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                        struct vk_pipeline_cache *cache,
                        const VkComputePipelineCreateInfo *info)
{
   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
   assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);

   VkPipelineCreationFeedback pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
   };
   int64_t pipeline_start = os_time_get_nano();

   struct anv_device *device = pipeline->base.device;
   const struct brw_compiler *compiler = device->physical->compiler;

   struct anv_pipeline_stage stage = {
      .stage = MESA_SHADER_COMPUTE,
      .info = &info->stage,
      .cache_key = {
         .stage = MESA_SHADER_COMPUTE,
      },
      .feedback = {
         .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
      },
   };
   vk_pipeline_hash_shader_stage(&info->stage, stage.shader_sha1);

   struct anv_shader_bin *bin = NULL;

   populate_cs_prog_key(device, device->robust_buffer_access, &stage.key.cs);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);

   bool cache_hit = false;
   if (!skip_cache_lookup) {
      bin = anv_device_search_for_kernel(device, cache,
                                         &stage.cache_key,
                                         sizeof(stage.cache_key),
                                         &cache_hit);
   }

   if (bin == NULL &&
       (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
      return VK_PIPELINE_COMPILE_REQUIRED;

   void *mem_ctx = ralloc_context(NULL);
   if (bin == NULL) {
      int64_t stage_start = os_time_get_nano();

      stage.bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stage.surface_to_descriptor,
         .sampler_to_descriptor = stage.sampler_to_descriptor
      };

      /* Set up a binding for the gl_NumWorkGroups */
      stage.bind_map.surface_count = 1;
      stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
      };

      stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
      if (stage.nir == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_UNKNOWN);
      }

      NIR_PASS(_, stage.nir, anv_nir_add_base_work_group_id);

      anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);

      unsigned local_size = stage.nir->info.workgroup_size[0] *
                            stage.nir->info.workgroup_size[1] *
                            stage.nir->info.workgroup_size[2];

      /* Games don't always request full subgroups when they
should,
       * which can cause bugs, as they may expect a bigger subgroup size
       * than the one we choose for the execution.
       */
      if (device->physical->instance->assume_full_subgroups &&
          stage.nir->info.cs.uses_wide_subgroup_intrinsics &&
          stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
          local_size &&
          local_size % BRW_SUBGROUP_SIZE == 0)
         stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;

      /* If the client requests that we dispatch full subgroups but doesn't
       * allow us to pick a subgroup size, we have to smash it to the API
       * value of 32. Performance will likely be terrible in this case but
       * there's nothing we can do about that. The client should have chosen
       * a size.
       */
      if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
         stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE;

      stage.num_stats = 1;

      struct brw_compile_cs_params params = {
         .nir = stage.nir,
         .key = &stage.key.cs,
         .prog_data = &stage.prog_data.cs,
         .stats = stage.stats,
         .log_data = device,
      };

      stage.code = brw_compile_cs(compiler, mem_ctx, &params);
      if (stage.code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);

      if (!stage.prog_data.cs.uses_num_work_groups) {
         assert(stage.bind_map.surface_to_descriptor[0].set ==
                ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
         stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
      }

      const unsigned code_size = stage.prog_data.base.program_size;
      bin = anv_device_upload_kernel(device, cache,
                                     MESA_SHADER_COMPUTE,
                                     &stage.cache_key, sizeof(stage.cache_key),
                                     stage.code, code_size,
                                     &stage.prog_data.base,
                                     sizeof(stage.prog_data.cs),
                                     stage.stats, stage.num_stats,
                                     NULL, &stage.bind_map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      stage.feedback.duration = os_time_get_nano() - stage_start;
   }

   anv_pipeline_add_executables(&pipeline->base, &stage, bin);

   ralloc_free(mem_ctx);

   if (cache_hit) {
      stage.feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
   }
   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      if (create_feedback->pipelineStageCreationFeedbackCount) {
         assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
         create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
      }
   }

   pipeline->cs = bin;

   return VK_SUCCESS;
}

static VkResult
anv_compute_pipeline_create(struct anv_device *device,
                            struct vk_pipeline_cache *cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipeline)
{
   struct anv_compute_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

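   /* The pipeline batch is backed by the small batch_data[] array embedded
    * in the pipeline object (see anv_batch_set_storage() below);
    * compute_pipeline_emit() writes its commands there and the final
    * batch.status is what this function returns.
    */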
pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 1961 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1962 if (pipeline == NULL) 1963 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1964 1965 result = anv_pipeline_init(&pipeline->base, device, 1966 ANV_PIPELINE_COMPUTE, pCreateInfo->flags, 1967 pAllocator); 1968 if (result != VK_SUCCESS) { 1969 vk_free2(&device->vk.alloc, pAllocator, pipeline); 1970 return result; 1971 } 1972 1973 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS, 1974 pipeline->batch_data, sizeof(pipeline->batch_data)); 1975 1976 result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo); 1977 if (result != VK_SUCCESS) { 1978 anv_pipeline_finish(&pipeline->base, device, pAllocator); 1979 vk_free2(&device->vk.alloc, pAllocator, pipeline); 1980 return result; 1981 } 1982 1983 anv_genX(&device->info, compute_pipeline_emit)(pipeline); 1984 1985 *pPipeline = anv_pipeline_to_handle(&pipeline->base); 1986 1987 return pipeline->base.batch.status; 1988} 1989 1990VkResult anv_CreateComputePipelines( 1991 VkDevice _device, 1992 VkPipelineCache pipelineCache, 1993 uint32_t count, 1994 const VkComputePipelineCreateInfo* pCreateInfos, 1995 const VkAllocationCallbacks* pAllocator, 1996 VkPipeline* pPipelines) 1997{ 1998 ANV_FROM_HANDLE(anv_device, device, _device); 1999 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); 2000 2001 VkResult result = VK_SUCCESS; 2002 2003 unsigned i; 2004 for (i = 0; i < count; i++) { 2005 VkResult res = anv_compute_pipeline_create(device, pipeline_cache, 2006 &pCreateInfos[i], 2007 pAllocator, &pPipelines[i]); 2008 2009 if (res == VK_SUCCESS) 2010 continue; 2011 2012 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it 2013 * is not obvious what error should be report upon 2 different failures. 2014 * */ 2015 result = res; 2016 if (res != VK_PIPELINE_COMPILE_REQUIRED) 2017 break; 2018 2019 pPipelines[i] = VK_NULL_HANDLE; 2020 2021 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 2022 break; 2023 } 2024 2025 for (; i < count; i++) 2026 pPipelines[i] = VK_NULL_HANDLE; 2027 2028 return result; 2029} 2030 2031/** 2032 * Calculate the desired L3 partitioning based on the current state of the 2033 * pipeline. For now this simply returns the conservative defaults calculated 2034 * by get_default_l3_weights(), but we could probably do better by gathering 2035 * more statistics from the pipeline state (e.g. guess of expected URB usage 2036 * and bound surfaces), or by using feed-back from performance counters. 
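 * (Both the graphics and ray-tracing paths in this file currently pass
 * needs_slm = false.)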
2037 */ 2038void 2039anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm) 2040{ 2041 const struct intel_device_info *devinfo = &pipeline->device->info; 2042 2043 const struct intel_l3_weights w = 2044 intel_get_default_l3_weights(devinfo, true, needs_slm); 2045 2046 pipeline->l3_config = intel_get_l3_config(devinfo, w); 2047} 2048 2049static VkResult 2050anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, 2051 struct anv_device *device, 2052 struct vk_pipeline_cache *cache, 2053 const struct VkGraphicsPipelineCreateInfo *pCreateInfo, 2054 const struct vk_graphics_pipeline_state *state, 2055 const VkAllocationCallbacks *alloc) 2056{ 2057 VkResult result; 2058 2059 result = anv_pipeline_init(&pipeline->base, device, 2060 ANV_PIPELINE_GRAPHICS, pCreateInfo->flags, 2061 alloc); 2062 if (result != VK_SUCCESS) 2063 return result; 2064 2065 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS, 2066 pipeline->batch_data, sizeof(pipeline->batch_data)); 2067 2068 pipeline->active_stages = 0; 2069 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) 2070 pipeline->active_stages |= pCreateInfo->pStages[i].stage; 2071 2072 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) 2073 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; 2074 2075 if (anv_pipeline_is_mesh(pipeline)) 2076 assert(device->physical->vk.supported_extensions.NV_mesh_shader); 2077 2078 pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations; 2079 vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state); 2080 2081 pipeline->depth_clamp_enable = state->rs->depth_clamp_enable; 2082 pipeline->depth_clip_enable = state->rs->depth_clip_enable; 2083 pipeline->view_mask = state->rp->view_mask; 2084 2085 result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state); 2086 if (result != VK_SUCCESS) { 2087 anv_pipeline_finish(&pipeline->base, device, alloc); 2088 return result; 2089 } 2090 2091 anv_pipeline_setup_l3_config(&pipeline->base, false); 2092 2093 if (anv_pipeline_is_primitive(pipeline)) { 2094 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read; 2095 2096 u_foreach_bit(a, state->vi->attributes_valid) { 2097 if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a)) 2098 pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding); 2099 } 2100 2101 u_foreach_bit(b, state->vi->bindings_valid) { 2102 pipeline->vb[b].stride = state->vi->bindings[b].stride; 2103 pipeline->vb[b].instanced = state->vi->bindings[b].input_rate == 2104 VK_VERTEX_INPUT_RATE_INSTANCE; 2105 pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor; 2106 } 2107 2108 /* Our implementation of VK_KHR_multiview uses instancing to draw the 2109 * different views. If the client asks for instancing, we need to multiply 2110 * the instance divisor by the number of views ensure that we repeat the 2111 * client's per-instance data once for each view. 2112 */ 2113 pipeline->instance_multiplier = 1; 2114 if (pipeline->view_mask && !pipeline->use_primitive_replication) 2115 pipeline->instance_multiplier = util_bitcount(pipeline->view_mask); 2116 } else { 2117 assert(anv_pipeline_is_mesh(pipeline)); 2118 /* TODO(mesh): Mesh vs. Multiview with Instancing. */ 2119 } 2120 2121 pipeline->negative_one_to_one = 2122 state->vp != NULL && state->vp->negative_one_to_one; 2123 2124 /* Store line mode, polygon mode and rasterization samples, these are used 2125 * for dynamic primitive topology. 
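    * (VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT is resolved right below to
    * rectangular lines when multisampling and to Bresenham otherwise.)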
2126 */ 2127 pipeline->polygon_mode = state->rs->polygon_mode; 2128 pipeline->rasterization_samples = 2129 state->ms != NULL ? state->ms->rasterization_samples : 1; 2130 pipeline->line_mode = state->rs->line.mode; 2131 if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) { 2132 if (pipeline->rasterization_samples > 1) { 2133 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; 2134 } else { 2135 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; 2136 } 2137 } 2138 pipeline->patch_control_points = 2139 state->ts != NULL ? state->ts->patch_control_points : 0; 2140 2141 /* Store the color write masks, to be merged with color write enable if 2142 * dynamic. 2143 */ 2144 if (state->cb != NULL) { 2145 for (unsigned i = 0; i < state->cb->attachment_count; i++) 2146 pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask; 2147 } 2148 2149 return VK_SUCCESS; 2150} 2151 2152static VkResult 2153anv_graphics_pipeline_create(struct anv_device *device, 2154 struct vk_pipeline_cache *cache, 2155 const VkGraphicsPipelineCreateInfo *pCreateInfo, 2156 const VkAllocationCallbacks *pAllocator, 2157 VkPipeline *pPipeline) 2158{ 2159 struct anv_graphics_pipeline *pipeline; 2160 VkResult result; 2161 2162 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); 2163 2164 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 2165 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2166 if (pipeline == NULL) 2167 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2168 2169 struct vk_graphics_pipeline_all_state all; 2170 struct vk_graphics_pipeline_state state = { }; 2171 result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo, 2172 NULL /* sp_info */, 2173 &all, NULL, 0, NULL); 2174 if (result != VK_SUCCESS) { 2175 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2176 return result; 2177 } 2178 2179 result = anv_graphics_pipeline_init(pipeline, device, cache, 2180 pCreateInfo, &state, pAllocator); 2181 if (result != VK_SUCCESS) { 2182 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2183 return result; 2184 } 2185 2186 anv_genX(&device->info, graphics_pipeline_emit)(pipeline, &state); 2187 2188 *pPipeline = anv_pipeline_to_handle(&pipeline->base); 2189 2190 return pipeline->base.batch.status; 2191} 2192 2193VkResult anv_CreateGraphicsPipelines( 2194 VkDevice _device, 2195 VkPipelineCache pipelineCache, 2196 uint32_t count, 2197 const VkGraphicsPipelineCreateInfo* pCreateInfos, 2198 const VkAllocationCallbacks* pAllocator, 2199 VkPipeline* pPipelines) 2200{ 2201 ANV_FROM_HANDLE(anv_device, device, _device); 2202 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); 2203 2204 VkResult result = VK_SUCCESS; 2205 2206 unsigned i; 2207 for (i = 0; i < count; i++) { 2208 VkResult res = anv_graphics_pipeline_create(device, 2209 pipeline_cache, 2210 &pCreateInfos[i], 2211 pAllocator, &pPipelines[i]); 2212 2213 if (res == VK_SUCCESS) 2214 continue; 2215 2216 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it 2217 * is not obvious what error should be report upon 2 different failures. 
       * */
      result = res;
      if (res != VK_PIPELINE_COMPILE_REQUIRED)
         break;

      pPipelines[i] = VK_NULL_HANDLE;

      if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
                         struct vk_pipeline_cache *cache,
                         nir_shader *nir,
                         struct anv_pipeline_stage *stage,
                         struct anv_shader_bin **shader_out,
                         void *mem_ctx)
{
   const struct brw_compiler *compiler =
      pipeline->base.device->physical->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;

   nir_shader **resume_shaders = NULL;
   uint32_t num_resume_shaders = 0;
   if (nir->info.stage != MESA_SHADER_COMPUTE) {
      NIR_PASS(_, nir, nir_lower_shader_calls,
               nir_address_format_64bit_global,
               BRW_BTD_STACK_ALIGN,
               &resume_shaders, &num_resume_shaders, mem_ctx);
      NIR_PASS(_, nir, brw_nir_lower_shader_calls);
      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
   }

   for (unsigned i = 0; i < num_resume_shaders; i++) {
      NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls);
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
   }

   struct brw_compile_bs_params params = {
      .nir = nir,
      .key = &stage->key.bs,
      .prog_data = &stage->prog_data.bs,
      .num_resume_shaders = num_resume_shaders,
      .resume_shaders = resume_shaders,

      .stats = stage->stats,
      .log_data = pipeline->base.device,
   };

   stage->code = brw_compile_bs(compiler, mem_ctx, &params);
   if (stage->code == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Ray-tracing shaders don't have a "real" bind map */
   struct anv_pipeline_bind_map empty_bind_map = {};

   const unsigned code_size = stage->prog_data.base.program_size;
   struct anv_shader_bin *bin =
      anv_device_upload_kernel(pipeline->base.device,
                               cache,
                               stage->stage,
                               &stage->cache_key, sizeof(stage->cache_key),
                               stage->code, code_size,
                               &stage->prog_data.base,
                               sizeof(stage->prog_data.bs),
                               stage->stats, 1,
                               NULL, &empty_bind_map);
   if (bin == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* TODO: Figure out executables for resume shaders */
   anv_pipeline_add_executables(&pipeline->base, stage, bin);
   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);

   *shader_out = bin;

   return VK_SUCCESS;
}

static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
   if (info->pDynamicState == NULL)
      return false;

   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
      if (info->pDynamicState->pDynamicStates[i] ==
          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
         return true;
   }

   return false;
}

static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
                                        const VkRayTracingPipelineCreateInfoKHR *info,
                                        uint32_t *stack_max)
{
   if (is_rt_stack_size_dynamic(info)) {
      pipeline->stack_size = 0; /* 0 means dynamic */
   } else {
      /* From the Vulkan spec:
       *
       *    "If the stack size is not set explicitly, the stack size for a
       *    pipeline is:
       *
rayGenStackMax + 2333 * min(1, maxPipelineRayRecursionDepth) × 2334 * max(closestHitStackMax, missStackMax, 2335 * intersectionStackMax + anyHitStackMax) + 2336 * max(0, maxPipelineRayRecursionDepth-1) × 2337 * max(closestHitStackMax, missStackMax) + 2338 * 2 × callableStackMax" 2339 */ 2340 pipeline->stack_size = 2341 stack_max[MESA_SHADER_RAYGEN] + 2342 MIN2(1, info->maxPipelineRayRecursionDepth) * 2343 MAX4(stack_max[MESA_SHADER_CLOSEST_HIT], 2344 stack_max[MESA_SHADER_MISS], 2345 stack_max[MESA_SHADER_INTERSECTION], 2346 stack_max[MESA_SHADER_ANY_HIT]) + 2347 MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) * 2348 MAX2(stack_max[MESA_SHADER_CLOSEST_HIT], 2349 stack_max[MESA_SHADER_MISS]) + 2350 2 * stack_max[MESA_SHADER_CALLABLE]; 2351 2352 /* This is an extremely unlikely case but we need to set it to some 2353 * non-zero value so that we don't accidentally think it's dynamic. 2354 * Our minimum stack size is 2KB anyway so we could set to any small 2355 * value we like. 2356 */ 2357 if (pipeline->stack_size == 0) 2358 pipeline->stack_size = 1; 2359 } 2360} 2361 2362static struct anv_pipeline_stage * 2363anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline, 2364 const VkRayTracingPipelineCreateInfoKHR *info, 2365 void *pipeline_ctx) 2366{ 2367 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 2368 2369 /* Create enough stage entries for all shader modules plus potential 2370 * combinaisons in the groups. 2371 */ 2372 struct anv_pipeline_stage *stages = 2373 rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount); 2374 2375 for (uint32_t i = 0; i < info->stageCount; i++) { 2376 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i]; 2377 if (vk_pipeline_shader_stage_is_null(sinfo)) 2378 continue; 2379 2380 int64_t stage_start = os_time_get_nano(); 2381 2382 stages[i] = (struct anv_pipeline_stage) { 2383 .stage = vk_to_mesa_shader_stage(sinfo->stage), 2384 .info = sinfo, 2385 .cache_key = { 2386 .stage = vk_to_mesa_shader_stage(sinfo->stage), 2387 }, 2388 .feedback = { 2389 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 2390 }, 2391 }; 2392 2393 populate_bs_prog_key(pipeline->base.device, 2394 pipeline->base.device->robust_buffer_access, 2395 &stages[i].key.bs); 2396 2397 vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1); 2398 2399 if (stages[i].stage != MESA_SHADER_INTERSECTION) { 2400 anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i], 2401 stages[i].cache_key.sha1); 2402 } 2403 2404 stages[i].feedback.duration += os_time_get_nano() - stage_start; 2405 } 2406 2407 for (uint32_t i = 0; i < info->groupCount; i++) { 2408 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i]; 2409 2410 if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR) 2411 continue; 2412 2413 int64_t stage_start = os_time_get_nano(); 2414 2415 uint32_t intersection_idx = ginfo->intersectionShader; 2416 assert(intersection_idx < info->stageCount); 2417 2418 uint32_t any_hit_idx = ginfo->anyHitShader; 2419 if (any_hit_idx != VK_SHADER_UNUSED_KHR) { 2420 assert(any_hit_idx < info->stageCount); 2421 anv_pipeline_hash_ray_tracing_combined_shader(pipeline, 2422 layout, 2423 &stages[intersection_idx], 2424 &stages[any_hit_idx], 2425 stages[intersection_idx].cache_key.sha1); 2426 } else { 2427 anv_pipeline_hash_ray_tracing_shader(pipeline, layout, 2428 &stages[intersection_idx], 2429 stages[intersection_idx].cache_key.sha1); 2430 } 2431 2432 stages[intersection_idx].feedback.duration += 
os_time_get_nano() - stage_start; 2433 } 2434 2435 return stages; 2436} 2437 2438static bool 2439anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline, 2440 struct vk_pipeline_cache *cache, 2441 const VkRayTracingPipelineCreateInfoKHR *info, 2442 struct anv_pipeline_stage *stages, 2443 uint32_t *stack_max) 2444{ 2445 uint32_t shaders = 0, cache_hits = 0; 2446 for (uint32_t i = 0; i < info->stageCount; i++) { 2447 if (stages[i].info == NULL) 2448 continue; 2449 2450 shaders++; 2451 2452 int64_t stage_start = os_time_get_nano(); 2453 2454 bool cache_hit; 2455 stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache, 2456 &stages[i].cache_key, 2457 sizeof(stages[i].cache_key), 2458 &cache_hit); 2459 if (cache_hit) { 2460 cache_hits++; 2461 stages[i].feedback.flags |= 2462 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 2463 } 2464 2465 if (stages[i].bin != NULL) { 2466 anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin); 2467 util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin); 2468 2469 uint32_t stack_size = 2470 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size; 2471 stack_max[stages[i].stage] = 2472 MAX2(stack_max[stages[i].stage], stack_size); 2473 } 2474 2475 stages[i].feedback.duration += os_time_get_nano() - stage_start; 2476 } 2477 2478 return cache_hits == shaders; 2479} 2480 2481static VkResult 2482anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline, 2483 struct vk_pipeline_cache *cache, 2484 const VkRayTracingPipelineCreateInfoKHR *info) 2485{ 2486 const struct intel_device_info *devinfo = &pipeline->base.device->info; 2487 VkResult result; 2488 2489 VkPipelineCreationFeedback pipeline_feedback = { 2490 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 2491 }; 2492 int64_t pipeline_start = os_time_get_nano(); 2493 2494 void *pipeline_ctx = ralloc_context(NULL); 2495 2496 struct anv_pipeline_stage *stages = 2497 anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx); 2498 2499 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 2500 2501 const bool skip_cache_lookup = 2502 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); 2503 2504 uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {}; 2505 2506 if (!skip_cache_lookup && 2507 anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) { 2508 pipeline_feedback.flags |= 2509 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 2510 goto done; 2511 } 2512 2513 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { 2514 ralloc_free(pipeline_ctx); 2515 return VK_PIPELINE_COMPILE_REQUIRED; 2516 } 2517 2518 for (uint32_t i = 0; i < info->stageCount; i++) { 2519 if (stages[i].info == NULL) 2520 continue; 2521 2522 int64_t stage_start = os_time_get_nano(); 2523 2524 stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, 2525 pipeline_ctx, &stages[i]); 2526 if (stages[i].nir == NULL) { 2527 ralloc_free(pipeline_ctx); 2528 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 2529 } 2530 2531 anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout); 2532 2533 stages[i].feedback.duration += os_time_get_nano() - stage_start; 2534 } 2535 2536 for (uint32_t i = 0; i < info->stageCount; i++) { 2537 if (stages[i].info == NULL) 2538 continue; 2539 2540 /* Shader found in cache already. 
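       * anv_pipeline_load_cached_shaders() filled in stages[i].bin and
       * already accounted for its stack usage in stack_max.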
*/ 2541 if (stages[i].bin != NULL) 2542 continue; 2543 2544 /* We handle intersection shaders as part of the group */ 2545 if (stages[i].stage == MESA_SHADER_INTERSECTION) 2546 continue; 2547 2548 int64_t stage_start = os_time_get_nano(); 2549 2550 void *stage_ctx = ralloc_context(pipeline_ctx); 2551 2552 nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir); 2553 switch (stages[i].stage) { 2554 case MESA_SHADER_RAYGEN: 2555 brw_nir_lower_raygen(nir); 2556 break; 2557 2558 case MESA_SHADER_ANY_HIT: 2559 brw_nir_lower_any_hit(nir, devinfo); 2560 break; 2561 2562 case MESA_SHADER_CLOSEST_HIT: 2563 brw_nir_lower_closest_hit(nir); 2564 break; 2565 2566 case MESA_SHADER_MISS: 2567 brw_nir_lower_miss(nir); 2568 break; 2569 2570 case MESA_SHADER_INTERSECTION: 2571 unreachable("These are handled later"); 2572 2573 case MESA_SHADER_CALLABLE: 2574 brw_nir_lower_callable(nir); 2575 break; 2576 2577 default: 2578 unreachable("Invalid ray-tracing shader stage"); 2579 } 2580 2581 result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i], 2582 &stages[i].bin, stage_ctx); 2583 if (result != VK_SUCCESS) { 2584 ralloc_free(pipeline_ctx); 2585 return result; 2586 } 2587 2588 uint32_t stack_size = 2589 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size; 2590 stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size); 2591 2592 ralloc_free(stage_ctx); 2593 2594 stages[i].feedback.duration += os_time_get_nano() - stage_start; 2595 } 2596 2597 for (uint32_t i = 0; i < info->groupCount; i++) { 2598 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i]; 2599 struct anv_rt_shader_group *group = &pipeline->groups[i]; 2600 group->type = ginfo->type; 2601 switch (ginfo->type) { 2602 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 2603 assert(ginfo->generalShader < info->stageCount); 2604 group->general = stages[ginfo->generalShader].bin; 2605 break; 2606 2607 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 2608 if (ginfo->anyHitShader < info->stageCount) 2609 group->any_hit = stages[ginfo->anyHitShader].bin; 2610 2611 if (ginfo->closestHitShader < info->stageCount) 2612 group->closest_hit = stages[ginfo->closestHitShader].bin; 2613 break; 2614 2615 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: { 2616 if (ginfo->closestHitShader < info->stageCount) 2617 group->closest_hit = stages[ginfo->closestHitShader].bin; 2618 2619 uint32_t intersection_idx = info->pGroups[i].intersectionShader; 2620 assert(intersection_idx < info->stageCount); 2621 2622 /* Only compile this stage if not already found in the cache. 
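          * A cache miss leaves stages[intersection_idx].bin == NULL; in that
          * case the intersection and any-hit shaders are lowered and
          * compiled together below.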
          */
         if (stages[intersection_idx].bin == NULL) {
            /* The any-hit and intersection shader have to be combined */
            uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
            const nir_shader *any_hit = NULL;
            if (any_hit_idx < info->stageCount)
               any_hit = stages[any_hit_idx].nir;

            void *group_ctx = ralloc_context(pipeline_ctx);
            nir_shader *intersection =
               nir_shader_clone(group_ctx, stages[intersection_idx].nir);

            brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
                                                        devinfo);

            result = compile_upload_rt_shader(pipeline, cache,
                                              intersection,
                                              &stages[intersection_idx],
                                              &group->intersection,
                                              group_ctx);
            ralloc_free(group_ctx);
            if (result != VK_SUCCESS)
               return result;
         } else {
            group->intersection = stages[intersection_idx].bin;
         }

         uint32_t stack_size =
            brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
         stack_max[MESA_SHADER_INTERSECTION] =
            MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);

         break;
      }

      default:
         unreachable("Invalid ray tracing shader group type");
      }
   }

 done:
   ralloc_free(pipeline_ctx);

   anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
      assert(stage_count == 0 || info->stageCount == stage_count);
      for (uint32_t i = 0; i < stage_count; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;
}

VkResult
anv_device_init_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return VK_SUCCESS;

   bool cache_hit;

   struct brw_rt_trampoline {
      char name[16];
      struct brw_cs_prog_key key;
   } trampoline_key = {
      .name = "rt-trampoline",
   };
   device->rt_trampoline =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   &trampoline_key, sizeof(trampoline_key),
                                   &cache_hit);
   if (device->rt_trampoline == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trampoline_nir =
         brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);

      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      uint32_t dummy_params[4] = { 0, };
      struct brw_cs_prog_data trampoline_prog_data = {
         .base.nr_params = 4,
         .base.param = dummy_params,
         .uses_inline_data = true,
         .uses_btd_stack_ids = true,
      };
      struct brw_compile_cs_params params = {
         .nir = trampoline_nir,
         .key = &trampoline_key.key,
         .prog_data = &trampoline_prog_data,
         .log_data = device,
      };
      const unsigned *tramp_data =
         brw_compile_cs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trampoline =
         anv_device_upload_kernel(device, device->internal_cache,
                                  MESA_SHADER_COMPUTE,
                                  &trampoline_key, sizeof(trampoline_key),
                                  tramp_data,
                                  trampoline_prog_data.base.program_size,
                                  &trampoline_prog_data.base,
                                  sizeof(trampoline_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trampoline == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, device->rt_trampoline);

   struct brw_rt_trivial_return {
      char name[16];
      struct brw_bs_prog_key key;
   } return_key = {
      .name = "rt-trivial-ret",
   };
   device->rt_trivial_return =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   &return_key, sizeof(return_key),
                                   &cache_hit);
   if (device->rt_trivial_return == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trivial_return_nir =
         brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);

      NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      struct brw_bs_prog_data return_prog_data = { 0, };
      struct brw_compile_bs_params params = {
         .nir = trivial_return_nir,
         .key = &return_key.key,
         .prog_data = &return_prog_data,

         .log_data = device,
      };
      const unsigned *return_data =
         brw_compile_bs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trivial_return =
         anv_device_upload_kernel(device, device->internal_cache,
                                  MESA_SHADER_CALLABLE,
                                  &return_key, sizeof(return_key),
                                  return_data, return_prog_data.base.program_size,
                                  &return_prog_data.base, sizeof(return_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trivial_return == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
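    * (The internal cache keeps the shader alive, so rt_trivial_return stays
    * valid after the unref below.)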
2800 */ 2801 anv_shader_bin_unref(device, device->rt_trivial_return); 2802 2803 return VK_SUCCESS; 2804} 2805 2806void 2807anv_device_finish_rt_shaders(struct anv_device *device) 2808{ 2809 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) 2810 return; 2811} 2812 2813static VkResult 2814anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, 2815 struct anv_device *device, 2816 struct vk_pipeline_cache *cache, 2817 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 2818 const VkAllocationCallbacks *alloc) 2819{ 2820 VkResult result; 2821 2822 util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx); 2823 2824 result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo); 2825 if (result != VK_SUCCESS) 2826 goto fail; 2827 2828 anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false); 2829 2830 return VK_SUCCESS; 2831 2832fail: 2833 util_dynarray_foreach(&pipeline->shaders, 2834 struct anv_shader_bin *, shader) { 2835 anv_shader_bin_unref(device, *shader); 2836 } 2837 return result; 2838} 2839 2840static void 2841assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, 2842 uint32_t stage_idx, 2843 VkShaderStageFlags valid_stages) 2844{ 2845 if (stage_idx == VK_SHADER_UNUSED_KHR) 2846 return; 2847 2848 assert(stage_idx <= pCreateInfo->stageCount); 2849 assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1); 2850 assert(pCreateInfo->pStages[stage_idx].stage & valid_stages); 2851} 2852 2853static VkResult 2854anv_ray_tracing_pipeline_create( 2855 VkDevice _device, 2856 struct vk_pipeline_cache * cache, 2857 const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, 2858 const VkAllocationCallbacks* pAllocator, 2859 VkPipeline* pPipeline) 2860{ 2861 ANV_FROM_HANDLE(anv_device, device, _device); 2862 VkResult result; 2863 2864 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR); 2865 2866 VK_MULTIALLOC(ma); 2867 VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1); 2868 VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount); 2869 if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, 2870 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) 2871 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2872 2873 result = anv_pipeline_init(&pipeline->base, device, 2874 ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags, 2875 pAllocator); 2876 if (result != VK_SUCCESS) { 2877 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2878 return result; 2879 } 2880 2881 pipeline->group_count = pCreateInfo->groupCount; 2882 pipeline->groups = groups; 2883 2884 ASSERTED const VkShaderStageFlags ray_tracing_stages = 2885 VK_SHADER_STAGE_RAYGEN_BIT_KHR | 2886 VK_SHADER_STAGE_ANY_HIT_BIT_KHR | 2887 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | 2888 VK_SHADER_STAGE_MISS_BIT_KHR | 2889 VK_SHADER_STAGE_INTERSECTION_BIT_KHR | 2890 VK_SHADER_STAGE_CALLABLE_BIT_KHR; 2891 2892 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) 2893 assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0); 2894 2895 for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) { 2896 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = 2897 &pCreateInfo->pGroups[i]; 2898 assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader, 2899 VK_SHADER_STAGE_RAYGEN_BIT_KHR | 2900 VK_SHADER_STAGE_MISS_BIT_KHR | 2901 VK_SHADER_STAGE_CALLABLE_BIT_KHR); 2902 assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader, 2903 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR); 2904 
assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader, 2905 VK_SHADER_STAGE_ANY_HIT_BIT_KHR); 2906 assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader, 2907 VK_SHADER_STAGE_INTERSECTION_BIT_KHR); 2908 switch (ginfo->type) { 2909 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 2910 assert(ginfo->generalShader < pCreateInfo->stageCount); 2911 assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR); 2912 assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR); 2913 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); 2914 break; 2915 2916 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 2917 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); 2918 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); 2919 break; 2920 2921 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: 2922 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); 2923 break; 2924 2925 default: 2926 unreachable("Invalid ray-tracing shader group type"); 2927 } 2928 } 2929 2930 result = anv_ray_tracing_pipeline_init(pipeline, device, cache, 2931 pCreateInfo, pAllocator); 2932 if (result != VK_SUCCESS) { 2933 anv_pipeline_finish(&pipeline->base, device, pAllocator); 2934 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2935 return result; 2936 } 2937 2938 anv_genX(&device->info, ray_tracing_pipeline_emit)(pipeline); 2939 2940 *pPipeline = anv_pipeline_to_handle(&pipeline->base); 2941 2942 return pipeline->base.batch.status; 2943} 2944 2945VkResult 2946anv_CreateRayTracingPipelinesKHR( 2947 VkDevice _device, 2948 VkDeferredOperationKHR deferredOperation, 2949 VkPipelineCache pipelineCache, 2950 uint32_t createInfoCount, 2951 const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, 2952 const VkAllocationCallbacks* pAllocator, 2953 VkPipeline* pPipelines) 2954{ 2955 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); 2956 2957 VkResult result = VK_SUCCESS; 2958 2959 unsigned i; 2960 for (i = 0; i < createInfoCount; i++) { 2961 VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache, 2962 &pCreateInfos[i], 2963 pAllocator, &pPipelines[i]); 2964 2965 if (res == VK_SUCCESS) 2966 continue; 2967 2968 /* Bail out on the first error as it is not obvious what error should be 2969 * report upon 2 different failures. */ 2970 result = res; 2971 if (result != VK_PIPELINE_COMPILE_REQUIRED) 2972 break; 2973 2974 pPipelines[i] = VK_NULL_HANDLE; 2975 2976 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 2977 break; 2978 } 2979 2980 for (; i < createInfoCount; i++) 2981 pPipelines[i] = VK_NULL_HANDLE; 2982 2983 return result; 2984} 2985 2986#define WRITE_STR(field, ...) 
({ \ 2987 memset(field, 0, sizeof(field)); \ 2988 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \ 2989 assert(i > 0 && i < sizeof(field)); \ 2990}) 2991 2992VkResult anv_GetPipelineExecutablePropertiesKHR( 2993 VkDevice device, 2994 const VkPipelineInfoKHR* pPipelineInfo, 2995 uint32_t* pExecutableCount, 2996 VkPipelineExecutablePropertiesKHR* pProperties) 2997{ 2998 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline); 2999 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, 3000 pProperties, pExecutableCount); 3001 3002 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) { 3003 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) { 3004 gl_shader_stage stage = exe->stage; 3005 props->stages = mesa_to_vk_shader_stage(stage); 3006 3007 unsigned simd_width = exe->stats.dispatch_width; 3008 if (stage == MESA_SHADER_FRAGMENT) { 3009 WRITE_STR(props->name, "%s%d %s", 3010 simd_width ? "SIMD" : "vec", 3011 simd_width ? simd_width : 4, 3012 _mesa_shader_stage_to_string(stage)); 3013 } else { 3014 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage)); 3015 } 3016 WRITE_STR(props->description, "%s%d %s shader", 3017 simd_width ? "SIMD" : "vec", 3018 simd_width ? simd_width : 4, 3019 _mesa_shader_stage_to_string(stage)); 3020 3021 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan 3022 * wants a subgroup size of 1. 3023 */ 3024 props->subgroupSize = MAX2(simd_width, 1); 3025 } 3026 } 3027 3028 return vk_outarray_status(&out); 3029} 3030 3031static const struct anv_pipeline_executable * 3032anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index) 3033{ 3034 assert(index < util_dynarray_num_elements(&pipeline->executables, 3035 struct anv_pipeline_executable)); 3036 return util_dynarray_element( 3037 &pipeline->executables, struct anv_pipeline_executable, index); 3038} 3039 3040VkResult anv_GetPipelineExecutableStatisticsKHR( 3041 VkDevice device, 3042 const VkPipelineExecutableInfoKHR* pExecutableInfo, 3043 uint32_t* pStatisticCount, 3044 VkPipelineExecutableStatisticKHR* pStatistics) 3045{ 3046 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 3047 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, 3048 pStatistics, pStatisticCount); 3049 3050 const struct anv_pipeline_executable *exe = 3051 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3052 3053 const struct brw_stage_prog_data *prog_data; 3054 switch (pipeline->type) { 3055 case ANV_PIPELINE_GRAPHICS: { 3056 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data; 3057 break; 3058 } 3059 case ANV_PIPELINE_COMPUTE: { 3060 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data; 3061 break; 3062 } 3063 case ANV_PIPELINE_RAY_TRACING: { 3064 struct anv_shader_bin **shader = 3065 util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders, 3066 struct anv_shader_bin *, 3067 pExecutableInfo->executableIndex); 3068 prog_data = (*shader)->prog_data; 3069 break; 3070 } 3071 default: 3072 unreachable("invalid pipeline type"); 3073 } 3074 3075 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3076 WRITE_STR(stat->name, "Instruction Count"); 3077 WRITE_STR(stat->description, 3078 "Number of GEN instructions in the final generated " 3079 "shader executable."); 3080 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3081 stat->value.u64 = exe->stats.instructions; 3082 } 3083 3084 
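   /* The counters below come from the brw_compile_stats recorded when the
    * shader was compiled; the scratch and workgroup sizes further down are
    * read from the stage's prog_data instead.
    */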
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3085 WRITE_STR(stat->name, "SEND Count"); 3086 WRITE_STR(stat->description, 3087 "Number of instructions in the final generated shader " 3088 "executable which access external units such as the " 3089 "constant cache or the sampler."); 3090 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3091 stat->value.u64 = exe->stats.sends; 3092 } 3093 3094 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3095 WRITE_STR(stat->name, "Loop Count"); 3096 WRITE_STR(stat->description, 3097 "Number of loops (not unrolled) in the final generated " 3098 "shader executable."); 3099 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3100 stat->value.u64 = exe->stats.loops; 3101 } 3102 3103 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3104 WRITE_STR(stat->name, "Cycle Count"); 3105 WRITE_STR(stat->description, 3106 "Estimate of the number of EU cycles required to execute " 3107 "the final generated executable. This is an estimate only " 3108 "and may vary greatly from actual run-time performance."); 3109 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3110 stat->value.u64 = exe->stats.cycles; 3111 } 3112 3113 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3114 WRITE_STR(stat->name, "Spill Count"); 3115 WRITE_STR(stat->description, 3116 "Number of scratch spill operations. This gives a rough " 3117 "estimate of the cost incurred due to spilling temporary " 3118 "values to memory. If this is non-zero, you may want to " 3119 "adjust your shader to reduce register pressure."); 3120 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3121 stat->value.u64 = exe->stats.spills; 3122 } 3123 3124 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3125 WRITE_STR(stat->name, "Fill Count"); 3126 WRITE_STR(stat->description, 3127 "Number of scratch fill operations. This gives a rough " 3128 "estimate of the cost incurred due to spilling temporary " 3129 "values to memory. If this is non-zero, you may want to " 3130 "adjust your shader to reduce register pressure."); 3131 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3132 stat->value.u64 = exe->stats.fills; 3133 } 3134 3135 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3136 WRITE_STR(stat->name, "Scratch Memory Size"); 3137 WRITE_STR(stat->description, 3138 "Number of bytes of scratch memory required by the " 3139 "generated shader executable. 
If this is non-zero, you " 3140 "may want to adjust your shader to reduce register " 3141 "pressure."); 3142 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3143 stat->value.u64 = prog_data->total_scratch; 3144 } 3145 3146 if (gl_shader_stage_uses_workgroup(exe->stage)) { 3147 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3148 WRITE_STR(stat->name, "Workgroup Memory Size"); 3149 WRITE_STR(stat->description, 3150 "Number of bytes of workgroup shared memory used by this " 3151 "shader including any padding."); 3152 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3153 stat->value.u64 = prog_data->total_shared; 3154 } 3155 } 3156 3157 return vk_outarray_status(&out); 3158} 3159 3160static bool 3161write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir, 3162 const char *data) 3163{ 3164 ir->isText = VK_TRUE; 3165 3166 size_t data_len = strlen(data) + 1; 3167 3168 if (ir->pData == NULL) { 3169 ir->dataSize = data_len; 3170 return true; 3171 } 3172 3173 strncpy(ir->pData, data, ir->dataSize); 3174 if (ir->dataSize < data_len) 3175 return false; 3176 3177 ir->dataSize = data_len; 3178 return true; 3179} 3180 3181VkResult anv_GetPipelineExecutableInternalRepresentationsKHR( 3182 VkDevice device, 3183 const VkPipelineExecutableInfoKHR* pExecutableInfo, 3184 uint32_t* pInternalRepresentationCount, 3185 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations) 3186{ 3187 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 3188 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out, 3189 pInternalRepresentations, pInternalRepresentationCount); 3190 bool incomplete_text = false; 3191 3192 const struct anv_pipeline_executable *exe = 3193 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3194 3195 if (exe->nir) { 3196 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { 3197 WRITE_STR(ir->name, "Final NIR"); 3198 WRITE_STR(ir->description, 3199 "Final NIR before going into the back-end compiler"); 3200 3201 if (!write_ir_text(ir, exe->nir)) 3202 incomplete_text = true; 3203 } 3204 } 3205 3206 if (exe->disasm) { 3207 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { 3208 WRITE_STR(ir->name, "GEN Assembly"); 3209 WRITE_STR(ir->description, 3210 "Final GEN assembly for the generated shader binary"); 3211 3212 if (!write_ir_text(ir, exe->disasm)) 3213 incomplete_text = true; 3214 } 3215 } 3216 3217 return incomplete_text ? 
VK_INCOMPLETE : vk_outarray_status(&out); 3218} 3219 3220VkResult 3221anv_GetRayTracingShaderGroupHandlesKHR( 3222 VkDevice _device, 3223 VkPipeline _pipeline, 3224 uint32_t firstGroup, 3225 uint32_t groupCount, 3226 size_t dataSize, 3227 void* pData) 3228{ 3229 ANV_FROM_HANDLE(anv_device, device, _device); 3230 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 3231 3232 if (pipeline->type != ANV_PIPELINE_RAY_TRACING) 3233 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 3234 3235 struct anv_ray_tracing_pipeline *rt_pipeline = 3236 anv_pipeline_to_ray_tracing(pipeline); 3237 3238 for (uint32_t i = 0; i < groupCount; i++) { 3239 struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i]; 3240 memcpy(pData, group->handle, sizeof(group->handle)); 3241 pData += sizeof(group->handle); 3242 } 3243 3244 return VK_SUCCESS; 3245} 3246 3247VkResult 3248anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR( 3249 VkDevice _device, 3250 VkPipeline pipeline, 3251 uint32_t firstGroup, 3252 uint32_t groupCount, 3253 size_t dataSize, 3254 void* pData) 3255{ 3256 ANV_FROM_HANDLE(anv_device, device, _device); 3257 unreachable("Unimplemented"); 3258 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 3259} 3260 3261VkDeviceSize 3262anv_GetRayTracingShaderGroupStackSizeKHR( 3263 VkDevice device, 3264 VkPipeline _pipeline, 3265 uint32_t group, 3266 VkShaderGroupShaderKHR groupShader) 3267{ 3268 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 3269 assert(pipeline->type == ANV_PIPELINE_RAY_TRACING); 3270 3271 struct anv_ray_tracing_pipeline *rt_pipeline = 3272 anv_pipeline_to_ray_tracing(pipeline); 3273 3274 assert(group < rt_pipeline->group_count); 3275 3276 struct anv_shader_bin *bin; 3277 switch (groupShader) { 3278 case VK_SHADER_GROUP_SHADER_GENERAL_KHR: 3279 bin = rt_pipeline->groups[group].general; 3280 break; 3281 3282 case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR: 3283 bin = rt_pipeline->groups[group].closest_hit; 3284 break; 3285 3286 case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR: 3287 bin = rt_pipeline->groups[group].any_hit; 3288 break; 3289 3290 case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR: 3291 bin = rt_pipeline->groups[group].intersection; 3292 break; 3293 3294 default: 3295 unreachable("Invalid VkShaderGroupShader enum"); 3296 } 3297 3298 if (bin == NULL) 3299 return 0; 3300 3301 return brw_bs_prog_data_const(bin->prog_data)->max_stack_size; 3302} 3303
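
/*
 * A minimal, hypothetical sketch of how an application consumes the
 * VK_KHR_pipeline_executable_properties entry points implemented above.
 * It assumes `device` and `pipeline` are valid handles, that the
 * pipelineExecutableInfo feature is enabled, and that the pipeline was
 * created with VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR:
 *
 *    VkPipelineInfoKHR pipe_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
 *       .pipeline = pipeline,
 *    };
 *    uint32_t exe_count = 0;
 *    vkGetPipelineExecutablePropertiesKHR(device, &pipe_info, &exe_count, NULL);
 *
 *    VkPipelineExecutablePropertiesKHR *props = calloc(exe_count, sizeof(*props));
 *    for (uint32_t i = 0; i < exe_count; i++)
 *       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
 *    vkGetPipelineExecutablePropertiesKHR(device, &pipe_info, &exe_count, props);
 *
 *    for (uint32_t e = 0; e < exe_count; e++) {
 *       VkPipelineExecutableInfoKHR exe_info = {
 *          .sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
 *          .pipeline = pipeline,
 *          .executableIndex = e,
 *       };
 *       uint32_t stat_count = 0;
 *       vkGetPipelineExecutableStatisticsKHR(device, &exe_info, &stat_count, NULL);
 *
 *       VkPipelineExecutableStatisticKHR *stats = calloc(stat_count, sizeof(*stats));
 *       for (uint32_t s = 0; s < stat_count; s++)
 *          stats[s].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
 *       vkGetPipelineExecutableStatisticsKHR(device, &exe_info, &stat_count, stats);
 *
 *       for (uint32_t s = 0; s < stat_count; s++) {
 *          if (stats[s].format == VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR)
 *             printf("%s: %s = %" PRIu64 "\n", props[e].name, stats[s].name,
 *                    stats[s].value.u64);
 *       }
 *       free(stats);
 *    }
 *    free(props);
 */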