1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <assert.h> 25bf215546Sopenharmony_ci#include <stdbool.h> 26bf215546Sopenharmony_ci#include <string.h> 27bf215546Sopenharmony_ci#include <unistd.h> 28bf215546Sopenharmony_ci#include <fcntl.h> 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "util/mesa-sha1.h" 31bf215546Sopenharmony_ci#include "util/os_time.h" 32bf215546Sopenharmony_ci#include "common/intel_l3_config.h" 33bf215546Sopenharmony_ci#include "common/intel_disasm.h" 34bf215546Sopenharmony_ci#include "common/intel_sample_positions.h" 35bf215546Sopenharmony_ci#include "anv_private.h" 36bf215546Sopenharmony_ci#include "compiler/brw_nir.h" 37bf215546Sopenharmony_ci#include "compiler/brw_nir_rt.h" 38bf215546Sopenharmony_ci#include "anv_nir.h" 39bf215546Sopenharmony_ci#include "nir/nir_xfb_info.h" 40bf215546Sopenharmony_ci#include "spirv/nir_spirv.h" 41bf215546Sopenharmony_ci#include "vk_pipeline.h" 42bf215546Sopenharmony_ci#include "vk_render_pass.h" 43bf215546Sopenharmony_ci#include "vk_util.h" 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci/* Needed for SWIZZLE macros */ 46bf215546Sopenharmony_ci#include "program/prog_instruction.h" 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci/* Eventually, this will become part of anv_CreateShader. Unfortunately, 49bf215546Sopenharmony_ci * we can't do that yet because we don't have the ability to copy nir. 50bf215546Sopenharmony_ci */ 51bf215546Sopenharmony_cistatic nir_shader * 52bf215546Sopenharmony_cianv_shader_stage_to_nir(struct anv_device *device, 53bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage_info, 54bf215546Sopenharmony_ci void *mem_ctx) 55bf215546Sopenharmony_ci{ 56bf215546Sopenharmony_ci const struct anv_physical_device *pdevice = device->physical; 57bf215546Sopenharmony_ci const struct anv_instance *instance = pdevice->instance; 58bf215546Sopenharmony_ci const struct brw_compiler *compiler = pdevice->compiler; 59bf215546Sopenharmony_ci gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage); 60bf215546Sopenharmony_ci const nir_shader_compiler_options *nir_options = 61bf215546Sopenharmony_ci compiler->nir_options[stage]; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci const struct spirv_to_nir_options spirv_options = { 64bf215546Sopenharmony_ci .caps = { 65bf215546Sopenharmony_ci .demote_to_helper_invocation = true, 66bf215546Sopenharmony_ci .derivative_group = true, 67bf215546Sopenharmony_ci .descriptor_array_dynamic_indexing = true, 68bf215546Sopenharmony_ci .descriptor_array_non_uniform_indexing = true, 69bf215546Sopenharmony_ci .descriptor_indexing = true, 70bf215546Sopenharmony_ci .device_group = true, 71bf215546Sopenharmony_ci .draw_parameters = true, 72bf215546Sopenharmony_ci .float16 = pdevice->info.ver >= 8, 73bf215546Sopenharmony_ci .float32_atomic_add = pdevice->info.has_lsc, 74bf215546Sopenharmony_ci .float32_atomic_min_max = pdevice->info.ver >= 9, 75bf215546Sopenharmony_ci .float64 = pdevice->info.ver >= 8, 76bf215546Sopenharmony_ci .float64_atomic_min_max = pdevice->info.has_lsc, 77bf215546Sopenharmony_ci .fragment_shader_sample_interlock = pdevice->info.ver >= 9, 78bf215546Sopenharmony_ci .fragment_shader_pixel_interlock = pdevice->info.ver >= 9, 79bf215546Sopenharmony_ci .geometry_streams = true, 80bf215546Sopenharmony_ci /* When using Vulkan 1.3 or KHR_format_feature_flags2 is enabled, the 81bf215546Sopenharmony_ci * read/write without format is per format, so just report true. It's 82bf215546Sopenharmony_ci * up to the application to check. 83bf215546Sopenharmony_ci */ 84bf215546Sopenharmony_ci .image_read_without_format = instance->vk.app_info.api_version >= VK_API_VERSION_1_3 || device->vk.enabled_extensions.KHR_format_feature_flags2, 85bf215546Sopenharmony_ci .image_write_without_format = true, 86bf215546Sopenharmony_ci .int8 = pdevice->info.ver >= 8, 87bf215546Sopenharmony_ci .int16 = pdevice->info.ver >= 8, 88bf215546Sopenharmony_ci .int64 = pdevice->info.ver >= 8, 89bf215546Sopenharmony_ci .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin, 90bf215546Sopenharmony_ci .integer_functions2 = pdevice->info.ver >= 8, 91bf215546Sopenharmony_ci .mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader, 92bf215546Sopenharmony_ci .min_lod = true, 93bf215546Sopenharmony_ci .multiview = true, 94bf215546Sopenharmony_ci .physical_storage_buffer_address = pdevice->has_a64_buffer_access, 95bf215546Sopenharmony_ci .post_depth_coverage = pdevice->info.ver >= 9, 96bf215546Sopenharmony_ci .runtime_descriptor_array = true, 97bf215546Sopenharmony_ci .float_controls = pdevice->info.ver >= 8, 98bf215546Sopenharmony_ci .ray_query = pdevice->info.has_ray_tracing, 99bf215546Sopenharmony_ci .ray_tracing = pdevice->info.has_ray_tracing, 100bf215546Sopenharmony_ci .shader_clock = true, 101bf215546Sopenharmony_ci .shader_viewport_index_layer = true, 102bf215546Sopenharmony_ci .stencil_export = pdevice->info.ver >= 9, 103bf215546Sopenharmony_ci .storage_8bit = pdevice->info.ver >= 8, 104bf215546Sopenharmony_ci .storage_16bit = pdevice->info.ver >= 8, 105bf215546Sopenharmony_ci .subgroup_arithmetic = true, 106bf215546Sopenharmony_ci .subgroup_basic = true, 107bf215546Sopenharmony_ci .subgroup_ballot = true, 108bf215546Sopenharmony_ci .subgroup_dispatch = true, 109bf215546Sopenharmony_ci .subgroup_quad = true, 110bf215546Sopenharmony_ci .subgroup_uniform_control_flow = true, 111bf215546Sopenharmony_ci .subgroup_shuffle = true, 112bf215546Sopenharmony_ci .subgroup_vote = true, 113bf215546Sopenharmony_ci .tessellation = true, 114bf215546Sopenharmony_ci .transform_feedback = pdevice->info.ver >= 8, 115bf215546Sopenharmony_ci .variable_pointers = true, 116bf215546Sopenharmony_ci .vk_memory_model = true, 117bf215546Sopenharmony_ci .vk_memory_model_device_scope = true, 118bf215546Sopenharmony_ci .workgroup_memory_explicit_layout = true, 119bf215546Sopenharmony_ci .fragment_shading_rate = pdevice->info.ver >= 11, 120bf215546Sopenharmony_ci }, 121bf215546Sopenharmony_ci .ubo_addr_format = 122bf215546Sopenharmony_ci anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access), 123bf215546Sopenharmony_ci .ssbo_addr_format = 124bf215546Sopenharmony_ci anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access), 125bf215546Sopenharmony_ci .phys_ssbo_addr_format = nir_address_format_64bit_global, 126bf215546Sopenharmony_ci .push_const_addr_format = nir_address_format_logical, 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci /* TODO: Consider changing this to an address format that has the NULL 129bf215546Sopenharmony_ci * pointer equals to 0. That might be a better format to play nice 130bf215546Sopenharmony_ci * with certain code / code generators. 131bf215546Sopenharmony_ci */ 132bf215546Sopenharmony_ci .shared_addr_format = nir_address_format_32bit_offset, 133bf215546Sopenharmony_ci }; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci nir_shader *nir; 136bf215546Sopenharmony_ci VkResult result = 137bf215546Sopenharmony_ci vk_pipeline_shader_stage_to_nir(&device->vk, stage_info, 138bf215546Sopenharmony_ci &spirv_options, nir_options, 139bf215546Sopenharmony_ci mem_ctx, &nir); 140bf215546Sopenharmony_ci if (result != VK_SUCCESS) 141bf215546Sopenharmony_ci return NULL; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) { 144bf215546Sopenharmony_ci fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n", 145bf215546Sopenharmony_ci gl_shader_stage_name(stage)); 146bf215546Sopenharmony_ci nir_print_shader(nir, stderr); 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_to_temporaries, 150bf215546Sopenharmony_ci nir_shader_get_entrypoint(nir), true, false); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { 153bf215546Sopenharmony_ci .point_coord = true, 154bf215546Sopenharmony_ci }; 155bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci const nir_opt_access_options opt_access_options = { 158bf215546Sopenharmony_ci .is_vulkan = true, 159bf215546Sopenharmony_ci .infer_non_readable = true, 160bf215546Sopenharmony_ci }; 161bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_opt_access, &opt_access_options); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_frexp); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci /* Vulkan uses the separate-shader linking model */ 166bf215546Sopenharmony_ci nir->info.separate_shader = true; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci brw_preprocess_nir(compiler, nir, NULL); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci return nir; 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ciVkResult 174bf215546Sopenharmony_cianv_pipeline_init(struct anv_pipeline *pipeline, 175bf215546Sopenharmony_ci struct anv_device *device, 176bf215546Sopenharmony_ci enum anv_pipeline_type type, 177bf215546Sopenharmony_ci VkPipelineCreateFlags flags, 178bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 179bf215546Sopenharmony_ci{ 180bf215546Sopenharmony_ci VkResult result; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci memset(pipeline, 0, sizeof(*pipeline)); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci vk_object_base_init(&device->vk, &pipeline->base, 185bf215546Sopenharmony_ci VK_OBJECT_TYPE_PIPELINE); 186bf215546Sopenharmony_ci pipeline->device = device; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci /* It's the job of the child class to provide actual backing storage for 189bf215546Sopenharmony_ci * the batch by setting batch.start, batch.next, and batch.end. 190bf215546Sopenharmony_ci */ 191bf215546Sopenharmony_ci pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc; 192bf215546Sopenharmony_ci pipeline->batch.relocs = &pipeline->batch_relocs; 193bf215546Sopenharmony_ci pipeline->batch.status = VK_SUCCESS; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci result = anv_reloc_list_init(&pipeline->batch_relocs, 196bf215546Sopenharmony_ci pipeline->batch.alloc); 197bf215546Sopenharmony_ci if (result != VK_SUCCESS) 198bf215546Sopenharmony_ci return result; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci pipeline->mem_ctx = ralloc_context(NULL); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci pipeline->type = type; 203bf215546Sopenharmony_ci pipeline->flags = flags; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci util_dynarray_init(&pipeline->executables, pipeline->mem_ctx); 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci return VK_SUCCESS; 208bf215546Sopenharmony_ci} 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_civoid 211bf215546Sopenharmony_cianv_pipeline_finish(struct anv_pipeline *pipeline, 212bf215546Sopenharmony_ci struct anv_device *device, 213bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 214bf215546Sopenharmony_ci{ 215bf215546Sopenharmony_ci anv_reloc_list_finish(&pipeline->batch_relocs, 216bf215546Sopenharmony_ci pAllocator ? pAllocator : &device->vk.alloc); 217bf215546Sopenharmony_ci ralloc_free(pipeline->mem_ctx); 218bf215546Sopenharmony_ci vk_object_base_finish(&pipeline->base); 219bf215546Sopenharmony_ci} 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_civoid anv_DestroyPipeline( 222bf215546Sopenharmony_ci VkDevice _device, 223bf215546Sopenharmony_ci VkPipeline _pipeline, 224bf215546Sopenharmony_ci const VkAllocationCallbacks* pAllocator) 225bf215546Sopenharmony_ci{ 226bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_device, device, _device); 227bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci if (!pipeline) 230bf215546Sopenharmony_ci return; 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci switch (pipeline->type) { 233bf215546Sopenharmony_ci case ANV_PIPELINE_GRAPHICS: { 234bf215546Sopenharmony_ci struct anv_graphics_pipeline *gfx_pipeline = 235bf215546Sopenharmony_ci anv_pipeline_to_graphics(pipeline); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) { 238bf215546Sopenharmony_ci if (gfx_pipeline->shaders[s]) 239bf215546Sopenharmony_ci anv_shader_bin_unref(device, gfx_pipeline->shaders[s]); 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci break; 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci case ANV_PIPELINE_COMPUTE: { 245bf215546Sopenharmony_ci struct anv_compute_pipeline *compute_pipeline = 246bf215546Sopenharmony_ci anv_pipeline_to_compute(pipeline); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci if (compute_pipeline->cs) 249bf215546Sopenharmony_ci anv_shader_bin_unref(device, compute_pipeline->cs); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci break; 252bf215546Sopenharmony_ci } 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci case ANV_PIPELINE_RAY_TRACING: { 255bf215546Sopenharmony_ci struct anv_ray_tracing_pipeline *rt_pipeline = 256bf215546Sopenharmony_ci anv_pipeline_to_ray_tracing(pipeline); 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci util_dynarray_foreach(&rt_pipeline->shaders, 259bf215546Sopenharmony_ci struct anv_shader_bin *, shader) { 260bf215546Sopenharmony_ci anv_shader_bin_unref(device, *shader); 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci break; 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci default: 266bf215546Sopenharmony_ci unreachable("invalid pipeline type"); 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci anv_pipeline_finish(pipeline, device, pAllocator); 270bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pipeline); 271bf215546Sopenharmony_ci} 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_cistatic void 274bf215546Sopenharmony_cipopulate_sampler_prog_key(const struct intel_device_info *devinfo, 275bf215546Sopenharmony_ci struct brw_sampler_prog_key_data *key) 276bf215546Sopenharmony_ci{ 277bf215546Sopenharmony_ci /* Almost all multisampled textures are compressed. The only time when we 278bf215546Sopenharmony_ci * don't compress a multisampled texture is for 16x MSAA with a surface 279bf215546Sopenharmony_ci * width greater than 8k which is a bit of an edge case. Since the sampler 280bf215546Sopenharmony_ci * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe 281bf215546Sopenharmony_ci * to tell the compiler to always assume compression. 282bf215546Sopenharmony_ci */ 283bf215546Sopenharmony_ci key->compressed_multisample_layout_mask = ~0; 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci /* SkyLake added support for 16x MSAA. With this came a new message for 286bf215546Sopenharmony_ci * reading from a 16x MSAA surface with compression. The new message was 287bf215546Sopenharmony_ci * needed because now the MCS data is 64 bits instead of 32 or lower as is 288bf215546Sopenharmony_ci * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which 289bf215546Sopenharmony_ci * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x 290bf215546Sopenharmony_ci * so we can just use it unconditionally. This may not be quite as 291bf215546Sopenharmony_ci * efficient but it saves us from recompiling. 292bf215546Sopenharmony_ci */ 293bf215546Sopenharmony_ci if (devinfo->ver >= 9) 294bf215546Sopenharmony_ci key->msaa_16 = ~0; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci /* XXX: Handle texture swizzle on HSW- */ 297bf215546Sopenharmony_ci for (int i = 0; i < BRW_MAX_SAMPLERS; i++) { 298bf215546Sopenharmony_ci /* Assume color sampler, no swizzling. (Works for BDW+) */ 299bf215546Sopenharmony_ci key->swizzles[i] = SWIZZLE_XYZW; 300bf215546Sopenharmony_ci } 301bf215546Sopenharmony_ci} 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_cistatic void 304bf215546Sopenharmony_cipopulate_base_prog_key(const struct anv_device *device, 305bf215546Sopenharmony_ci bool robust_buffer_acccess, 306bf215546Sopenharmony_ci struct brw_base_prog_key *key) 307bf215546Sopenharmony_ci{ 308bf215546Sopenharmony_ci key->robust_buffer_access = robust_buffer_acccess; 309bf215546Sopenharmony_ci key->limit_trig_input_range = 310bf215546Sopenharmony_ci device->physical->instance->limit_trig_input_range; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci populate_sampler_prog_key(&device->info, &key->tex); 313bf215546Sopenharmony_ci} 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_cistatic void 316bf215546Sopenharmony_cipopulate_vs_prog_key(const struct anv_device *device, 317bf215546Sopenharmony_ci bool robust_buffer_acccess, 318bf215546Sopenharmony_ci struct brw_vs_prog_key *key) 319bf215546Sopenharmony_ci{ 320bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_acccess, &key->base); 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci /* XXX: Handle vertex input work-arounds */ 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci /* XXX: Handle sampler_prog_key */ 327bf215546Sopenharmony_ci} 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_cistatic void 330bf215546Sopenharmony_cipopulate_tcs_prog_key(const struct anv_device *device, 331bf215546Sopenharmony_ci bool robust_buffer_acccess, 332bf215546Sopenharmony_ci unsigned input_vertices, 333bf215546Sopenharmony_ci struct brw_tcs_prog_key *key) 334bf215546Sopenharmony_ci{ 335bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_acccess, &key->base); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci key->input_vertices = input_vertices; 340bf215546Sopenharmony_ci} 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_cistatic void 343bf215546Sopenharmony_cipopulate_tes_prog_key(const struct anv_device *device, 344bf215546Sopenharmony_ci bool robust_buffer_acccess, 345bf215546Sopenharmony_ci struct brw_tes_prog_key *key) 346bf215546Sopenharmony_ci{ 347bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_acccess, &key->base); 350bf215546Sopenharmony_ci} 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_cistatic void 353bf215546Sopenharmony_cipopulate_gs_prog_key(const struct anv_device *device, 354bf215546Sopenharmony_ci bool robust_buffer_acccess, 355bf215546Sopenharmony_ci struct brw_gs_prog_key *key) 356bf215546Sopenharmony_ci{ 357bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_acccess, &key->base); 360bf215546Sopenharmony_ci} 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_cistatic bool 363bf215546Sopenharmony_cipipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline, 364bf215546Sopenharmony_ci const BITSET_WORD *dynamic, 365bf215546Sopenharmony_ci const struct vk_multisample_state *ms, 366bf215546Sopenharmony_ci const struct vk_fragment_shading_rate_state *fsr) 367bf215546Sopenharmony_ci{ 368bf215546Sopenharmony_ci /* The Vulkan 1.2.199 spec says: 369bf215546Sopenharmony_ci * 370bf215546Sopenharmony_ci * "If any of the following conditions are met, Cxy' must be set to 371bf215546Sopenharmony_ci * {1,1}: 372bf215546Sopenharmony_ci * 373bf215546Sopenharmony_ci * * If Sample Shading is enabled. 374bf215546Sopenharmony_ci * * [...]" 375bf215546Sopenharmony_ci * 376bf215546Sopenharmony_ci * And "sample shading" is defined as follows: 377bf215546Sopenharmony_ci * 378bf215546Sopenharmony_ci * "Sample shading is enabled for a graphics pipeline: 379bf215546Sopenharmony_ci * 380bf215546Sopenharmony_ci * * If the interface of the fragment shader entry point of the 381bf215546Sopenharmony_ci * graphics pipeline includes an input variable decorated with 382bf215546Sopenharmony_ci * SampleId or SamplePosition. In this case minSampleShadingFactor 383bf215546Sopenharmony_ci * takes the value 1.0. 384bf215546Sopenharmony_ci * 385bf215546Sopenharmony_ci * * Else if the sampleShadingEnable member of the 386bf215546Sopenharmony_ci * VkPipelineMultisampleStateCreateInfo structure specified when 387bf215546Sopenharmony_ci * creating the graphics pipeline is set to VK_TRUE. In this case 388bf215546Sopenharmony_ci * minSampleShadingFactor takes the value of 389bf215546Sopenharmony_ci * VkPipelineMultisampleStateCreateInfo::minSampleShading. 390bf215546Sopenharmony_ci * 391bf215546Sopenharmony_ci * Otherwise, sample shading is considered disabled." 392bf215546Sopenharmony_ci * 393bf215546Sopenharmony_ci * The first bullet above is handled by the back-end compiler because those 394bf215546Sopenharmony_ci * inputs both force per-sample dispatch. The second bullet is handled 395bf215546Sopenharmony_ci * here. Note that this sample shading being enabled has nothing to do 396bf215546Sopenharmony_ci * with minSampleShading. 397bf215546Sopenharmony_ci */ 398bf215546Sopenharmony_ci if (ms != NULL && ms->sample_shading_enable) 399bf215546Sopenharmony_ci return false; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci /* Not dynamic & pipeline has a 1x1 fragment shading rate with no 402bf215546Sopenharmony_ci * possibility for element of the pipeline to change the value. 403bf215546Sopenharmony_ci */ 404bf215546Sopenharmony_ci if (!BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR) && 405bf215546Sopenharmony_ci fsr->fragment_size.width <= 1 && 406bf215546Sopenharmony_ci fsr->fragment_size.height <= 1 && 407bf215546Sopenharmony_ci fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR && 408bf215546Sopenharmony_ci fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) 409bf215546Sopenharmony_ci return false; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci return true; 412bf215546Sopenharmony_ci} 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_cistatic void 415bf215546Sopenharmony_cipopulate_task_prog_key(const struct anv_device *device, 416bf215546Sopenharmony_ci bool robust_buffer_access, 417bf215546Sopenharmony_ci struct brw_task_prog_key *key) 418bf215546Sopenharmony_ci{ 419bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_access, &key->base); 422bf215546Sopenharmony_ci} 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_cistatic void 425bf215546Sopenharmony_cipopulate_mesh_prog_key(const struct anv_device *device, 426bf215546Sopenharmony_ci bool robust_buffer_access, 427bf215546Sopenharmony_ci struct brw_mesh_prog_key *key) 428bf215546Sopenharmony_ci{ 429bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_access, &key->base); 432bf215546Sopenharmony_ci} 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_cistatic void 435bf215546Sopenharmony_cipopulate_wm_prog_key(const struct anv_graphics_pipeline *pipeline, 436bf215546Sopenharmony_ci bool robust_buffer_acccess, 437bf215546Sopenharmony_ci const BITSET_WORD *dynamic, 438bf215546Sopenharmony_ci const struct vk_multisample_state *ms, 439bf215546Sopenharmony_ci const struct vk_fragment_shading_rate_state *fsr, 440bf215546Sopenharmony_ci const struct vk_render_pass_state *rp, 441bf215546Sopenharmony_ci struct brw_wm_prog_key *key) 442bf215546Sopenharmony_ci{ 443bf215546Sopenharmony_ci const struct anv_device *device = pipeline->base.device; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_acccess, &key->base); 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci /* We set this to 0 here and set to the actual value before we call 450bf215546Sopenharmony_ci * brw_compile_fs. 451bf215546Sopenharmony_ci */ 452bf215546Sopenharmony_ci key->input_slots_valid = 0; 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci /* XXX Vulkan doesn't appear to specify */ 455bf215546Sopenharmony_ci key->clamp_fragment_color = false; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci key->ignore_sample_mask_out = false; 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci assert(rp->color_attachment_count <= MAX_RTS); 460bf215546Sopenharmony_ci /* Consider all inputs as valid until look at the NIR variables. */ 461bf215546Sopenharmony_ci key->color_outputs_valid = (1u << rp->color_attachment_count) - 1; 462bf215546Sopenharmony_ci key->nr_color_regions = rp->color_attachment_count; 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci /* To reduce possible shader recompilations we would need to know if 465bf215546Sopenharmony_ci * there is a SampleMask output variable to compute if we should emit 466bf215546Sopenharmony_ci * code to workaround the issue that hardware disables alpha to coverage 467bf215546Sopenharmony_ci * when there is SampleMask output. 468bf215546Sopenharmony_ci */ 469bf215546Sopenharmony_ci key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci /* Vulkan doesn't support fixed-function alpha test */ 472bf215546Sopenharmony_ci key->alpha_test_replicate_alpha = false; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci if (ms != NULL) { 475bf215546Sopenharmony_ci /* We should probably pull this out of the shader, but it's fairly 476bf215546Sopenharmony_ci * harmless to compute it and then let dead-code take care of it. 477bf215546Sopenharmony_ci */ 478bf215546Sopenharmony_ci if (ms->rasterization_samples > 1) { 479bf215546Sopenharmony_ci key->persample_interp = ms->sample_shading_enable && 480bf215546Sopenharmony_ci (ms->min_sample_shading * ms->rasterization_samples) > 1; 481bf215546Sopenharmony_ci key->multisample_fbo = true; 482bf215546Sopenharmony_ci } 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci if (device->physical->instance->sample_mask_out_opengl_behaviour) 485bf215546Sopenharmony_ci key->ignore_sample_mask_out = !key->multisample_fbo; 486bf215546Sopenharmony_ci } 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci key->coarse_pixel = 489bf215546Sopenharmony_ci !key->persample_interp && 490bf215546Sopenharmony_ci device->vk.enabled_extensions.KHR_fragment_shading_rate && 491bf215546Sopenharmony_ci pipeline_has_coarse_pixel(pipeline, dynamic, ms, fsr); 492bf215546Sopenharmony_ci} 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_cistatic void 495bf215546Sopenharmony_cipopulate_cs_prog_key(const struct anv_device *device, 496bf215546Sopenharmony_ci bool robust_buffer_acccess, 497bf215546Sopenharmony_ci struct brw_cs_prog_key *key) 498bf215546Sopenharmony_ci{ 499bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_acccess, &key->base); 502bf215546Sopenharmony_ci} 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_cistatic void 505bf215546Sopenharmony_cipopulate_bs_prog_key(const struct anv_device *device, 506bf215546Sopenharmony_ci bool robust_buffer_access, 507bf215546Sopenharmony_ci struct brw_bs_prog_key *key) 508bf215546Sopenharmony_ci{ 509bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci populate_base_prog_key(device, robust_buffer_access, &key->base); 512bf215546Sopenharmony_ci} 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_cistruct anv_pipeline_stage { 515bf215546Sopenharmony_ci gl_shader_stage stage; 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *info; 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci unsigned char shader_sha1[20]; 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci union brw_any_prog_key key; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci struct { 524bf215546Sopenharmony_ci gl_shader_stage stage; 525bf215546Sopenharmony_ci unsigned char sha1[20]; 526bf215546Sopenharmony_ci } cache_key; 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci nir_shader *nir; 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci struct anv_pipeline_binding surface_to_descriptor[256]; 531bf215546Sopenharmony_ci struct anv_pipeline_binding sampler_to_descriptor[256]; 532bf215546Sopenharmony_ci struct anv_pipeline_bind_map bind_map; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci union brw_any_prog_data prog_data; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci uint32_t num_stats; 537bf215546Sopenharmony_ci struct brw_compile_stats stats[3]; 538bf215546Sopenharmony_ci char *disasm[3]; 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci VkPipelineCreationFeedback feedback; 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci const unsigned *code; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci struct anv_shader_bin *bin; 545bf215546Sopenharmony_ci}; 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_cistatic void 548bf215546Sopenharmony_cianv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline, 549bf215546Sopenharmony_ci struct anv_pipeline_layout *layout, 550bf215546Sopenharmony_ci struct anv_pipeline_stage *stages, 551bf215546Sopenharmony_ci unsigned char *sha1_out) 552bf215546Sopenharmony_ci{ 553bf215546Sopenharmony_ci struct mesa_sha1 ctx; 554bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &pipeline->view_mask, 557bf215546Sopenharmony_ci sizeof(pipeline->view_mask)); 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci if (layout) 560bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci const bool rba = pipeline->base.device->robust_buffer_access; 563bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) { 566bf215546Sopenharmony_ci if (stages[s].info) { 567bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, stages[s].shader_sha1, 568bf215546Sopenharmony_ci sizeof(stages[s].shader_sha1)); 569bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s)); 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci } 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, sha1_out); 574bf215546Sopenharmony_ci} 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_cistatic void 577bf215546Sopenharmony_cianv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline, 578bf215546Sopenharmony_ci struct anv_pipeline_layout *layout, 579bf215546Sopenharmony_ci struct anv_pipeline_stage *stage, 580bf215546Sopenharmony_ci unsigned char *sha1_out) 581bf215546Sopenharmony_ci{ 582bf215546Sopenharmony_ci struct mesa_sha1 ctx; 583bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci if (layout) 586bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci const struct anv_device *device = pipeline->base.device; 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci const bool rba = device->robust_buffer_access; 591bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci const bool afs = device->physical->instance->assume_full_subgroups; 594bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &afs, sizeof(afs)); 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, stage->shader_sha1, 597bf215546Sopenharmony_ci sizeof(stage->shader_sha1)); 598bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs)); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, sha1_out); 601bf215546Sopenharmony_ci} 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_cistatic void 604bf215546Sopenharmony_cianv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline, 605bf215546Sopenharmony_ci struct anv_pipeline_layout *layout, 606bf215546Sopenharmony_ci struct anv_pipeline_stage *stage, 607bf215546Sopenharmony_ci unsigned char *sha1_out) 608bf215546Sopenharmony_ci{ 609bf215546Sopenharmony_ci struct mesa_sha1 ctx; 610bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci if (layout != NULL) 613bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci const bool rba = pipeline->base.device->robust_buffer_access; 616bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1)); 619bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs)); 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, sha1_out); 622bf215546Sopenharmony_ci} 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_cistatic void 625bf215546Sopenharmony_cianv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline, 626bf215546Sopenharmony_ci struct anv_pipeline_layout *layout, 627bf215546Sopenharmony_ci struct anv_pipeline_stage *intersection, 628bf215546Sopenharmony_ci struct anv_pipeline_stage *any_hit, 629bf215546Sopenharmony_ci unsigned char *sha1_out) 630bf215546Sopenharmony_ci{ 631bf215546Sopenharmony_ci struct mesa_sha1 ctx; 632bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci if (layout != NULL) 635bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci const bool rba = pipeline->base.device->robust_buffer_access; 638bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1)); 641bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs)); 642bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1)); 643bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs)); 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, sha1_out); 646bf215546Sopenharmony_ci} 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_cistatic nir_shader * 649bf215546Sopenharmony_cianv_pipeline_stage_get_nir(struct anv_pipeline *pipeline, 650bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 651bf215546Sopenharmony_ci void *mem_ctx, 652bf215546Sopenharmony_ci struct anv_pipeline_stage *stage) 653bf215546Sopenharmony_ci{ 654bf215546Sopenharmony_ci const struct brw_compiler *compiler = 655bf215546Sopenharmony_ci pipeline->device->physical->compiler; 656bf215546Sopenharmony_ci const nir_shader_compiler_options *nir_options = 657bf215546Sopenharmony_ci compiler->nir_options[stage->stage]; 658bf215546Sopenharmony_ci nir_shader *nir; 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci nir = anv_device_search_for_nir(pipeline->device, cache, 661bf215546Sopenharmony_ci nir_options, 662bf215546Sopenharmony_ci stage->shader_sha1, 663bf215546Sopenharmony_ci mem_ctx); 664bf215546Sopenharmony_ci if (nir) { 665bf215546Sopenharmony_ci assert(nir->info.stage == stage->stage); 666bf215546Sopenharmony_ci return nir; 667bf215546Sopenharmony_ci } 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci nir = anv_shader_stage_to_nir(pipeline->device, stage->info, mem_ctx); 670bf215546Sopenharmony_ci if (nir) { 671bf215546Sopenharmony_ci anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1); 672bf215546Sopenharmony_ci return nir; 673bf215546Sopenharmony_ci } 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci return NULL; 676bf215546Sopenharmony_ci} 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_cistatic void 679bf215546Sopenharmony_cishared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) 680bf215546Sopenharmony_ci{ 681bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(type)); 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci uint32_t comp_size = glsl_type_is_boolean(type) 684bf215546Sopenharmony_ci ? 4 : glsl_get_bit_size(type) / 8; 685bf215546Sopenharmony_ci unsigned length = glsl_get_vector_elements(type); 686bf215546Sopenharmony_ci *size = comp_size * length, 687bf215546Sopenharmony_ci *align = comp_size * (length == 3 ? 4 : length); 688bf215546Sopenharmony_ci} 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_cistatic void 691bf215546Sopenharmony_cianv_pipeline_lower_nir(struct anv_pipeline *pipeline, 692bf215546Sopenharmony_ci void *mem_ctx, 693bf215546Sopenharmony_ci struct anv_pipeline_stage *stage, 694bf215546Sopenharmony_ci struct anv_pipeline_layout *layout) 695bf215546Sopenharmony_ci{ 696bf215546Sopenharmony_ci const struct anv_physical_device *pdevice = pipeline->device->physical; 697bf215546Sopenharmony_ci const struct brw_compiler *compiler = pdevice->compiler; 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data = &stage->prog_data.base; 700bf215546Sopenharmony_ci nir_shader *nir = stage->nir; 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT) { 703bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_wpos_center); 704bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_input_attachments, 705bf215546Sopenharmony_ci &(nir_input_attachment_options) { 706bf215546Sopenharmony_ci .use_fragcoord_sysval = true, 707bf215546Sopenharmony_ci .use_layer_id_sysval = true, 708bf215546Sopenharmony_ci }); 709bf215546Sopenharmony_ci } 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout); 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci if (pipeline->type == ANV_PIPELINE_GRAPHICS) { 714bf215546Sopenharmony_ci NIR_PASS(_, nir, anv_nir_lower_multiview, 715bf215546Sopenharmony_ci anv_pipeline_to_graphics(pipeline)); 716bf215546Sopenharmony_ci } 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci NIR_PASS(_, nir, brw_nir_lower_storage_image, compiler->devinfo); 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global, 723bf215546Sopenharmony_ci nir_address_format_64bit_global); 724bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const, 725bf215546Sopenharmony_ci nir_address_format_32bit_offset); 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_ci NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info); 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_ci /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ 730bf215546Sopenharmony_ci NIR_PASS_V(nir, anv_nir_apply_pipeline_layout, 731bf215546Sopenharmony_ci pdevice, pipeline->device->robust_buffer_access, 732bf215546Sopenharmony_ci layout, &stage->bind_map); 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo, 735bf215546Sopenharmony_ci anv_nir_ubo_addr_format(pdevice, 736bf215546Sopenharmony_ci pipeline->device->robust_buffer_access)); 737bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo, 738bf215546Sopenharmony_ci anv_nir_ssbo_addr_format(pdevice, 739bf215546Sopenharmony_ci pipeline->device->robust_buffer_access)); 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci /* First run copy-prop to get rid of all of the vec() that address 742bf215546Sopenharmony_ci * calculations often create and then constant-fold so that, when we 743bf215546Sopenharmony_ci * get to anv_nir_lower_ubo_loads, we can detect constant offsets. 744bf215546Sopenharmony_ci */ 745bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_copy_prop); 746bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_opt_constant_folding); 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci NIR_PASS(_, nir, anv_nir_lower_ubo_loads); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci /* We don't support non-uniform UBOs and non-uniform SSBO access is 751bf215546Sopenharmony_ci * handled naturally by falling back to A64 messages. 752bf215546Sopenharmony_ci */ 753bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_non_uniform_access, 754bf215546Sopenharmony_ci &(nir_lower_non_uniform_access_options) { 755bf215546Sopenharmony_ci .types = nir_lower_non_uniform_texture_access | 756bf215546Sopenharmony_ci nir_lower_non_uniform_image_access, 757bf215546Sopenharmony_ci .callback = NULL, 758bf215546Sopenharmony_ci }); 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci NIR_PASS_V(nir, anv_nir_compute_push_layout, 761bf215546Sopenharmony_ci pdevice, pipeline->device->robust_buffer_access, 762bf215546Sopenharmony_ci prog_data, &stage->bind_map, mem_ctx); 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci if (gl_shader_stage_uses_workgroup(nir->info.stage)) { 765bf215546Sopenharmony_ci if (!nir->info.shared_memory_explicit_layout) { 766bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, 767bf215546Sopenharmony_ci nir_var_mem_shared, shared_type_info); 768bf215546Sopenharmony_ci } 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, 771bf215546Sopenharmony_ci nir_var_mem_shared, nir_address_format_32bit_offset); 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci if (nir->info.zero_initialize_shared_memory && 774bf215546Sopenharmony_ci nir->info.shared_size > 0) { 775bf215546Sopenharmony_ci /* The effective Shared Local Memory size is at least 1024 bytes and 776bf215546Sopenharmony_ci * is always rounded to a power of two, so it is OK to align the size 777bf215546Sopenharmony_ci * used by the shader to chunk_size -- which does simplify the logic. 778bf215546Sopenharmony_ci */ 779bf215546Sopenharmony_ci const unsigned chunk_size = 16; 780bf215546Sopenharmony_ci const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size); 781bf215546Sopenharmony_ci assert(shared_size <= 782bf215546Sopenharmony_ci intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size)); 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_zero_initialize_shared_memory, 785bf215546Sopenharmony_ci shared_size, chunk_size); 786bf215546Sopenharmony_ci } 787bf215546Sopenharmony_ci } 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci if (gl_shader_stage_is_compute(nir->info.stage) || 790bf215546Sopenharmony_ci gl_shader_stage_is_mesh(nir->info.stage)) 791bf215546Sopenharmony_ci NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics); 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci stage->nir = nir; 794bf215546Sopenharmony_ci} 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_cistatic void 797bf215546Sopenharmony_cianv_pipeline_link_vs(const struct brw_compiler *compiler, 798bf215546Sopenharmony_ci struct anv_pipeline_stage *vs_stage, 799bf215546Sopenharmony_ci struct anv_pipeline_stage *next_stage) 800bf215546Sopenharmony_ci{ 801bf215546Sopenharmony_ci if (next_stage) 802bf215546Sopenharmony_ci brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir); 803bf215546Sopenharmony_ci} 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_cistatic void 806bf215546Sopenharmony_cianv_pipeline_compile_vs(const struct brw_compiler *compiler, 807bf215546Sopenharmony_ci void *mem_ctx, 808bf215546Sopenharmony_ci struct anv_graphics_pipeline *pipeline, 809bf215546Sopenharmony_ci struct anv_pipeline_stage *vs_stage) 810bf215546Sopenharmony_ci{ 811bf215546Sopenharmony_ci /* When using Primitive Replication for multiview, each view gets its own 812bf215546Sopenharmony_ci * position slot. 813bf215546Sopenharmony_ci */ 814bf215546Sopenharmony_ci uint32_t pos_slots = pipeline->use_primitive_replication ? 815bf215546Sopenharmony_ci MAX2(1, util_bitcount(pipeline->view_mask)) : 1; 816bf215546Sopenharmony_ci 817bf215546Sopenharmony_ci brw_compute_vue_map(compiler->devinfo, 818bf215546Sopenharmony_ci &vs_stage->prog_data.vs.base.vue_map, 819bf215546Sopenharmony_ci vs_stage->nir->info.outputs_written, 820bf215546Sopenharmony_ci vs_stage->nir->info.separate_shader, 821bf215546Sopenharmony_ci pos_slots); 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci vs_stage->num_stats = 1; 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci struct brw_compile_vs_params params = { 826bf215546Sopenharmony_ci .nir = vs_stage->nir, 827bf215546Sopenharmony_ci .key = &vs_stage->key.vs, 828bf215546Sopenharmony_ci .prog_data = &vs_stage->prog_data.vs, 829bf215546Sopenharmony_ci .stats = vs_stage->stats, 830bf215546Sopenharmony_ci .log_data = pipeline->base.device, 831bf215546Sopenharmony_ci }; 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci vs_stage->code = brw_compile_vs(compiler, mem_ctx, ¶ms); 834bf215546Sopenharmony_ci} 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_cistatic void 837bf215546Sopenharmony_cimerge_tess_info(struct shader_info *tes_info, 838bf215546Sopenharmony_ci const struct shader_info *tcs_info) 839bf215546Sopenharmony_ci{ 840bf215546Sopenharmony_ci /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says: 841bf215546Sopenharmony_ci * 842bf215546Sopenharmony_ci * "PointMode. Controls generation of points rather than triangles 843bf215546Sopenharmony_ci * or lines. This functionality defaults to disabled, and is 844bf215546Sopenharmony_ci * enabled if either shader stage includes the execution mode. 845bf215546Sopenharmony_ci * 846bf215546Sopenharmony_ci * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw, 847bf215546Sopenharmony_ci * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd, 848bf215546Sopenharmony_ci * and OutputVertices, it says: 849bf215546Sopenharmony_ci * 850bf215546Sopenharmony_ci * "One mode must be set in at least one of the tessellation 851bf215546Sopenharmony_ci * shader stages." 852bf215546Sopenharmony_ci * 853bf215546Sopenharmony_ci * So, the fields can be set in either the TCS or TES, but they must 854bf215546Sopenharmony_ci * agree if set in both. Our backend looks at TES, so bitwise-or in 855bf215546Sopenharmony_ci * the values from the TCS. 856bf215546Sopenharmony_ci */ 857bf215546Sopenharmony_ci assert(tcs_info->tess.tcs_vertices_out == 0 || 858bf215546Sopenharmony_ci tes_info->tess.tcs_vertices_out == 0 || 859bf215546Sopenharmony_ci tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out); 860bf215546Sopenharmony_ci tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || 863bf215546Sopenharmony_ci tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED || 864bf215546Sopenharmony_ci tcs_info->tess.spacing == tes_info->tess.spacing); 865bf215546Sopenharmony_ci tes_info->tess.spacing |= tcs_info->tess.spacing; 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci assert(tcs_info->tess._primitive_mode == 0 || 868bf215546Sopenharmony_ci tes_info->tess._primitive_mode == 0 || 869bf215546Sopenharmony_ci tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode); 870bf215546Sopenharmony_ci tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode; 871bf215546Sopenharmony_ci tes_info->tess.ccw |= tcs_info->tess.ccw; 872bf215546Sopenharmony_ci tes_info->tess.point_mode |= tcs_info->tess.point_mode; 873bf215546Sopenharmony_ci} 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_cistatic void 876bf215546Sopenharmony_cianv_pipeline_link_tcs(const struct brw_compiler *compiler, 877bf215546Sopenharmony_ci struct anv_pipeline_stage *tcs_stage, 878bf215546Sopenharmony_ci struct anv_pipeline_stage *tes_stage) 879bf215546Sopenharmony_ci{ 880bf215546Sopenharmony_ci assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL); 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir); 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci nir_lower_patch_vertices(tes_stage->nir, 885bf215546Sopenharmony_ci tcs_stage->nir->info.tess.tcs_vertices_out, 886bf215546Sopenharmony_ci NULL); 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_ci /* Copy TCS info into the TES info */ 889bf215546Sopenharmony_ci merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info); 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci /* Whacking the key after cache lookup is a bit sketchy, but all of 892bf215546Sopenharmony_ci * this comes from the SPIR-V, which is part of the hash used for the 893bf215546Sopenharmony_ci * pipeline cache. So it should be safe. 894bf215546Sopenharmony_ci */ 895bf215546Sopenharmony_ci tcs_stage->key.tcs._tes_primitive_mode = 896bf215546Sopenharmony_ci tes_stage->nir->info.tess._primitive_mode; 897bf215546Sopenharmony_ci tcs_stage->key.tcs.quads_workaround = 898bf215546Sopenharmony_ci compiler->devinfo->ver < 9 && 899bf215546Sopenharmony_ci tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS && 900bf215546Sopenharmony_ci tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL; 901bf215546Sopenharmony_ci} 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_cistatic void 904bf215546Sopenharmony_cianv_pipeline_compile_tcs(const struct brw_compiler *compiler, 905bf215546Sopenharmony_ci void *mem_ctx, 906bf215546Sopenharmony_ci struct anv_device *device, 907bf215546Sopenharmony_ci struct anv_pipeline_stage *tcs_stage, 908bf215546Sopenharmony_ci struct anv_pipeline_stage *prev_stage) 909bf215546Sopenharmony_ci{ 910bf215546Sopenharmony_ci tcs_stage->key.tcs.outputs_written = 911bf215546Sopenharmony_ci tcs_stage->nir->info.outputs_written; 912bf215546Sopenharmony_ci tcs_stage->key.tcs.patch_outputs_written = 913bf215546Sopenharmony_ci tcs_stage->nir->info.patch_outputs_written; 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_ci tcs_stage->num_stats = 1; 916bf215546Sopenharmony_ci 917bf215546Sopenharmony_ci struct brw_compile_tcs_params params = { 918bf215546Sopenharmony_ci .nir = tcs_stage->nir, 919bf215546Sopenharmony_ci .key = &tcs_stage->key.tcs, 920bf215546Sopenharmony_ci .prog_data = &tcs_stage->prog_data.tcs, 921bf215546Sopenharmony_ci .stats = tcs_stage->stats, 922bf215546Sopenharmony_ci .log_data = device, 923bf215546Sopenharmony_ci }; 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci tcs_stage->code = brw_compile_tcs(compiler, mem_ctx, ¶ms); 926bf215546Sopenharmony_ci} 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_cistatic void 929bf215546Sopenharmony_cianv_pipeline_link_tes(const struct brw_compiler *compiler, 930bf215546Sopenharmony_ci struct anv_pipeline_stage *tes_stage, 931bf215546Sopenharmony_ci struct anv_pipeline_stage *next_stage) 932bf215546Sopenharmony_ci{ 933bf215546Sopenharmony_ci if (next_stage) 934bf215546Sopenharmony_ci brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir); 935bf215546Sopenharmony_ci} 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_cistatic void 938bf215546Sopenharmony_cianv_pipeline_compile_tes(const struct brw_compiler *compiler, 939bf215546Sopenharmony_ci void *mem_ctx, 940bf215546Sopenharmony_ci struct anv_device *device, 941bf215546Sopenharmony_ci struct anv_pipeline_stage *tes_stage, 942bf215546Sopenharmony_ci struct anv_pipeline_stage *tcs_stage) 943bf215546Sopenharmony_ci{ 944bf215546Sopenharmony_ci tes_stage->key.tes.inputs_read = 945bf215546Sopenharmony_ci tcs_stage->nir->info.outputs_written; 946bf215546Sopenharmony_ci tes_stage->key.tes.patch_inputs_read = 947bf215546Sopenharmony_ci tcs_stage->nir->info.patch_outputs_written; 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci tes_stage->num_stats = 1; 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci struct brw_compile_tes_params params = { 952bf215546Sopenharmony_ci .nir = tes_stage->nir, 953bf215546Sopenharmony_ci .key = &tes_stage->key.tes, 954bf215546Sopenharmony_ci .prog_data = &tes_stage->prog_data.tes, 955bf215546Sopenharmony_ci .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map, 956bf215546Sopenharmony_ci .stats = tes_stage->stats, 957bf215546Sopenharmony_ci .log_data = device, 958bf215546Sopenharmony_ci }; 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci tes_stage->code = brw_compile_tes(compiler, mem_ctx, ¶ms); 961bf215546Sopenharmony_ci} 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_cistatic void 964bf215546Sopenharmony_cianv_pipeline_link_gs(const struct brw_compiler *compiler, 965bf215546Sopenharmony_ci struct anv_pipeline_stage *gs_stage, 966bf215546Sopenharmony_ci struct anv_pipeline_stage *next_stage) 967bf215546Sopenharmony_ci{ 968bf215546Sopenharmony_ci if (next_stage) 969bf215546Sopenharmony_ci brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir); 970bf215546Sopenharmony_ci} 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_cistatic void 973bf215546Sopenharmony_cianv_pipeline_compile_gs(const struct brw_compiler *compiler, 974bf215546Sopenharmony_ci void *mem_ctx, 975bf215546Sopenharmony_ci struct anv_device *device, 976bf215546Sopenharmony_ci struct anv_pipeline_stage *gs_stage, 977bf215546Sopenharmony_ci struct anv_pipeline_stage *prev_stage) 978bf215546Sopenharmony_ci{ 979bf215546Sopenharmony_ci brw_compute_vue_map(compiler->devinfo, 980bf215546Sopenharmony_ci &gs_stage->prog_data.gs.base.vue_map, 981bf215546Sopenharmony_ci gs_stage->nir->info.outputs_written, 982bf215546Sopenharmony_ci gs_stage->nir->info.separate_shader, 1); 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci gs_stage->num_stats = 1; 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci struct brw_compile_gs_params params = { 987bf215546Sopenharmony_ci .nir = gs_stage->nir, 988bf215546Sopenharmony_ci .key = &gs_stage->key.gs, 989bf215546Sopenharmony_ci .prog_data = &gs_stage->prog_data.gs, 990bf215546Sopenharmony_ci .stats = gs_stage->stats, 991bf215546Sopenharmony_ci .log_data = device, 992bf215546Sopenharmony_ci }; 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci gs_stage->code = brw_compile_gs(compiler, mem_ctx, ¶ms); 995bf215546Sopenharmony_ci} 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_cistatic void 998bf215546Sopenharmony_cianv_pipeline_link_task(const struct brw_compiler *compiler, 999bf215546Sopenharmony_ci struct anv_pipeline_stage *task_stage, 1000bf215546Sopenharmony_ci struct anv_pipeline_stage *next_stage) 1001bf215546Sopenharmony_ci{ 1002bf215546Sopenharmony_ci assert(next_stage); 1003bf215546Sopenharmony_ci assert(next_stage->stage == MESA_SHADER_MESH); 1004bf215546Sopenharmony_ci brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir); 1005bf215546Sopenharmony_ci} 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_cistatic void 1008bf215546Sopenharmony_cianv_pipeline_compile_task(const struct brw_compiler *compiler, 1009bf215546Sopenharmony_ci void *mem_ctx, 1010bf215546Sopenharmony_ci struct anv_device *device, 1011bf215546Sopenharmony_ci struct anv_pipeline_stage *task_stage) 1012bf215546Sopenharmony_ci{ 1013bf215546Sopenharmony_ci task_stage->num_stats = 1; 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci struct brw_compile_task_params params = { 1016bf215546Sopenharmony_ci .nir = task_stage->nir, 1017bf215546Sopenharmony_ci .key = &task_stage->key.task, 1018bf215546Sopenharmony_ci .prog_data = &task_stage->prog_data.task, 1019bf215546Sopenharmony_ci .stats = task_stage->stats, 1020bf215546Sopenharmony_ci .log_data = device, 1021bf215546Sopenharmony_ci }; 1022bf215546Sopenharmony_ci 1023bf215546Sopenharmony_ci task_stage->code = brw_compile_task(compiler, mem_ctx, ¶ms); 1024bf215546Sopenharmony_ci} 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_cistatic void 1027bf215546Sopenharmony_cianv_pipeline_link_mesh(const struct brw_compiler *compiler, 1028bf215546Sopenharmony_ci struct anv_pipeline_stage *mesh_stage, 1029bf215546Sopenharmony_ci struct anv_pipeline_stage *next_stage) 1030bf215546Sopenharmony_ci{ 1031bf215546Sopenharmony_ci if (next_stage) { 1032bf215546Sopenharmony_ci brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir); 1033bf215546Sopenharmony_ci } 1034bf215546Sopenharmony_ci} 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_cistatic void 1037bf215546Sopenharmony_cianv_pipeline_compile_mesh(const struct brw_compiler *compiler, 1038bf215546Sopenharmony_ci void *mem_ctx, 1039bf215546Sopenharmony_ci struct anv_device *device, 1040bf215546Sopenharmony_ci struct anv_pipeline_stage *mesh_stage, 1041bf215546Sopenharmony_ci struct anv_pipeline_stage *prev_stage) 1042bf215546Sopenharmony_ci{ 1043bf215546Sopenharmony_ci mesh_stage->num_stats = 1; 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci struct brw_compile_mesh_params params = { 1046bf215546Sopenharmony_ci .nir = mesh_stage->nir, 1047bf215546Sopenharmony_ci .key = &mesh_stage->key.mesh, 1048bf215546Sopenharmony_ci .prog_data = &mesh_stage->prog_data.mesh, 1049bf215546Sopenharmony_ci .stats = mesh_stage->stats, 1050bf215546Sopenharmony_ci .log_data = device, 1051bf215546Sopenharmony_ci }; 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci if (prev_stage) { 1054bf215546Sopenharmony_ci assert(prev_stage->stage == MESA_SHADER_TASK); 1055bf215546Sopenharmony_ci params.tue_map = &prev_stage->prog_data.task.map; 1056bf215546Sopenharmony_ci } 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, ¶ms); 1059bf215546Sopenharmony_ci} 1060bf215546Sopenharmony_ci 1061bf215546Sopenharmony_cistatic void 1062bf215546Sopenharmony_cianv_pipeline_link_fs(const struct brw_compiler *compiler, 1063bf215546Sopenharmony_ci struct anv_pipeline_stage *stage, 1064bf215546Sopenharmony_ci const struct vk_render_pass_state *rp) 1065bf215546Sopenharmony_ci{ 1066bf215546Sopenharmony_ci /* Initially the valid outputs value is set to all possible render targets 1067bf215546Sopenharmony_ci * valid (see populate_wm_prog_key()), before we look at the shader 1068bf215546Sopenharmony_ci * variables. Here we look at the output variables of the shader an compute 1069bf215546Sopenharmony_ci * a correct number of render target outputs. 1070bf215546Sopenharmony_ci */ 1071bf215546Sopenharmony_ci stage->key.wm.color_outputs_valid = 0; 1072bf215546Sopenharmony_ci nir_foreach_shader_out_variable_safe(var, stage->nir) { 1073bf215546Sopenharmony_ci if (var->data.location < FRAG_RESULT_DATA0) 1074bf215546Sopenharmony_ci continue; 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci const unsigned rt = var->data.location - FRAG_RESULT_DATA0; 1077bf215546Sopenharmony_ci const unsigned array_len = 1078bf215546Sopenharmony_ci glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; 1079bf215546Sopenharmony_ci assert(rt + array_len <= MAX_RTS); 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_ci stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len); 1082bf215546Sopenharmony_ci } 1083bf215546Sopenharmony_ci stage->key.wm.color_outputs_valid &= 1084bf215546Sopenharmony_ci (1u << rp->color_attachment_count) - 1; 1085bf215546Sopenharmony_ci stage->key.wm.nr_color_regions = 1086bf215546Sopenharmony_ci util_last_bit(stage->key.wm.color_outputs_valid); 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci unsigned num_rt_bindings; 1089bf215546Sopenharmony_ci struct anv_pipeline_binding rt_bindings[MAX_RTS]; 1090bf215546Sopenharmony_ci if (stage->key.wm.nr_color_regions > 0) { 1091bf215546Sopenharmony_ci assert(stage->key.wm.nr_color_regions <= MAX_RTS); 1092bf215546Sopenharmony_ci for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) { 1093bf215546Sopenharmony_ci if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) { 1094bf215546Sopenharmony_ci rt_bindings[rt] = (struct anv_pipeline_binding) { 1095bf215546Sopenharmony_ci .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, 1096bf215546Sopenharmony_ci .index = rt, 1097bf215546Sopenharmony_ci }; 1098bf215546Sopenharmony_ci } else { 1099bf215546Sopenharmony_ci /* Setup a null render target */ 1100bf215546Sopenharmony_ci rt_bindings[rt] = (struct anv_pipeline_binding) { 1101bf215546Sopenharmony_ci .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, 1102bf215546Sopenharmony_ci .index = UINT32_MAX, 1103bf215546Sopenharmony_ci }; 1104bf215546Sopenharmony_ci } 1105bf215546Sopenharmony_ci } 1106bf215546Sopenharmony_ci num_rt_bindings = stage->key.wm.nr_color_regions; 1107bf215546Sopenharmony_ci } else { 1108bf215546Sopenharmony_ci /* Setup a null render target */ 1109bf215546Sopenharmony_ci rt_bindings[0] = (struct anv_pipeline_binding) { 1110bf215546Sopenharmony_ci .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, 1111bf215546Sopenharmony_ci .index = UINT32_MAX, 1112bf215546Sopenharmony_ci }; 1113bf215546Sopenharmony_ci num_rt_bindings = 1; 1114bf215546Sopenharmony_ci } 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci assert(num_rt_bindings <= MAX_RTS); 1117bf215546Sopenharmony_ci assert(stage->bind_map.surface_count == 0); 1118bf215546Sopenharmony_ci typed_memcpy(stage->bind_map.surface_to_descriptor, 1119bf215546Sopenharmony_ci rt_bindings, num_rt_bindings); 1120bf215546Sopenharmony_ci stage->bind_map.surface_count += num_rt_bindings; 1121bf215546Sopenharmony_ci} 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_cistatic void 1124bf215546Sopenharmony_cianv_pipeline_compile_fs(const struct brw_compiler *compiler, 1125bf215546Sopenharmony_ci void *mem_ctx, 1126bf215546Sopenharmony_ci struct anv_device *device, 1127bf215546Sopenharmony_ci struct anv_pipeline_stage *fs_stage, 1128bf215546Sopenharmony_ci struct anv_pipeline_stage *prev_stage) 1129bf215546Sopenharmony_ci{ 1130bf215546Sopenharmony_ci /* TODO: we could set this to 0 based on the information in nir_shader, but 1131bf215546Sopenharmony_ci * we need this before we call spirv_to_nir. 1132bf215546Sopenharmony_ci */ 1133bf215546Sopenharmony_ci assert(prev_stage); 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci struct brw_compile_fs_params params = { 1136bf215546Sopenharmony_ci .nir = fs_stage->nir, 1137bf215546Sopenharmony_ci .key = &fs_stage->key.wm, 1138bf215546Sopenharmony_ci .prog_data = &fs_stage->prog_data.wm, 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci .allow_spilling = true, 1141bf215546Sopenharmony_ci .stats = fs_stage->stats, 1142bf215546Sopenharmony_ci .log_data = device, 1143bf215546Sopenharmony_ci }; 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_ci if (prev_stage->stage == MESA_SHADER_MESH) { 1146bf215546Sopenharmony_ci params.mue_map = &prev_stage->prog_data.mesh.map; 1147bf215546Sopenharmony_ci /* TODO(mesh): Slots valid, do we even use/rely on it? */ 1148bf215546Sopenharmony_ci } else { 1149bf215546Sopenharmony_ci fs_stage->key.wm.input_slots_valid = 1150bf215546Sopenharmony_ci prev_stage->prog_data.vue.vue_map.slots_valid; 1151bf215546Sopenharmony_ci } 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci fs_stage->code = brw_compile_fs(compiler, mem_ctx, ¶ms); 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 + 1156bf215546Sopenharmony_ci (uint32_t)fs_stage->prog_data.wm.dispatch_16 + 1157bf215546Sopenharmony_ci (uint32_t)fs_stage->prog_data.wm.dispatch_32; 1158bf215546Sopenharmony_ci} 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_cistatic void 1161bf215546Sopenharmony_cianv_pipeline_add_executable(struct anv_pipeline *pipeline, 1162bf215546Sopenharmony_ci struct anv_pipeline_stage *stage, 1163bf215546Sopenharmony_ci struct brw_compile_stats *stats, 1164bf215546Sopenharmony_ci uint32_t code_offset) 1165bf215546Sopenharmony_ci{ 1166bf215546Sopenharmony_ci char *nir = NULL; 1167bf215546Sopenharmony_ci if (stage->nir && 1168bf215546Sopenharmony_ci (pipeline->flags & 1169bf215546Sopenharmony_ci VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) { 1170bf215546Sopenharmony_ci nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx); 1171bf215546Sopenharmony_ci } 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci char *disasm = NULL; 1174bf215546Sopenharmony_ci if (stage->code && 1175bf215546Sopenharmony_ci (pipeline->flags & 1176bf215546Sopenharmony_ci VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) { 1177bf215546Sopenharmony_ci char *stream_data = NULL; 1178bf215546Sopenharmony_ci size_t stream_size = 0; 1179bf215546Sopenharmony_ci FILE *stream = open_memstream(&stream_data, &stream_size); 1180bf215546Sopenharmony_ci 1181bf215546Sopenharmony_ci uint32_t push_size = 0; 1182bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) 1183bf215546Sopenharmony_ci push_size += stage->bind_map.push_ranges[i].length; 1184bf215546Sopenharmony_ci if (push_size > 0) { 1185bf215546Sopenharmony_ci fprintf(stream, "Push constant ranges:\n"); 1186bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 1187bf215546Sopenharmony_ci if (stage->bind_map.push_ranges[i].length == 0) 1188bf215546Sopenharmony_ci continue; 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_ci fprintf(stream, " RANGE%d (%dB): ", i, 1191bf215546Sopenharmony_ci stage->bind_map.push_ranges[i].length * 32); 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci switch (stage->bind_map.push_ranges[i].set) { 1194bf215546Sopenharmony_ci case ANV_DESCRIPTOR_SET_NULL: 1195bf215546Sopenharmony_ci fprintf(stream, "NULL"); 1196bf215546Sopenharmony_ci break; 1197bf215546Sopenharmony_ci 1198bf215546Sopenharmony_ci case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: 1199bf215546Sopenharmony_ci fprintf(stream, "Vulkan push constants and API params"); 1200bf215546Sopenharmony_ci break; 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci case ANV_DESCRIPTOR_SET_DESCRIPTORS: 1203bf215546Sopenharmony_ci fprintf(stream, "Descriptor buffer for set %d (start=%dB)", 1204bf215546Sopenharmony_ci stage->bind_map.push_ranges[i].index, 1205bf215546Sopenharmony_ci stage->bind_map.push_ranges[i].start * 32); 1206bf215546Sopenharmony_ci break; 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_ci case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: 1209bf215546Sopenharmony_ci unreachable("gl_NumWorkgroups is never pushed"); 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_ci case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS: 1212bf215546Sopenharmony_ci fprintf(stream, "Inline shader constant data (start=%dB)", 1213bf215546Sopenharmony_ci stage->bind_map.push_ranges[i].start * 32); 1214bf215546Sopenharmony_ci break; 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS: 1217bf215546Sopenharmony_ci unreachable("Color attachments can't be pushed"); 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci default: 1220bf215546Sopenharmony_ci fprintf(stream, "UBO (set=%d binding=%d start=%dB)", 1221bf215546Sopenharmony_ci stage->bind_map.push_ranges[i].set, 1222bf215546Sopenharmony_ci stage->bind_map.push_ranges[i].index, 1223bf215546Sopenharmony_ci stage->bind_map.push_ranges[i].start * 32); 1224bf215546Sopenharmony_ci break; 1225bf215546Sopenharmony_ci } 1226bf215546Sopenharmony_ci fprintf(stream, "\n"); 1227bf215546Sopenharmony_ci } 1228bf215546Sopenharmony_ci fprintf(stream, "\n"); 1229bf215546Sopenharmony_ci } 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci /* Creating this is far cheaper than it looks. It's perfectly fine to 1232bf215546Sopenharmony_ci * do it for every binary. 1233bf215546Sopenharmony_ci */ 1234bf215546Sopenharmony_ci intel_disassemble(&pipeline->device->physical->compiler->isa, 1235bf215546Sopenharmony_ci stage->code, code_offset, stream); 1236bf215546Sopenharmony_ci 1237bf215546Sopenharmony_ci fclose(stream); 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_ci /* Copy it to a ralloc'd thing */ 1240bf215546Sopenharmony_ci disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1); 1241bf215546Sopenharmony_ci memcpy(disasm, stream_data, stream_size); 1242bf215546Sopenharmony_ci disasm[stream_size] = 0; 1243bf215546Sopenharmony_ci 1244bf215546Sopenharmony_ci free(stream_data); 1245bf215546Sopenharmony_ci } 1246bf215546Sopenharmony_ci 1247bf215546Sopenharmony_ci const struct anv_pipeline_executable exe = { 1248bf215546Sopenharmony_ci .stage = stage->stage, 1249bf215546Sopenharmony_ci .stats = *stats, 1250bf215546Sopenharmony_ci .nir = nir, 1251bf215546Sopenharmony_ci .disasm = disasm, 1252bf215546Sopenharmony_ci }; 1253bf215546Sopenharmony_ci util_dynarray_append(&pipeline->executables, 1254bf215546Sopenharmony_ci struct anv_pipeline_executable, exe); 1255bf215546Sopenharmony_ci} 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_cistatic void 1258bf215546Sopenharmony_cianv_pipeline_add_executables(struct anv_pipeline *pipeline, 1259bf215546Sopenharmony_ci struct anv_pipeline_stage *stage, 1260bf215546Sopenharmony_ci struct anv_shader_bin *bin) 1261bf215546Sopenharmony_ci{ 1262bf215546Sopenharmony_ci if (stage->stage == MESA_SHADER_FRAGMENT) { 1263bf215546Sopenharmony_ci /* We pull the prog data and stats out of the anv_shader_bin because 1264bf215546Sopenharmony_ci * the anv_pipeline_stage may not be fully populated if we successfully 1265bf215546Sopenharmony_ci * looked up the shader in a cache. 1266bf215546Sopenharmony_ci */ 1267bf215546Sopenharmony_ci const struct brw_wm_prog_data *wm_prog_data = 1268bf215546Sopenharmony_ci (const struct brw_wm_prog_data *)bin->prog_data; 1269bf215546Sopenharmony_ci struct brw_compile_stats *stats = bin->stats; 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_ci if (wm_prog_data->dispatch_8) { 1272bf215546Sopenharmony_ci anv_pipeline_add_executable(pipeline, stage, stats++, 0); 1273bf215546Sopenharmony_ci } 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci if (wm_prog_data->dispatch_16) { 1276bf215546Sopenharmony_ci anv_pipeline_add_executable(pipeline, stage, stats++, 1277bf215546Sopenharmony_ci wm_prog_data->prog_offset_16); 1278bf215546Sopenharmony_ci } 1279bf215546Sopenharmony_ci 1280bf215546Sopenharmony_ci if (wm_prog_data->dispatch_32) { 1281bf215546Sopenharmony_ci anv_pipeline_add_executable(pipeline, stage, stats++, 1282bf215546Sopenharmony_ci wm_prog_data->prog_offset_32); 1283bf215546Sopenharmony_ci } 1284bf215546Sopenharmony_ci } else { 1285bf215546Sopenharmony_ci anv_pipeline_add_executable(pipeline, stage, bin->stats, 0); 1286bf215546Sopenharmony_ci } 1287bf215546Sopenharmony_ci 1288bf215546Sopenharmony_ci pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries); 1289bf215546Sopenharmony_ci} 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_cistatic void 1292bf215546Sopenharmony_cianv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline) 1293bf215546Sopenharmony_ci{ 1294bf215546Sopenharmony_ci /* TODO: Cache this pipeline-wide information. */ 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_ci if (anv_pipeline_is_primitive(pipeline)) { 1297bf215546Sopenharmony_ci /* Primitive replication depends on information from all the shaders. 1298bf215546Sopenharmony_ci * Recover this bit from the fact that we have more than one position slot 1299bf215546Sopenharmony_ci * in the vertex shader when using it. 1300bf215546Sopenharmony_ci */ 1301bf215546Sopenharmony_ci assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); 1302bf215546Sopenharmony_ci int pos_slots = 0; 1303bf215546Sopenharmony_ci const struct brw_vue_prog_data *vue_prog_data = 1304bf215546Sopenharmony_ci (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data; 1305bf215546Sopenharmony_ci const struct brw_vue_map *vue_map = &vue_prog_data->vue_map; 1306bf215546Sopenharmony_ci for (int i = 0; i < vue_map->num_slots; i++) { 1307bf215546Sopenharmony_ci if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS) 1308bf215546Sopenharmony_ci pos_slots++; 1309bf215546Sopenharmony_ci } 1310bf215546Sopenharmony_ci pipeline->use_primitive_replication = pos_slots > 1; 1311bf215546Sopenharmony_ci } 1312bf215546Sopenharmony_ci} 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_cistatic void 1315bf215546Sopenharmony_cianv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline, 1316bf215546Sopenharmony_ci const struct vk_graphics_pipeline_state *state, 1317bf215546Sopenharmony_ci struct anv_pipeline_stage *stages) 1318bf215546Sopenharmony_ci{ 1319bf215546Sopenharmony_ci for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) { 1320bf215546Sopenharmony_ci if (!stages[s].info) 1321bf215546Sopenharmony_ci continue; 1322bf215546Sopenharmony_ci 1323bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_ci vk_pipeline_hash_shader_stage(stages[s].info, stages[s].shader_sha1); 1326bf215546Sopenharmony_ci 1327bf215546Sopenharmony_ci const struct anv_device *device = pipeline->base.device; 1328bf215546Sopenharmony_ci switch (stages[s].stage) { 1329bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 1330bf215546Sopenharmony_ci populate_vs_prog_key(device, 1331bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 1332bf215546Sopenharmony_ci &stages[s].key.vs); 1333bf215546Sopenharmony_ci break; 1334bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 1335bf215546Sopenharmony_ci populate_tcs_prog_key(device, 1336bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 1337bf215546Sopenharmony_ci state->ts->patch_control_points, 1338bf215546Sopenharmony_ci &stages[s].key.tcs); 1339bf215546Sopenharmony_ci break; 1340bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 1341bf215546Sopenharmony_ci populate_tes_prog_key(device, 1342bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 1343bf215546Sopenharmony_ci &stages[s].key.tes); 1344bf215546Sopenharmony_ci break; 1345bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 1346bf215546Sopenharmony_ci populate_gs_prog_key(device, 1347bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 1348bf215546Sopenharmony_ci &stages[s].key.gs); 1349bf215546Sopenharmony_ci break; 1350bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: { 1351bf215546Sopenharmony_ci populate_wm_prog_key(pipeline, 1352bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 1353bf215546Sopenharmony_ci state->dynamic, state->ms, state->fsr, state->rp, 1354bf215546Sopenharmony_ci &stages[s].key.wm); 1355bf215546Sopenharmony_ci break; 1356bf215546Sopenharmony_ci } 1357bf215546Sopenharmony_ci case MESA_SHADER_TASK: 1358bf215546Sopenharmony_ci populate_task_prog_key(device, 1359bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 1360bf215546Sopenharmony_ci &stages[s].key.task); 1361bf215546Sopenharmony_ci break; 1362bf215546Sopenharmony_ci case MESA_SHADER_MESH: 1363bf215546Sopenharmony_ci populate_mesh_prog_key(device, 1364bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 1365bf215546Sopenharmony_ci &stages[s].key.mesh); 1366bf215546Sopenharmony_ci break; 1367bf215546Sopenharmony_ci default: 1368bf215546Sopenharmony_ci unreachable("Invalid graphics shader stage"); 1369bf215546Sopenharmony_ci } 1370bf215546Sopenharmony_ci 1371bf215546Sopenharmony_ci stages[s].feedback.duration += os_time_get_nano() - stage_start; 1372bf215546Sopenharmony_ci stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 1373bf215546Sopenharmony_ci } 1374bf215546Sopenharmony_ci 1375bf215546Sopenharmony_ci assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT || 1376bf215546Sopenharmony_ci pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV); 1377bf215546Sopenharmony_ci} 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_cistatic bool 1380bf215546Sopenharmony_cianv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline, 1381bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 1382bf215546Sopenharmony_ci struct anv_pipeline_stage *stages, 1383bf215546Sopenharmony_ci VkPipelineCreationFeedbackEXT *pipeline_feedback) 1384bf215546Sopenharmony_ci{ 1385bf215546Sopenharmony_ci unsigned found = 0; 1386bf215546Sopenharmony_ci unsigned cache_hits = 0; 1387bf215546Sopenharmony_ci for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) { 1388bf215546Sopenharmony_ci if (!stages[s].info) 1389bf215546Sopenharmony_ci continue; 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1392bf215546Sopenharmony_ci 1393bf215546Sopenharmony_ci bool cache_hit; 1394bf215546Sopenharmony_ci struct anv_shader_bin *bin = 1395bf215546Sopenharmony_ci anv_device_search_for_kernel(pipeline->base.device, cache, 1396bf215546Sopenharmony_ci &stages[s].cache_key, 1397bf215546Sopenharmony_ci sizeof(stages[s].cache_key), &cache_hit); 1398bf215546Sopenharmony_ci if (bin) { 1399bf215546Sopenharmony_ci found++; 1400bf215546Sopenharmony_ci pipeline->shaders[s] = bin; 1401bf215546Sopenharmony_ci } 1402bf215546Sopenharmony_ci 1403bf215546Sopenharmony_ci if (cache_hit) { 1404bf215546Sopenharmony_ci cache_hits++; 1405bf215546Sopenharmony_ci stages[s].feedback.flags |= 1406bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 1407bf215546Sopenharmony_ci } 1408bf215546Sopenharmony_ci stages[s].feedback.duration += os_time_get_nano() - stage_start; 1409bf215546Sopenharmony_ci } 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_ci if (found == __builtin_popcount(pipeline->active_stages)) { 1412bf215546Sopenharmony_ci if (cache_hits == found) { 1413bf215546Sopenharmony_ci pipeline_feedback->flags |= 1414bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 1415bf215546Sopenharmony_ci } 1416bf215546Sopenharmony_ci /* We found all our shaders in the cache. We're done. */ 1417bf215546Sopenharmony_ci for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 1418bf215546Sopenharmony_ci if (!stages[s].info) 1419bf215546Sopenharmony_ci continue; 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci anv_pipeline_add_executables(&pipeline->base, &stages[s], 1422bf215546Sopenharmony_ci pipeline->shaders[s]); 1423bf215546Sopenharmony_ci } 1424bf215546Sopenharmony_ci anv_pipeline_init_from_cached_graphics(pipeline); 1425bf215546Sopenharmony_ci return true; 1426bf215546Sopenharmony_ci } else if (found > 0) { 1427bf215546Sopenharmony_ci /* We found some but not all of our shaders. This shouldn't happen most 1428bf215546Sopenharmony_ci * of the time but it can if we have a partially populated pipeline 1429bf215546Sopenharmony_ci * cache. 1430bf215546Sopenharmony_ci */ 1431bf215546Sopenharmony_ci assert(found < __builtin_popcount(pipeline->active_stages)); 1432bf215546Sopenharmony_ci 1433bf215546Sopenharmony_ci vk_perf(VK_LOG_OBJS(cache ? &cache->base : 1434bf215546Sopenharmony_ci &pipeline->base.device->vk.base), 1435bf215546Sopenharmony_ci "Found a partial pipeline in the cache. This is " 1436bf215546Sopenharmony_ci "most likely caused by an incomplete pipeline cache " 1437bf215546Sopenharmony_ci "import or export"); 1438bf215546Sopenharmony_ci 1439bf215546Sopenharmony_ci /* We're going to have to recompile anyway, so just throw away our 1440bf215546Sopenharmony_ci * references to the shaders in the cache. We'll get them out of the 1441bf215546Sopenharmony_ci * cache again as part of the compilation process. 1442bf215546Sopenharmony_ci */ 1443bf215546Sopenharmony_ci for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 1444bf215546Sopenharmony_ci stages[s].feedback.flags = 0; 1445bf215546Sopenharmony_ci if (pipeline->shaders[s]) { 1446bf215546Sopenharmony_ci anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]); 1447bf215546Sopenharmony_ci pipeline->shaders[s] = NULL; 1448bf215546Sopenharmony_ci } 1449bf215546Sopenharmony_ci } 1450bf215546Sopenharmony_ci } 1451bf215546Sopenharmony_ci 1452bf215546Sopenharmony_ci return false; 1453bf215546Sopenharmony_ci} 1454bf215546Sopenharmony_ci 1455bf215546Sopenharmony_cistatic const gl_shader_stage graphics_shader_order[] = { 1456bf215546Sopenharmony_ci MESA_SHADER_VERTEX, 1457bf215546Sopenharmony_ci MESA_SHADER_TESS_CTRL, 1458bf215546Sopenharmony_ci MESA_SHADER_TESS_EVAL, 1459bf215546Sopenharmony_ci MESA_SHADER_GEOMETRY, 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci MESA_SHADER_TASK, 1462bf215546Sopenharmony_ci MESA_SHADER_MESH, 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci MESA_SHADER_FRAGMENT, 1465bf215546Sopenharmony_ci}; 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_cistatic VkResult 1468bf215546Sopenharmony_cianv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline, 1469bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 1470bf215546Sopenharmony_ci struct anv_pipeline_stage *stages, 1471bf215546Sopenharmony_ci void *pipeline_ctx) 1472bf215546Sopenharmony_ci{ 1473bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) { 1474bf215546Sopenharmony_ci gl_shader_stage s = graphics_shader_order[i]; 1475bf215546Sopenharmony_ci if (!stages[s].info) 1476bf215546Sopenharmony_ci continue; 1477bf215546Sopenharmony_ci 1478bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1479bf215546Sopenharmony_ci 1480bf215546Sopenharmony_ci assert(stages[s].stage == s); 1481bf215546Sopenharmony_ci assert(pipeline->shaders[s] == NULL); 1482bf215546Sopenharmony_ci 1483bf215546Sopenharmony_ci stages[s].bind_map = (struct anv_pipeline_bind_map) { 1484bf215546Sopenharmony_ci .surface_to_descriptor = stages[s].surface_to_descriptor, 1485bf215546Sopenharmony_ci .sampler_to_descriptor = stages[s].sampler_to_descriptor 1486bf215546Sopenharmony_ci }; 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_ci stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, 1489bf215546Sopenharmony_ci pipeline_ctx, 1490bf215546Sopenharmony_ci &stages[s]); 1491bf215546Sopenharmony_ci if (stages[s].nir == NULL) { 1492bf215546Sopenharmony_ci return vk_error(pipeline, VK_ERROR_UNKNOWN); 1493bf215546Sopenharmony_ci } 1494bf215546Sopenharmony_ci 1495bf215546Sopenharmony_ci stages[s].feedback.duration += os_time_get_nano() - stage_start; 1496bf215546Sopenharmony_ci } 1497bf215546Sopenharmony_ci 1498bf215546Sopenharmony_ci return VK_SUCCESS; 1499bf215546Sopenharmony_ci} 1500bf215546Sopenharmony_ci 1501bf215546Sopenharmony_cistatic VkResult 1502bf215546Sopenharmony_cianv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline, 1503bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 1504bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *info, 1505bf215546Sopenharmony_ci const struct vk_graphics_pipeline_state *state) 1506bf215546Sopenharmony_ci{ 1507bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 1508bf215546Sopenharmony_ci VkResult result; 1509bf215546Sopenharmony_ci 1510bf215546Sopenharmony_ci VkPipelineCreationFeedbackEXT pipeline_feedback = { 1511bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 1512bf215546Sopenharmony_ci }; 1513bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_ci const struct brw_compiler *compiler = pipeline->base.device->physical->compiler; 1516bf215546Sopenharmony_ci struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {}; 1517bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->stageCount; i++) { 1518bf215546Sopenharmony_ci gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage); 1519bf215546Sopenharmony_ci stages[stage].stage = stage; 1520bf215546Sopenharmony_ci stages[stage].info = &info->pStages[i]; 1521bf215546Sopenharmony_ci } 1522bf215546Sopenharmony_ci 1523bf215546Sopenharmony_ci anv_graphics_pipeline_init_keys(pipeline, state, stages); 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci unsigned char sha1[20]; 1526bf215546Sopenharmony_ci anv_pipeline_hash_graphics(pipeline, layout, stages, sha1); 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) { 1529bf215546Sopenharmony_ci if (!stages[s].info) 1530bf215546Sopenharmony_ci continue; 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci stages[s].cache_key.stage = s; 1533bf215546Sopenharmony_ci memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1)); 1534bf215546Sopenharmony_ci } 1535bf215546Sopenharmony_ci 1536bf215546Sopenharmony_ci const bool skip_cache_lookup = 1537bf215546Sopenharmony_ci (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); 1538bf215546Sopenharmony_ci if (!skip_cache_lookup) { 1539bf215546Sopenharmony_ci bool found_all_shaders = 1540bf215546Sopenharmony_ci anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages, 1541bf215546Sopenharmony_ci &pipeline_feedback); 1542bf215546Sopenharmony_ci if (found_all_shaders) 1543bf215546Sopenharmony_ci goto done; 1544bf215546Sopenharmony_ci } 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ci if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) 1547bf215546Sopenharmony_ci return VK_PIPELINE_COMPILE_REQUIRED; 1548bf215546Sopenharmony_ci 1549bf215546Sopenharmony_ci void *pipeline_ctx = ralloc_context(NULL); 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_ci result = anv_graphics_pipeline_load_nir(pipeline, cache, stages, 1552bf215546Sopenharmony_ci pipeline_ctx); 1553bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1554bf215546Sopenharmony_ci goto fail; 1555bf215546Sopenharmony_ci 1556bf215546Sopenharmony_ci /* Walk backwards to link */ 1557bf215546Sopenharmony_ci struct anv_pipeline_stage *next_stage = NULL; 1558bf215546Sopenharmony_ci for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) { 1559bf215546Sopenharmony_ci gl_shader_stage s = graphics_shader_order[i]; 1560bf215546Sopenharmony_ci if (!stages[s].info) 1561bf215546Sopenharmony_ci continue; 1562bf215546Sopenharmony_ci 1563bf215546Sopenharmony_ci switch (s) { 1564bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 1565bf215546Sopenharmony_ci anv_pipeline_link_vs(compiler, &stages[s], next_stage); 1566bf215546Sopenharmony_ci break; 1567bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 1568bf215546Sopenharmony_ci anv_pipeline_link_tcs(compiler, &stages[s], next_stage); 1569bf215546Sopenharmony_ci break; 1570bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 1571bf215546Sopenharmony_ci anv_pipeline_link_tes(compiler, &stages[s], next_stage); 1572bf215546Sopenharmony_ci break; 1573bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 1574bf215546Sopenharmony_ci anv_pipeline_link_gs(compiler, &stages[s], next_stage); 1575bf215546Sopenharmony_ci break; 1576bf215546Sopenharmony_ci case MESA_SHADER_TASK: 1577bf215546Sopenharmony_ci anv_pipeline_link_task(compiler, &stages[s], next_stage); 1578bf215546Sopenharmony_ci break; 1579bf215546Sopenharmony_ci case MESA_SHADER_MESH: 1580bf215546Sopenharmony_ci anv_pipeline_link_mesh(compiler, &stages[s], next_stage); 1581bf215546Sopenharmony_ci break; 1582bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 1583bf215546Sopenharmony_ci anv_pipeline_link_fs(compiler, &stages[s], state->rp); 1584bf215546Sopenharmony_ci break; 1585bf215546Sopenharmony_ci default: 1586bf215546Sopenharmony_ci unreachable("Invalid graphics shader stage"); 1587bf215546Sopenharmony_ci } 1588bf215546Sopenharmony_ci 1589bf215546Sopenharmony_ci next_stage = &stages[s]; 1590bf215546Sopenharmony_ci } 1591bf215546Sopenharmony_ci 1592bf215546Sopenharmony_ci if (pipeline->base.device->info.ver >= 12 && 1593bf215546Sopenharmony_ci pipeline->view_mask != 0) { 1594bf215546Sopenharmony_ci /* For some pipelines HW Primitive Replication can be used instead of 1595bf215546Sopenharmony_ci * instancing to implement Multiview. This depend on how viewIndex is 1596bf215546Sopenharmony_ci * used in all the active shaders, so this check can't be done per 1597bf215546Sopenharmony_ci * individual shaders. 1598bf215546Sopenharmony_ci */ 1599bf215546Sopenharmony_ci nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {}; 1600bf215546Sopenharmony_ci for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++) 1601bf215546Sopenharmony_ci shaders[s] = stages[s].nir; 1602bf215546Sopenharmony_ci 1603bf215546Sopenharmony_ci pipeline->use_primitive_replication = 1604bf215546Sopenharmony_ci anv_check_for_primitive_replication(shaders, pipeline); 1605bf215546Sopenharmony_ci } else { 1606bf215546Sopenharmony_ci pipeline->use_primitive_replication = false; 1607bf215546Sopenharmony_ci } 1608bf215546Sopenharmony_ci 1609bf215546Sopenharmony_ci struct anv_pipeline_stage *prev_stage = NULL; 1610bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) { 1611bf215546Sopenharmony_ci gl_shader_stage s = graphics_shader_order[i]; 1612bf215546Sopenharmony_ci if (!stages[s].info) 1613bf215546Sopenharmony_ci continue; 1614bf215546Sopenharmony_ci 1615bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1616bf215546Sopenharmony_ci 1617bf215546Sopenharmony_ci void *stage_ctx = ralloc_context(NULL); 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout); 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci if (prev_stage && compiler->nir_options[s]->unify_interfaces) { 1622bf215546Sopenharmony_ci prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read & 1623bf215546Sopenharmony_ci ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); 1624bf215546Sopenharmony_ci stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written & 1625bf215546Sopenharmony_ci ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); 1626bf215546Sopenharmony_ci prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read; 1627bf215546Sopenharmony_ci stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written; 1628bf215546Sopenharmony_ci } 1629bf215546Sopenharmony_ci 1630bf215546Sopenharmony_ci ralloc_free(stage_ctx); 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_ci stages[s].feedback.duration += os_time_get_nano() - stage_start; 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci prev_stage = &stages[s]; 1635bf215546Sopenharmony_ci } 1636bf215546Sopenharmony_ci 1637bf215546Sopenharmony_ci /* In the case the platform can write the primitive variable shading rate, 1638bf215546Sopenharmony_ci * figure out the last geometry stage that should write the primitive 1639bf215546Sopenharmony_ci * shading rate, and ensure it is marked as used there. The backend will 1640bf215546Sopenharmony_ci * write a default value if the shader doesn't actually write it. 1641bf215546Sopenharmony_ci * 1642bf215546Sopenharmony_ci * We iterate backwards in the stage and stop on the first shader that can 1643bf215546Sopenharmony_ci * set the value. 1644bf215546Sopenharmony_ci */ 1645bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &pipeline->base.device->info; 1646bf215546Sopenharmony_ci if (devinfo->has_coarse_pixel_primitive_and_cb && 1647bf215546Sopenharmony_ci stages[MESA_SHADER_FRAGMENT].info && 1648bf215546Sopenharmony_ci stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel && 1649bf215546Sopenharmony_ci !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading && 1650bf215546Sopenharmony_ci stages[MESA_SHADER_MESH].info == NULL) { 1651bf215546Sopenharmony_ci struct anv_pipeline_stage *last_psr = NULL; 1652bf215546Sopenharmony_ci 1653bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) { 1654bf215546Sopenharmony_ci gl_shader_stage s = 1655bf215546Sopenharmony_ci graphics_shader_order[ARRAY_SIZE(graphics_shader_order) - i - 1]; 1656bf215546Sopenharmony_ci 1657bf215546Sopenharmony_ci if (!stages[s].info || 1658bf215546Sopenharmony_ci !gl_shader_stage_can_set_fragment_shading_rate(s)) 1659bf215546Sopenharmony_ci continue; 1660bf215546Sopenharmony_ci 1661bf215546Sopenharmony_ci last_psr = &stages[s]; 1662bf215546Sopenharmony_ci break; 1663bf215546Sopenharmony_ci } 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_ci assert(last_psr); 1666bf215546Sopenharmony_ci last_psr->nir->info.outputs_written |= VARYING_BIT_PRIMITIVE_SHADING_RATE; 1667bf215546Sopenharmony_ci } 1668bf215546Sopenharmony_ci 1669bf215546Sopenharmony_ci prev_stage = NULL; 1670bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) { 1671bf215546Sopenharmony_ci gl_shader_stage s = graphics_shader_order[i]; 1672bf215546Sopenharmony_ci if (!stages[s].info) 1673bf215546Sopenharmony_ci continue; 1674bf215546Sopenharmony_ci 1675bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1676bf215546Sopenharmony_ci 1677bf215546Sopenharmony_ci void *stage_ctx = ralloc_context(NULL); 1678bf215546Sopenharmony_ci 1679bf215546Sopenharmony_ci switch (s) { 1680bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 1681bf215546Sopenharmony_ci anv_pipeline_compile_vs(compiler, stage_ctx, pipeline, 1682bf215546Sopenharmony_ci &stages[s]); 1683bf215546Sopenharmony_ci break; 1684bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 1685bf215546Sopenharmony_ci anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device, 1686bf215546Sopenharmony_ci &stages[s], prev_stage); 1687bf215546Sopenharmony_ci break; 1688bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 1689bf215546Sopenharmony_ci anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device, 1690bf215546Sopenharmony_ci &stages[s], prev_stage); 1691bf215546Sopenharmony_ci break; 1692bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 1693bf215546Sopenharmony_ci anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device, 1694bf215546Sopenharmony_ci &stages[s], prev_stage); 1695bf215546Sopenharmony_ci break; 1696bf215546Sopenharmony_ci case MESA_SHADER_TASK: 1697bf215546Sopenharmony_ci anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device, 1698bf215546Sopenharmony_ci &stages[s]); 1699bf215546Sopenharmony_ci break; 1700bf215546Sopenharmony_ci case MESA_SHADER_MESH: 1701bf215546Sopenharmony_ci anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device, 1702bf215546Sopenharmony_ci &stages[s], prev_stage); 1703bf215546Sopenharmony_ci break; 1704bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 1705bf215546Sopenharmony_ci anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device, 1706bf215546Sopenharmony_ci &stages[s], prev_stage); 1707bf215546Sopenharmony_ci break; 1708bf215546Sopenharmony_ci default: 1709bf215546Sopenharmony_ci unreachable("Invalid graphics shader stage"); 1710bf215546Sopenharmony_ci } 1711bf215546Sopenharmony_ci if (stages[s].code == NULL) { 1712bf215546Sopenharmony_ci ralloc_free(stage_ctx); 1713bf215546Sopenharmony_ci result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY); 1714bf215546Sopenharmony_ci goto fail; 1715bf215546Sopenharmony_ci } 1716bf215546Sopenharmony_ci 1717bf215546Sopenharmony_ci anv_nir_validate_push_layout(&stages[s].prog_data.base, 1718bf215546Sopenharmony_ci &stages[s].bind_map); 1719bf215546Sopenharmony_ci 1720bf215546Sopenharmony_ci struct anv_shader_bin *bin = 1721bf215546Sopenharmony_ci anv_device_upload_kernel(pipeline->base.device, cache, s, 1722bf215546Sopenharmony_ci &stages[s].cache_key, 1723bf215546Sopenharmony_ci sizeof(stages[s].cache_key), 1724bf215546Sopenharmony_ci stages[s].code, 1725bf215546Sopenharmony_ci stages[s].prog_data.base.program_size, 1726bf215546Sopenharmony_ci &stages[s].prog_data.base, 1727bf215546Sopenharmony_ci brw_prog_data_size(s), 1728bf215546Sopenharmony_ci stages[s].stats, stages[s].num_stats, 1729bf215546Sopenharmony_ci stages[s].nir->xfb_info, 1730bf215546Sopenharmony_ci &stages[s].bind_map); 1731bf215546Sopenharmony_ci if (!bin) { 1732bf215546Sopenharmony_ci ralloc_free(stage_ctx); 1733bf215546Sopenharmony_ci result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 1734bf215546Sopenharmony_ci goto fail; 1735bf215546Sopenharmony_ci } 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci anv_pipeline_add_executables(&pipeline->base, &stages[s], bin); 1738bf215546Sopenharmony_ci 1739bf215546Sopenharmony_ci pipeline->shaders[s] = bin; 1740bf215546Sopenharmony_ci ralloc_free(stage_ctx); 1741bf215546Sopenharmony_ci 1742bf215546Sopenharmony_ci stages[s].feedback.duration += os_time_get_nano() - stage_start; 1743bf215546Sopenharmony_ci 1744bf215546Sopenharmony_ci prev_stage = &stages[s]; 1745bf215546Sopenharmony_ci } 1746bf215546Sopenharmony_ci 1747bf215546Sopenharmony_ci ralloc_free(pipeline_ctx); 1748bf215546Sopenharmony_ci 1749bf215546Sopenharmony_cidone: 1750bf215546Sopenharmony_ci 1751bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 1752bf215546Sopenharmony_ci 1753bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *create_feedback = 1754bf215546Sopenharmony_ci vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 1755bf215546Sopenharmony_ci if (create_feedback) { 1756bf215546Sopenharmony_ci *create_feedback->pPipelineCreationFeedback = pipeline_feedback; 1757bf215546Sopenharmony_ci 1758bf215546Sopenharmony_ci uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount; 1759bf215546Sopenharmony_ci assert(stage_count == 0 || info->stageCount == stage_count); 1760bf215546Sopenharmony_ci for (uint32_t i = 0; i < stage_count; i++) { 1761bf215546Sopenharmony_ci gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage); 1762bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback; 1763bf215546Sopenharmony_ci } 1764bf215546Sopenharmony_ci } 1765bf215546Sopenharmony_ci 1766bf215546Sopenharmony_ci return VK_SUCCESS; 1767bf215546Sopenharmony_ci 1768bf215546Sopenharmony_cifail: 1769bf215546Sopenharmony_ci ralloc_free(pipeline_ctx); 1770bf215546Sopenharmony_ci 1771bf215546Sopenharmony_ci for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 1772bf215546Sopenharmony_ci if (pipeline->shaders[s]) 1773bf215546Sopenharmony_ci anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]); 1774bf215546Sopenharmony_ci } 1775bf215546Sopenharmony_ci 1776bf215546Sopenharmony_ci return result; 1777bf215546Sopenharmony_ci} 1778bf215546Sopenharmony_ci 1779bf215546Sopenharmony_cistatic VkResult 1780bf215546Sopenharmony_cianv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, 1781bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 1782bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *info) 1783bf215546Sopenharmony_ci{ 1784bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *sinfo = &info->stage; 1785bf215546Sopenharmony_ci assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT); 1786bf215546Sopenharmony_ci 1787bf215546Sopenharmony_ci VkPipelineCreationFeedback pipeline_feedback = { 1788bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 1789bf215546Sopenharmony_ci }; 1790bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 1791bf215546Sopenharmony_ci 1792bf215546Sopenharmony_ci struct anv_device *device = pipeline->base.device; 1793bf215546Sopenharmony_ci const struct brw_compiler *compiler = device->physical->compiler; 1794bf215546Sopenharmony_ci 1795bf215546Sopenharmony_ci struct anv_pipeline_stage stage = { 1796bf215546Sopenharmony_ci .stage = MESA_SHADER_COMPUTE, 1797bf215546Sopenharmony_ci .info = &info->stage, 1798bf215546Sopenharmony_ci .cache_key = { 1799bf215546Sopenharmony_ci .stage = MESA_SHADER_COMPUTE, 1800bf215546Sopenharmony_ci }, 1801bf215546Sopenharmony_ci .feedback = { 1802bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 1803bf215546Sopenharmony_ci }, 1804bf215546Sopenharmony_ci }; 1805bf215546Sopenharmony_ci vk_pipeline_hash_shader_stage(&info->stage, stage.shader_sha1); 1806bf215546Sopenharmony_ci 1807bf215546Sopenharmony_ci struct anv_shader_bin *bin = NULL; 1808bf215546Sopenharmony_ci 1809bf215546Sopenharmony_ci populate_cs_prog_key(device, device->robust_buffer_access, &stage.key.cs); 1810bf215546Sopenharmony_ci 1811bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci const bool skip_cache_lookup = 1814bf215546Sopenharmony_ci (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); 1815bf215546Sopenharmony_ci 1816bf215546Sopenharmony_ci anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1); 1817bf215546Sopenharmony_ci 1818bf215546Sopenharmony_ci bool cache_hit = false; 1819bf215546Sopenharmony_ci if (!skip_cache_lookup) { 1820bf215546Sopenharmony_ci bin = anv_device_search_for_kernel(device, cache, 1821bf215546Sopenharmony_ci &stage.cache_key, 1822bf215546Sopenharmony_ci sizeof(stage.cache_key), 1823bf215546Sopenharmony_ci &cache_hit); 1824bf215546Sopenharmony_ci } 1825bf215546Sopenharmony_ci 1826bf215546Sopenharmony_ci if (bin == NULL && 1827bf215546Sopenharmony_ci (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)) 1828bf215546Sopenharmony_ci return VK_PIPELINE_COMPILE_REQUIRED; 1829bf215546Sopenharmony_ci 1830bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 1831bf215546Sopenharmony_ci if (bin == NULL) { 1832bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1833bf215546Sopenharmony_ci 1834bf215546Sopenharmony_ci stage.bind_map = (struct anv_pipeline_bind_map) { 1835bf215546Sopenharmony_ci .surface_to_descriptor = stage.surface_to_descriptor, 1836bf215546Sopenharmony_ci .sampler_to_descriptor = stage.sampler_to_descriptor 1837bf215546Sopenharmony_ci }; 1838bf215546Sopenharmony_ci 1839bf215546Sopenharmony_ci /* Set up a binding for the gl_NumWorkGroups */ 1840bf215546Sopenharmony_ci stage.bind_map.surface_count = 1; 1841bf215546Sopenharmony_ci stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) { 1842bf215546Sopenharmony_ci .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS, 1843bf215546Sopenharmony_ci }; 1844bf215546Sopenharmony_ci 1845bf215546Sopenharmony_ci stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage); 1846bf215546Sopenharmony_ci if (stage.nir == NULL) { 1847bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1848bf215546Sopenharmony_ci return vk_error(pipeline, VK_ERROR_UNKNOWN); 1849bf215546Sopenharmony_ci } 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci NIR_PASS(_, stage.nir, anv_nir_add_base_work_group_id); 1852bf215546Sopenharmony_ci 1853bf215546Sopenharmony_ci anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout); 1854bf215546Sopenharmony_ci 1855bf215546Sopenharmony_ci unsigned local_size = stage.nir->info.workgroup_size[0] * 1856bf215546Sopenharmony_ci stage.nir->info.workgroup_size[1] * 1857bf215546Sopenharmony_ci stage.nir->info.workgroup_size[2]; 1858bf215546Sopenharmony_ci 1859bf215546Sopenharmony_ci /* Games don't always request full subgroups when they should, 1860bf215546Sopenharmony_ci * which can cause bugs, as they may expect bigger size of the 1861bf215546Sopenharmony_ci * subgroup than we choose for the execution. 1862bf215546Sopenharmony_ci */ 1863bf215546Sopenharmony_ci if (device->physical->instance->assume_full_subgroups && 1864bf215546Sopenharmony_ci stage.nir->info.cs.uses_wide_subgroup_intrinsics && 1865bf215546Sopenharmony_ci stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT && 1866bf215546Sopenharmony_ci local_size && 1867bf215546Sopenharmony_ci local_size % BRW_SUBGROUP_SIZE == 0) 1868bf215546Sopenharmony_ci stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS; 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci /* If the client requests that we dispatch full subgroups but doesn't 1871bf215546Sopenharmony_ci * allow us to pick a subgroup size, we have to smash it to the API 1872bf215546Sopenharmony_ci * value of 32. Performance will likely be terrible in this case but 1873bf215546Sopenharmony_ci * there's nothing we can do about that. The client should have chosen 1874bf215546Sopenharmony_ci * a size. 1875bf215546Sopenharmony_ci */ 1876bf215546Sopenharmony_ci if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS) 1877bf215546Sopenharmony_ci stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE; 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_ci stage.num_stats = 1; 1880bf215546Sopenharmony_ci 1881bf215546Sopenharmony_ci struct brw_compile_cs_params params = { 1882bf215546Sopenharmony_ci .nir = stage.nir, 1883bf215546Sopenharmony_ci .key = &stage.key.cs, 1884bf215546Sopenharmony_ci .prog_data = &stage.prog_data.cs, 1885bf215546Sopenharmony_ci .stats = stage.stats, 1886bf215546Sopenharmony_ci .log_data = device, 1887bf215546Sopenharmony_ci }; 1888bf215546Sopenharmony_ci 1889bf215546Sopenharmony_ci stage.code = brw_compile_cs(compiler, mem_ctx, ¶ms); 1890bf215546Sopenharmony_ci if (stage.code == NULL) { 1891bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1892bf215546Sopenharmony_ci return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 1893bf215546Sopenharmony_ci } 1894bf215546Sopenharmony_ci 1895bf215546Sopenharmony_ci anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map); 1896bf215546Sopenharmony_ci 1897bf215546Sopenharmony_ci if (!stage.prog_data.cs.uses_num_work_groups) { 1898bf215546Sopenharmony_ci assert(stage.bind_map.surface_to_descriptor[0].set == 1899bf215546Sopenharmony_ci ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS); 1900bf215546Sopenharmony_ci stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL; 1901bf215546Sopenharmony_ci } 1902bf215546Sopenharmony_ci 1903bf215546Sopenharmony_ci const unsigned code_size = stage.prog_data.base.program_size; 1904bf215546Sopenharmony_ci bin = anv_device_upload_kernel(device, cache, 1905bf215546Sopenharmony_ci MESA_SHADER_COMPUTE, 1906bf215546Sopenharmony_ci &stage.cache_key, sizeof(stage.cache_key), 1907bf215546Sopenharmony_ci stage.code, code_size, 1908bf215546Sopenharmony_ci &stage.prog_data.base, 1909bf215546Sopenharmony_ci sizeof(stage.prog_data.cs), 1910bf215546Sopenharmony_ci stage.stats, stage.num_stats, 1911bf215546Sopenharmony_ci NULL, &stage.bind_map); 1912bf215546Sopenharmony_ci if (!bin) { 1913bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1914bf215546Sopenharmony_ci return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 1915bf215546Sopenharmony_ci } 1916bf215546Sopenharmony_ci 1917bf215546Sopenharmony_ci stage.feedback.duration = os_time_get_nano() - stage_start; 1918bf215546Sopenharmony_ci } 1919bf215546Sopenharmony_ci 1920bf215546Sopenharmony_ci anv_pipeline_add_executables(&pipeline->base, &stage, bin); 1921bf215546Sopenharmony_ci 1922bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1923bf215546Sopenharmony_ci 1924bf215546Sopenharmony_ci if (cache_hit) { 1925bf215546Sopenharmony_ci stage.feedback.flags |= 1926bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 1927bf215546Sopenharmony_ci pipeline_feedback.flags |= 1928bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 1929bf215546Sopenharmony_ci } 1930bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 1931bf215546Sopenharmony_ci 1932bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *create_feedback = 1933bf215546Sopenharmony_ci vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 1934bf215546Sopenharmony_ci if (create_feedback) { 1935bf215546Sopenharmony_ci *create_feedback->pPipelineCreationFeedback = pipeline_feedback; 1936bf215546Sopenharmony_ci 1937bf215546Sopenharmony_ci if (create_feedback->pipelineStageCreationFeedbackCount) { 1938bf215546Sopenharmony_ci assert(create_feedback->pipelineStageCreationFeedbackCount == 1); 1939bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback; 1940bf215546Sopenharmony_ci } 1941bf215546Sopenharmony_ci } 1942bf215546Sopenharmony_ci 1943bf215546Sopenharmony_ci pipeline->cs = bin; 1944bf215546Sopenharmony_ci 1945bf215546Sopenharmony_ci return VK_SUCCESS; 1946bf215546Sopenharmony_ci} 1947bf215546Sopenharmony_ci 1948bf215546Sopenharmony_cistatic VkResult 1949bf215546Sopenharmony_cianv_compute_pipeline_create(struct anv_device *device, 1950bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 1951bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfo, 1952bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 1953bf215546Sopenharmony_ci VkPipeline *pPipeline) 1954bf215546Sopenharmony_ci{ 1955bf215546Sopenharmony_ci struct anv_compute_pipeline *pipeline; 1956bf215546Sopenharmony_ci VkResult result; 1957bf215546Sopenharmony_ci 1958bf215546Sopenharmony_ci assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); 1959bf215546Sopenharmony_ci 1960bf215546Sopenharmony_ci pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 1961bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1962bf215546Sopenharmony_ci if (pipeline == NULL) 1963bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1964bf215546Sopenharmony_ci 1965bf215546Sopenharmony_ci result = anv_pipeline_init(&pipeline->base, device, 1966bf215546Sopenharmony_ci ANV_PIPELINE_COMPUTE, pCreateInfo->flags, 1967bf215546Sopenharmony_ci pAllocator); 1968bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1969bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pipeline); 1970bf215546Sopenharmony_ci return result; 1971bf215546Sopenharmony_ci } 1972bf215546Sopenharmony_ci 1973bf215546Sopenharmony_ci anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS, 1974bf215546Sopenharmony_ci pipeline->batch_data, sizeof(pipeline->batch_data)); 1975bf215546Sopenharmony_ci 1976bf215546Sopenharmony_ci result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo); 1977bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1978bf215546Sopenharmony_ci anv_pipeline_finish(&pipeline->base, device, pAllocator); 1979bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pipeline); 1980bf215546Sopenharmony_ci return result; 1981bf215546Sopenharmony_ci } 1982bf215546Sopenharmony_ci 1983bf215546Sopenharmony_ci anv_genX(&device->info, compute_pipeline_emit)(pipeline); 1984bf215546Sopenharmony_ci 1985bf215546Sopenharmony_ci *pPipeline = anv_pipeline_to_handle(&pipeline->base); 1986bf215546Sopenharmony_ci 1987bf215546Sopenharmony_ci return pipeline->base.batch.status; 1988bf215546Sopenharmony_ci} 1989bf215546Sopenharmony_ci 1990bf215546Sopenharmony_ciVkResult anv_CreateComputePipelines( 1991bf215546Sopenharmony_ci VkDevice _device, 1992bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 1993bf215546Sopenharmony_ci uint32_t count, 1994bf215546Sopenharmony_ci const VkComputePipelineCreateInfo* pCreateInfos, 1995bf215546Sopenharmony_ci const VkAllocationCallbacks* pAllocator, 1996bf215546Sopenharmony_ci VkPipeline* pPipelines) 1997bf215546Sopenharmony_ci{ 1998bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_device, device, _device); 1999bf215546Sopenharmony_ci ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); 2000bf215546Sopenharmony_ci 2001bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_ci unsigned i; 2004bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 2005bf215546Sopenharmony_ci VkResult res = anv_compute_pipeline_create(device, pipeline_cache, 2006bf215546Sopenharmony_ci &pCreateInfos[i], 2007bf215546Sopenharmony_ci pAllocator, &pPipelines[i]); 2008bf215546Sopenharmony_ci 2009bf215546Sopenharmony_ci if (res == VK_SUCCESS) 2010bf215546Sopenharmony_ci continue; 2011bf215546Sopenharmony_ci 2012bf215546Sopenharmony_ci /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it 2013bf215546Sopenharmony_ci * is not obvious what error should be report upon 2 different failures. 2014bf215546Sopenharmony_ci * */ 2015bf215546Sopenharmony_ci result = res; 2016bf215546Sopenharmony_ci if (res != VK_PIPELINE_COMPILE_REQUIRED) 2017bf215546Sopenharmony_ci break; 2018bf215546Sopenharmony_ci 2019bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 2020bf215546Sopenharmony_ci 2021bf215546Sopenharmony_ci if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 2022bf215546Sopenharmony_ci break; 2023bf215546Sopenharmony_ci } 2024bf215546Sopenharmony_ci 2025bf215546Sopenharmony_ci for (; i < count; i++) 2026bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 2027bf215546Sopenharmony_ci 2028bf215546Sopenharmony_ci return result; 2029bf215546Sopenharmony_ci} 2030bf215546Sopenharmony_ci 2031bf215546Sopenharmony_ci/** 2032bf215546Sopenharmony_ci * Calculate the desired L3 partitioning based on the current state of the 2033bf215546Sopenharmony_ci * pipeline. For now this simply returns the conservative defaults calculated 2034bf215546Sopenharmony_ci * by get_default_l3_weights(), but we could probably do better by gathering 2035bf215546Sopenharmony_ci * more statistics from the pipeline state (e.g. guess of expected URB usage 2036bf215546Sopenharmony_ci * and bound surfaces), or by using feed-back from performance counters. 2037bf215546Sopenharmony_ci */ 2038bf215546Sopenharmony_civoid 2039bf215546Sopenharmony_cianv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm) 2040bf215546Sopenharmony_ci{ 2041bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &pipeline->device->info; 2042bf215546Sopenharmony_ci 2043bf215546Sopenharmony_ci const struct intel_l3_weights w = 2044bf215546Sopenharmony_ci intel_get_default_l3_weights(devinfo, true, needs_slm); 2045bf215546Sopenharmony_ci 2046bf215546Sopenharmony_ci pipeline->l3_config = intel_get_l3_config(devinfo, w); 2047bf215546Sopenharmony_ci} 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_cistatic VkResult 2050bf215546Sopenharmony_cianv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, 2051bf215546Sopenharmony_ci struct anv_device *device, 2052bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 2053bf215546Sopenharmony_ci const struct VkGraphicsPipelineCreateInfo *pCreateInfo, 2054bf215546Sopenharmony_ci const struct vk_graphics_pipeline_state *state, 2055bf215546Sopenharmony_ci const VkAllocationCallbacks *alloc) 2056bf215546Sopenharmony_ci{ 2057bf215546Sopenharmony_ci VkResult result; 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci result = anv_pipeline_init(&pipeline->base, device, 2060bf215546Sopenharmony_ci ANV_PIPELINE_GRAPHICS, pCreateInfo->flags, 2061bf215546Sopenharmony_ci alloc); 2062bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2063bf215546Sopenharmony_ci return result; 2064bf215546Sopenharmony_ci 2065bf215546Sopenharmony_ci anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS, 2066bf215546Sopenharmony_ci pipeline->batch_data, sizeof(pipeline->batch_data)); 2067bf215546Sopenharmony_ci 2068bf215546Sopenharmony_ci pipeline->active_stages = 0; 2069bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) 2070bf215546Sopenharmony_ci pipeline->active_stages |= pCreateInfo->pStages[i].stage; 2071bf215546Sopenharmony_ci 2072bf215546Sopenharmony_ci if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) 2073bf215546Sopenharmony_ci pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; 2074bf215546Sopenharmony_ci 2075bf215546Sopenharmony_ci if (anv_pipeline_is_mesh(pipeline)) 2076bf215546Sopenharmony_ci assert(device->physical->vk.supported_extensions.NV_mesh_shader); 2077bf215546Sopenharmony_ci 2078bf215546Sopenharmony_ci pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations; 2079bf215546Sopenharmony_ci vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state); 2080bf215546Sopenharmony_ci 2081bf215546Sopenharmony_ci pipeline->depth_clamp_enable = state->rs->depth_clamp_enable; 2082bf215546Sopenharmony_ci pipeline->depth_clip_enable = state->rs->depth_clip_enable; 2083bf215546Sopenharmony_ci pipeline->view_mask = state->rp->view_mask; 2084bf215546Sopenharmony_ci 2085bf215546Sopenharmony_ci result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state); 2086bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2087bf215546Sopenharmony_ci anv_pipeline_finish(&pipeline->base, device, alloc); 2088bf215546Sopenharmony_ci return result; 2089bf215546Sopenharmony_ci } 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_ci anv_pipeline_setup_l3_config(&pipeline->base, false); 2092bf215546Sopenharmony_ci 2093bf215546Sopenharmony_ci if (anv_pipeline_is_primitive(pipeline)) { 2094bf215546Sopenharmony_ci const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read; 2095bf215546Sopenharmony_ci 2096bf215546Sopenharmony_ci u_foreach_bit(a, state->vi->attributes_valid) { 2097bf215546Sopenharmony_ci if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a)) 2098bf215546Sopenharmony_ci pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding); 2099bf215546Sopenharmony_ci } 2100bf215546Sopenharmony_ci 2101bf215546Sopenharmony_ci u_foreach_bit(b, state->vi->bindings_valid) { 2102bf215546Sopenharmony_ci pipeline->vb[b].stride = state->vi->bindings[b].stride; 2103bf215546Sopenharmony_ci pipeline->vb[b].instanced = state->vi->bindings[b].input_rate == 2104bf215546Sopenharmony_ci VK_VERTEX_INPUT_RATE_INSTANCE; 2105bf215546Sopenharmony_ci pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor; 2106bf215546Sopenharmony_ci } 2107bf215546Sopenharmony_ci 2108bf215546Sopenharmony_ci /* Our implementation of VK_KHR_multiview uses instancing to draw the 2109bf215546Sopenharmony_ci * different views. If the client asks for instancing, we need to multiply 2110bf215546Sopenharmony_ci * the instance divisor by the number of views ensure that we repeat the 2111bf215546Sopenharmony_ci * client's per-instance data once for each view. 2112bf215546Sopenharmony_ci */ 2113bf215546Sopenharmony_ci pipeline->instance_multiplier = 1; 2114bf215546Sopenharmony_ci if (pipeline->view_mask && !pipeline->use_primitive_replication) 2115bf215546Sopenharmony_ci pipeline->instance_multiplier = util_bitcount(pipeline->view_mask); 2116bf215546Sopenharmony_ci } else { 2117bf215546Sopenharmony_ci assert(anv_pipeline_is_mesh(pipeline)); 2118bf215546Sopenharmony_ci /* TODO(mesh): Mesh vs. Multiview with Instancing. */ 2119bf215546Sopenharmony_ci } 2120bf215546Sopenharmony_ci 2121bf215546Sopenharmony_ci pipeline->negative_one_to_one = 2122bf215546Sopenharmony_ci state->vp != NULL && state->vp->negative_one_to_one; 2123bf215546Sopenharmony_ci 2124bf215546Sopenharmony_ci /* Store line mode, polygon mode and rasterization samples, these are used 2125bf215546Sopenharmony_ci * for dynamic primitive topology. 2126bf215546Sopenharmony_ci */ 2127bf215546Sopenharmony_ci pipeline->polygon_mode = state->rs->polygon_mode; 2128bf215546Sopenharmony_ci pipeline->rasterization_samples = 2129bf215546Sopenharmony_ci state->ms != NULL ? state->ms->rasterization_samples : 1; 2130bf215546Sopenharmony_ci pipeline->line_mode = state->rs->line.mode; 2131bf215546Sopenharmony_ci if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) { 2132bf215546Sopenharmony_ci if (pipeline->rasterization_samples > 1) { 2133bf215546Sopenharmony_ci pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; 2134bf215546Sopenharmony_ci } else { 2135bf215546Sopenharmony_ci pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; 2136bf215546Sopenharmony_ci } 2137bf215546Sopenharmony_ci } 2138bf215546Sopenharmony_ci pipeline->patch_control_points = 2139bf215546Sopenharmony_ci state->ts != NULL ? state->ts->patch_control_points : 0; 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ci /* Store the color write masks, to be merged with color write enable if 2142bf215546Sopenharmony_ci * dynamic. 2143bf215546Sopenharmony_ci */ 2144bf215546Sopenharmony_ci if (state->cb != NULL) { 2145bf215546Sopenharmony_ci for (unsigned i = 0; i < state->cb->attachment_count; i++) 2146bf215546Sopenharmony_ci pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask; 2147bf215546Sopenharmony_ci } 2148bf215546Sopenharmony_ci 2149bf215546Sopenharmony_ci return VK_SUCCESS; 2150bf215546Sopenharmony_ci} 2151bf215546Sopenharmony_ci 2152bf215546Sopenharmony_cistatic VkResult 2153bf215546Sopenharmony_cianv_graphics_pipeline_create(struct anv_device *device, 2154bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 2155bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 2156bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 2157bf215546Sopenharmony_ci VkPipeline *pPipeline) 2158bf215546Sopenharmony_ci{ 2159bf215546Sopenharmony_ci struct anv_graphics_pipeline *pipeline; 2160bf215546Sopenharmony_ci VkResult result; 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ci assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); 2163bf215546Sopenharmony_ci 2164bf215546Sopenharmony_ci pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 2165bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2166bf215546Sopenharmony_ci if (pipeline == NULL) 2167bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2168bf215546Sopenharmony_ci 2169bf215546Sopenharmony_ci struct vk_graphics_pipeline_all_state all; 2170bf215546Sopenharmony_ci struct vk_graphics_pipeline_state state = { }; 2171bf215546Sopenharmony_ci result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo, 2172bf215546Sopenharmony_ci NULL /* sp_info */, 2173bf215546Sopenharmony_ci &all, NULL, 0, NULL); 2174bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2175bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pipeline); 2176bf215546Sopenharmony_ci return result; 2177bf215546Sopenharmony_ci } 2178bf215546Sopenharmony_ci 2179bf215546Sopenharmony_ci result = anv_graphics_pipeline_init(pipeline, device, cache, 2180bf215546Sopenharmony_ci pCreateInfo, &state, pAllocator); 2181bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2182bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pipeline); 2183bf215546Sopenharmony_ci return result; 2184bf215546Sopenharmony_ci } 2185bf215546Sopenharmony_ci 2186bf215546Sopenharmony_ci anv_genX(&device->info, graphics_pipeline_emit)(pipeline, &state); 2187bf215546Sopenharmony_ci 2188bf215546Sopenharmony_ci *pPipeline = anv_pipeline_to_handle(&pipeline->base); 2189bf215546Sopenharmony_ci 2190bf215546Sopenharmony_ci return pipeline->base.batch.status; 2191bf215546Sopenharmony_ci} 2192bf215546Sopenharmony_ci 2193bf215546Sopenharmony_ciVkResult anv_CreateGraphicsPipelines( 2194bf215546Sopenharmony_ci VkDevice _device, 2195bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 2196bf215546Sopenharmony_ci uint32_t count, 2197bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo* pCreateInfos, 2198bf215546Sopenharmony_ci const VkAllocationCallbacks* pAllocator, 2199bf215546Sopenharmony_ci VkPipeline* pPipelines) 2200bf215546Sopenharmony_ci{ 2201bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_device, device, _device); 2202bf215546Sopenharmony_ci ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); 2203bf215546Sopenharmony_ci 2204bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 2205bf215546Sopenharmony_ci 2206bf215546Sopenharmony_ci unsigned i; 2207bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 2208bf215546Sopenharmony_ci VkResult res = anv_graphics_pipeline_create(device, 2209bf215546Sopenharmony_ci pipeline_cache, 2210bf215546Sopenharmony_ci &pCreateInfos[i], 2211bf215546Sopenharmony_ci pAllocator, &pPipelines[i]); 2212bf215546Sopenharmony_ci 2213bf215546Sopenharmony_ci if (res == VK_SUCCESS) 2214bf215546Sopenharmony_ci continue; 2215bf215546Sopenharmony_ci 2216bf215546Sopenharmony_ci /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it 2217bf215546Sopenharmony_ci * is not obvious what error should be report upon 2 different failures. 2218bf215546Sopenharmony_ci * */ 2219bf215546Sopenharmony_ci result = res; 2220bf215546Sopenharmony_ci if (res != VK_PIPELINE_COMPILE_REQUIRED) 2221bf215546Sopenharmony_ci break; 2222bf215546Sopenharmony_ci 2223bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 2224bf215546Sopenharmony_ci 2225bf215546Sopenharmony_ci if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 2226bf215546Sopenharmony_ci break; 2227bf215546Sopenharmony_ci } 2228bf215546Sopenharmony_ci 2229bf215546Sopenharmony_ci for (; i < count; i++) 2230bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 2231bf215546Sopenharmony_ci 2232bf215546Sopenharmony_ci return result; 2233bf215546Sopenharmony_ci} 2234bf215546Sopenharmony_ci 2235bf215546Sopenharmony_cistatic VkResult 2236bf215546Sopenharmony_cicompile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, 2237bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 2238bf215546Sopenharmony_ci nir_shader *nir, 2239bf215546Sopenharmony_ci struct anv_pipeline_stage *stage, 2240bf215546Sopenharmony_ci struct anv_shader_bin **shader_out, 2241bf215546Sopenharmony_ci void *mem_ctx) 2242bf215546Sopenharmony_ci{ 2243bf215546Sopenharmony_ci const struct brw_compiler *compiler = 2244bf215546Sopenharmony_ci pipeline->base.device->physical->compiler; 2245bf215546Sopenharmony_ci const struct intel_device_info *devinfo = compiler->devinfo; 2246bf215546Sopenharmony_ci 2247bf215546Sopenharmony_ci nir_shader **resume_shaders = NULL; 2248bf215546Sopenharmony_ci uint32_t num_resume_shaders = 0; 2249bf215546Sopenharmony_ci if (nir->info.stage != MESA_SHADER_COMPUTE) { 2250bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_shader_calls, 2251bf215546Sopenharmony_ci nir_address_format_64bit_global, 2252bf215546Sopenharmony_ci BRW_BTD_STACK_ALIGN, 2253bf215546Sopenharmony_ci &resume_shaders, &num_resume_shaders, mem_ctx); 2254bf215546Sopenharmony_ci NIR_PASS(_, nir, brw_nir_lower_shader_calls); 2255bf215546Sopenharmony_ci NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo); 2256bf215546Sopenharmony_ci } 2257bf215546Sopenharmony_ci 2258bf215546Sopenharmony_ci for (unsigned i = 0; i < num_resume_shaders; i++) { 2259bf215546Sopenharmony_ci NIR_PASS(_,resume_shaders[i], brw_nir_lower_shader_calls); 2260bf215546Sopenharmony_ci NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo); 2261bf215546Sopenharmony_ci } 2262bf215546Sopenharmony_ci 2263bf215546Sopenharmony_ci struct brw_compile_bs_params params = { 2264bf215546Sopenharmony_ci .nir = nir, 2265bf215546Sopenharmony_ci .key = &stage->key.bs, 2266bf215546Sopenharmony_ci .prog_data = &stage->prog_data.bs, 2267bf215546Sopenharmony_ci .num_resume_shaders = num_resume_shaders, 2268bf215546Sopenharmony_ci .resume_shaders = resume_shaders, 2269bf215546Sopenharmony_ci 2270bf215546Sopenharmony_ci .stats = stage->stats, 2271bf215546Sopenharmony_ci .log_data = pipeline->base.device, 2272bf215546Sopenharmony_ci }; 2273bf215546Sopenharmony_ci 2274bf215546Sopenharmony_ci stage->code = brw_compile_bs(compiler, mem_ctx, ¶ms); 2275bf215546Sopenharmony_ci if (stage->code == NULL) 2276bf215546Sopenharmony_ci return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 2277bf215546Sopenharmony_ci 2278bf215546Sopenharmony_ci /* Ray-tracing shaders don't have a "real" bind map */ 2279bf215546Sopenharmony_ci struct anv_pipeline_bind_map empty_bind_map = {}; 2280bf215546Sopenharmony_ci 2281bf215546Sopenharmony_ci const unsigned code_size = stage->prog_data.base.program_size; 2282bf215546Sopenharmony_ci struct anv_shader_bin *bin = 2283bf215546Sopenharmony_ci anv_device_upload_kernel(pipeline->base.device, 2284bf215546Sopenharmony_ci cache, 2285bf215546Sopenharmony_ci stage->stage, 2286bf215546Sopenharmony_ci &stage->cache_key, sizeof(stage->cache_key), 2287bf215546Sopenharmony_ci stage->code, code_size, 2288bf215546Sopenharmony_ci &stage->prog_data.base, 2289bf215546Sopenharmony_ci sizeof(stage->prog_data.bs), 2290bf215546Sopenharmony_ci stage->stats, 1, 2291bf215546Sopenharmony_ci NULL, &empty_bind_map); 2292bf215546Sopenharmony_ci if (bin == NULL) 2293bf215546Sopenharmony_ci return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 2294bf215546Sopenharmony_ci 2295bf215546Sopenharmony_ci /* TODO: Figure out executables for resume shaders */ 2296bf215546Sopenharmony_ci anv_pipeline_add_executables(&pipeline->base, stage, bin); 2297bf215546Sopenharmony_ci util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin); 2298bf215546Sopenharmony_ci 2299bf215546Sopenharmony_ci *shader_out = bin; 2300bf215546Sopenharmony_ci 2301bf215546Sopenharmony_ci return VK_SUCCESS; 2302bf215546Sopenharmony_ci} 2303bf215546Sopenharmony_ci 2304bf215546Sopenharmony_cistatic bool 2305bf215546Sopenharmony_ciis_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info) 2306bf215546Sopenharmony_ci{ 2307bf215546Sopenharmony_ci if (info->pDynamicState == NULL) 2308bf215546Sopenharmony_ci return false; 2309bf215546Sopenharmony_ci 2310bf215546Sopenharmony_ci for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) { 2311bf215546Sopenharmony_ci if (info->pDynamicState->pDynamicStates[i] == 2312bf215546Sopenharmony_ci VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR) 2313bf215546Sopenharmony_ci return true; 2314bf215546Sopenharmony_ci } 2315bf215546Sopenharmony_ci 2316bf215546Sopenharmony_ci return false; 2317bf215546Sopenharmony_ci} 2318bf215546Sopenharmony_ci 2319bf215546Sopenharmony_cistatic void 2320bf215546Sopenharmony_cianv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline, 2321bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *info, 2322bf215546Sopenharmony_ci uint32_t *stack_max) 2323bf215546Sopenharmony_ci{ 2324bf215546Sopenharmony_ci if (is_rt_stack_size_dynamic(info)) { 2325bf215546Sopenharmony_ci pipeline->stack_size = 0; /* 0 means dynamic */ 2326bf215546Sopenharmony_ci } else { 2327bf215546Sopenharmony_ci /* From the Vulkan spec: 2328bf215546Sopenharmony_ci * 2329bf215546Sopenharmony_ci * "If the stack size is not set explicitly, the stack size for a 2330bf215546Sopenharmony_ci * pipeline is: 2331bf215546Sopenharmony_ci * 2332bf215546Sopenharmony_ci * rayGenStackMax + 2333bf215546Sopenharmony_ci * min(1, maxPipelineRayRecursionDepth) × 2334bf215546Sopenharmony_ci * max(closestHitStackMax, missStackMax, 2335bf215546Sopenharmony_ci * intersectionStackMax + anyHitStackMax) + 2336bf215546Sopenharmony_ci * max(0, maxPipelineRayRecursionDepth-1) × 2337bf215546Sopenharmony_ci * max(closestHitStackMax, missStackMax) + 2338bf215546Sopenharmony_ci * 2 × callableStackMax" 2339bf215546Sopenharmony_ci */ 2340bf215546Sopenharmony_ci pipeline->stack_size = 2341bf215546Sopenharmony_ci stack_max[MESA_SHADER_RAYGEN] + 2342bf215546Sopenharmony_ci MIN2(1, info->maxPipelineRayRecursionDepth) * 2343bf215546Sopenharmony_ci MAX4(stack_max[MESA_SHADER_CLOSEST_HIT], 2344bf215546Sopenharmony_ci stack_max[MESA_SHADER_MISS], 2345bf215546Sopenharmony_ci stack_max[MESA_SHADER_INTERSECTION], 2346bf215546Sopenharmony_ci stack_max[MESA_SHADER_ANY_HIT]) + 2347bf215546Sopenharmony_ci MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) * 2348bf215546Sopenharmony_ci MAX2(stack_max[MESA_SHADER_CLOSEST_HIT], 2349bf215546Sopenharmony_ci stack_max[MESA_SHADER_MISS]) + 2350bf215546Sopenharmony_ci 2 * stack_max[MESA_SHADER_CALLABLE]; 2351bf215546Sopenharmony_ci 2352bf215546Sopenharmony_ci /* This is an extremely unlikely case but we need to set it to some 2353bf215546Sopenharmony_ci * non-zero value so that we don't accidentally think it's dynamic. 2354bf215546Sopenharmony_ci * Our minimum stack size is 2KB anyway so we could set to any small 2355bf215546Sopenharmony_ci * value we like. 2356bf215546Sopenharmony_ci */ 2357bf215546Sopenharmony_ci if (pipeline->stack_size == 0) 2358bf215546Sopenharmony_ci pipeline->stack_size = 1; 2359bf215546Sopenharmony_ci } 2360bf215546Sopenharmony_ci} 2361bf215546Sopenharmony_ci 2362bf215546Sopenharmony_cistatic struct anv_pipeline_stage * 2363bf215546Sopenharmony_cianv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline, 2364bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *info, 2365bf215546Sopenharmony_ci void *pipeline_ctx) 2366bf215546Sopenharmony_ci{ 2367bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 2368bf215546Sopenharmony_ci 2369bf215546Sopenharmony_ci /* Create enough stage entries for all shader modules plus potential 2370bf215546Sopenharmony_ci * combinaisons in the groups. 2371bf215546Sopenharmony_ci */ 2372bf215546Sopenharmony_ci struct anv_pipeline_stage *stages = 2373bf215546Sopenharmony_ci rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount); 2374bf215546Sopenharmony_ci 2375bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->stageCount; i++) { 2376bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i]; 2377bf215546Sopenharmony_ci if (vk_pipeline_shader_stage_is_null(sinfo)) 2378bf215546Sopenharmony_ci continue; 2379bf215546Sopenharmony_ci 2380bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 2381bf215546Sopenharmony_ci 2382bf215546Sopenharmony_ci stages[i] = (struct anv_pipeline_stage) { 2383bf215546Sopenharmony_ci .stage = vk_to_mesa_shader_stage(sinfo->stage), 2384bf215546Sopenharmony_ci .info = sinfo, 2385bf215546Sopenharmony_ci .cache_key = { 2386bf215546Sopenharmony_ci .stage = vk_to_mesa_shader_stage(sinfo->stage), 2387bf215546Sopenharmony_ci }, 2388bf215546Sopenharmony_ci .feedback = { 2389bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 2390bf215546Sopenharmony_ci }, 2391bf215546Sopenharmony_ci }; 2392bf215546Sopenharmony_ci 2393bf215546Sopenharmony_ci populate_bs_prog_key(pipeline->base.device, 2394bf215546Sopenharmony_ci pipeline->base.device->robust_buffer_access, 2395bf215546Sopenharmony_ci &stages[i].key.bs); 2396bf215546Sopenharmony_ci 2397bf215546Sopenharmony_ci vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1); 2398bf215546Sopenharmony_ci 2399bf215546Sopenharmony_ci if (stages[i].stage != MESA_SHADER_INTERSECTION) { 2400bf215546Sopenharmony_ci anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i], 2401bf215546Sopenharmony_ci stages[i].cache_key.sha1); 2402bf215546Sopenharmony_ci } 2403bf215546Sopenharmony_ci 2404bf215546Sopenharmony_ci stages[i].feedback.duration += os_time_get_nano() - stage_start; 2405bf215546Sopenharmony_ci } 2406bf215546Sopenharmony_ci 2407bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->groupCount; i++) { 2408bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i]; 2409bf215546Sopenharmony_ci 2410bf215546Sopenharmony_ci if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR) 2411bf215546Sopenharmony_ci continue; 2412bf215546Sopenharmony_ci 2413bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 2414bf215546Sopenharmony_ci 2415bf215546Sopenharmony_ci uint32_t intersection_idx = ginfo->intersectionShader; 2416bf215546Sopenharmony_ci assert(intersection_idx < info->stageCount); 2417bf215546Sopenharmony_ci 2418bf215546Sopenharmony_ci uint32_t any_hit_idx = ginfo->anyHitShader; 2419bf215546Sopenharmony_ci if (any_hit_idx != VK_SHADER_UNUSED_KHR) { 2420bf215546Sopenharmony_ci assert(any_hit_idx < info->stageCount); 2421bf215546Sopenharmony_ci anv_pipeline_hash_ray_tracing_combined_shader(pipeline, 2422bf215546Sopenharmony_ci layout, 2423bf215546Sopenharmony_ci &stages[intersection_idx], 2424bf215546Sopenharmony_ci &stages[any_hit_idx], 2425bf215546Sopenharmony_ci stages[intersection_idx].cache_key.sha1); 2426bf215546Sopenharmony_ci } else { 2427bf215546Sopenharmony_ci anv_pipeline_hash_ray_tracing_shader(pipeline, layout, 2428bf215546Sopenharmony_ci &stages[intersection_idx], 2429bf215546Sopenharmony_ci stages[intersection_idx].cache_key.sha1); 2430bf215546Sopenharmony_ci } 2431bf215546Sopenharmony_ci 2432bf215546Sopenharmony_ci stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start; 2433bf215546Sopenharmony_ci } 2434bf215546Sopenharmony_ci 2435bf215546Sopenharmony_ci return stages; 2436bf215546Sopenharmony_ci} 2437bf215546Sopenharmony_ci 2438bf215546Sopenharmony_cistatic bool 2439bf215546Sopenharmony_cianv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline, 2440bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 2441bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *info, 2442bf215546Sopenharmony_ci struct anv_pipeline_stage *stages, 2443bf215546Sopenharmony_ci uint32_t *stack_max) 2444bf215546Sopenharmony_ci{ 2445bf215546Sopenharmony_ci uint32_t shaders = 0, cache_hits = 0; 2446bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->stageCount; i++) { 2447bf215546Sopenharmony_ci if (stages[i].info == NULL) 2448bf215546Sopenharmony_ci continue; 2449bf215546Sopenharmony_ci 2450bf215546Sopenharmony_ci shaders++; 2451bf215546Sopenharmony_ci 2452bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 2453bf215546Sopenharmony_ci 2454bf215546Sopenharmony_ci bool cache_hit; 2455bf215546Sopenharmony_ci stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache, 2456bf215546Sopenharmony_ci &stages[i].cache_key, 2457bf215546Sopenharmony_ci sizeof(stages[i].cache_key), 2458bf215546Sopenharmony_ci &cache_hit); 2459bf215546Sopenharmony_ci if (cache_hit) { 2460bf215546Sopenharmony_ci cache_hits++; 2461bf215546Sopenharmony_ci stages[i].feedback.flags |= 2462bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 2463bf215546Sopenharmony_ci } 2464bf215546Sopenharmony_ci 2465bf215546Sopenharmony_ci if (stages[i].bin != NULL) { 2466bf215546Sopenharmony_ci anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin); 2467bf215546Sopenharmony_ci util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin); 2468bf215546Sopenharmony_ci 2469bf215546Sopenharmony_ci uint32_t stack_size = 2470bf215546Sopenharmony_ci brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size; 2471bf215546Sopenharmony_ci stack_max[stages[i].stage] = 2472bf215546Sopenharmony_ci MAX2(stack_max[stages[i].stage], stack_size); 2473bf215546Sopenharmony_ci } 2474bf215546Sopenharmony_ci 2475bf215546Sopenharmony_ci stages[i].feedback.duration += os_time_get_nano() - stage_start; 2476bf215546Sopenharmony_ci } 2477bf215546Sopenharmony_ci 2478bf215546Sopenharmony_ci return cache_hits == shaders; 2479bf215546Sopenharmony_ci} 2480bf215546Sopenharmony_ci 2481bf215546Sopenharmony_cistatic VkResult 2482bf215546Sopenharmony_cianv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline, 2483bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 2484bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *info) 2485bf215546Sopenharmony_ci{ 2486bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &pipeline->base.device->info; 2487bf215546Sopenharmony_ci VkResult result; 2488bf215546Sopenharmony_ci 2489bf215546Sopenharmony_ci VkPipelineCreationFeedback pipeline_feedback = { 2490bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 2491bf215546Sopenharmony_ci }; 2492bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 2493bf215546Sopenharmony_ci 2494bf215546Sopenharmony_ci void *pipeline_ctx = ralloc_context(NULL); 2495bf215546Sopenharmony_ci 2496bf215546Sopenharmony_ci struct anv_pipeline_stage *stages = 2497bf215546Sopenharmony_ci anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx); 2498bf215546Sopenharmony_ci 2499bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 2500bf215546Sopenharmony_ci 2501bf215546Sopenharmony_ci const bool skip_cache_lookup = 2502bf215546Sopenharmony_ci (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); 2503bf215546Sopenharmony_ci 2504bf215546Sopenharmony_ci uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {}; 2505bf215546Sopenharmony_ci 2506bf215546Sopenharmony_ci if (!skip_cache_lookup && 2507bf215546Sopenharmony_ci anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) { 2508bf215546Sopenharmony_ci pipeline_feedback.flags |= 2509bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 2510bf215546Sopenharmony_ci goto done; 2511bf215546Sopenharmony_ci } 2512bf215546Sopenharmony_ci 2513bf215546Sopenharmony_ci if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { 2514bf215546Sopenharmony_ci ralloc_free(pipeline_ctx); 2515bf215546Sopenharmony_ci return VK_PIPELINE_COMPILE_REQUIRED; 2516bf215546Sopenharmony_ci } 2517bf215546Sopenharmony_ci 2518bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->stageCount; i++) { 2519bf215546Sopenharmony_ci if (stages[i].info == NULL) 2520bf215546Sopenharmony_ci continue; 2521bf215546Sopenharmony_ci 2522bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 2523bf215546Sopenharmony_ci 2524bf215546Sopenharmony_ci stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, 2525bf215546Sopenharmony_ci pipeline_ctx, &stages[i]); 2526bf215546Sopenharmony_ci if (stages[i].nir == NULL) { 2527bf215546Sopenharmony_ci ralloc_free(pipeline_ctx); 2528bf215546Sopenharmony_ci return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 2529bf215546Sopenharmony_ci } 2530bf215546Sopenharmony_ci 2531bf215546Sopenharmony_ci anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout); 2532bf215546Sopenharmony_ci 2533bf215546Sopenharmony_ci stages[i].feedback.duration += os_time_get_nano() - stage_start; 2534bf215546Sopenharmony_ci } 2535bf215546Sopenharmony_ci 2536bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->stageCount; i++) { 2537bf215546Sopenharmony_ci if (stages[i].info == NULL) 2538bf215546Sopenharmony_ci continue; 2539bf215546Sopenharmony_ci 2540bf215546Sopenharmony_ci /* Shader found in cache already. */ 2541bf215546Sopenharmony_ci if (stages[i].bin != NULL) 2542bf215546Sopenharmony_ci continue; 2543bf215546Sopenharmony_ci 2544bf215546Sopenharmony_ci /* We handle intersection shaders as part of the group */ 2545bf215546Sopenharmony_ci if (stages[i].stage == MESA_SHADER_INTERSECTION) 2546bf215546Sopenharmony_ci continue; 2547bf215546Sopenharmony_ci 2548bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 2549bf215546Sopenharmony_ci 2550bf215546Sopenharmony_ci void *stage_ctx = ralloc_context(pipeline_ctx); 2551bf215546Sopenharmony_ci 2552bf215546Sopenharmony_ci nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir); 2553bf215546Sopenharmony_ci switch (stages[i].stage) { 2554bf215546Sopenharmony_ci case MESA_SHADER_RAYGEN: 2555bf215546Sopenharmony_ci brw_nir_lower_raygen(nir); 2556bf215546Sopenharmony_ci break; 2557bf215546Sopenharmony_ci 2558bf215546Sopenharmony_ci case MESA_SHADER_ANY_HIT: 2559bf215546Sopenharmony_ci brw_nir_lower_any_hit(nir, devinfo); 2560bf215546Sopenharmony_ci break; 2561bf215546Sopenharmony_ci 2562bf215546Sopenharmony_ci case MESA_SHADER_CLOSEST_HIT: 2563bf215546Sopenharmony_ci brw_nir_lower_closest_hit(nir); 2564bf215546Sopenharmony_ci break; 2565bf215546Sopenharmony_ci 2566bf215546Sopenharmony_ci case MESA_SHADER_MISS: 2567bf215546Sopenharmony_ci brw_nir_lower_miss(nir); 2568bf215546Sopenharmony_ci break; 2569bf215546Sopenharmony_ci 2570bf215546Sopenharmony_ci case MESA_SHADER_INTERSECTION: 2571bf215546Sopenharmony_ci unreachable("These are handled later"); 2572bf215546Sopenharmony_ci 2573bf215546Sopenharmony_ci case MESA_SHADER_CALLABLE: 2574bf215546Sopenharmony_ci brw_nir_lower_callable(nir); 2575bf215546Sopenharmony_ci break; 2576bf215546Sopenharmony_ci 2577bf215546Sopenharmony_ci default: 2578bf215546Sopenharmony_ci unreachable("Invalid ray-tracing shader stage"); 2579bf215546Sopenharmony_ci } 2580bf215546Sopenharmony_ci 2581bf215546Sopenharmony_ci result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i], 2582bf215546Sopenharmony_ci &stages[i].bin, stage_ctx); 2583bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2584bf215546Sopenharmony_ci ralloc_free(pipeline_ctx); 2585bf215546Sopenharmony_ci return result; 2586bf215546Sopenharmony_ci } 2587bf215546Sopenharmony_ci 2588bf215546Sopenharmony_ci uint32_t stack_size = 2589bf215546Sopenharmony_ci brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size; 2590bf215546Sopenharmony_ci stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size); 2591bf215546Sopenharmony_ci 2592bf215546Sopenharmony_ci ralloc_free(stage_ctx); 2593bf215546Sopenharmony_ci 2594bf215546Sopenharmony_ci stages[i].feedback.duration += os_time_get_nano() - stage_start; 2595bf215546Sopenharmony_ci } 2596bf215546Sopenharmony_ci 2597bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->groupCount; i++) { 2598bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i]; 2599bf215546Sopenharmony_ci struct anv_rt_shader_group *group = &pipeline->groups[i]; 2600bf215546Sopenharmony_ci group->type = ginfo->type; 2601bf215546Sopenharmony_ci switch (ginfo->type) { 2602bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 2603bf215546Sopenharmony_ci assert(ginfo->generalShader < info->stageCount); 2604bf215546Sopenharmony_ci group->general = stages[ginfo->generalShader].bin; 2605bf215546Sopenharmony_ci break; 2606bf215546Sopenharmony_ci 2607bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 2608bf215546Sopenharmony_ci if (ginfo->anyHitShader < info->stageCount) 2609bf215546Sopenharmony_ci group->any_hit = stages[ginfo->anyHitShader].bin; 2610bf215546Sopenharmony_ci 2611bf215546Sopenharmony_ci if (ginfo->closestHitShader < info->stageCount) 2612bf215546Sopenharmony_ci group->closest_hit = stages[ginfo->closestHitShader].bin; 2613bf215546Sopenharmony_ci break; 2614bf215546Sopenharmony_ci 2615bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: { 2616bf215546Sopenharmony_ci if (ginfo->closestHitShader < info->stageCount) 2617bf215546Sopenharmony_ci group->closest_hit = stages[ginfo->closestHitShader].bin; 2618bf215546Sopenharmony_ci 2619bf215546Sopenharmony_ci uint32_t intersection_idx = info->pGroups[i].intersectionShader; 2620bf215546Sopenharmony_ci assert(intersection_idx < info->stageCount); 2621bf215546Sopenharmony_ci 2622bf215546Sopenharmony_ci /* Only compile this stage if not already found in the cache. */ 2623bf215546Sopenharmony_ci if (stages[intersection_idx].bin == NULL) { 2624bf215546Sopenharmony_ci /* The any-hit and intersection shader have to be combined */ 2625bf215546Sopenharmony_ci uint32_t any_hit_idx = info->pGroups[i].anyHitShader; 2626bf215546Sopenharmony_ci const nir_shader *any_hit = NULL; 2627bf215546Sopenharmony_ci if (any_hit_idx < info->stageCount) 2628bf215546Sopenharmony_ci any_hit = stages[any_hit_idx].nir; 2629bf215546Sopenharmony_ci 2630bf215546Sopenharmony_ci void *group_ctx = ralloc_context(pipeline_ctx); 2631bf215546Sopenharmony_ci nir_shader *intersection = 2632bf215546Sopenharmony_ci nir_shader_clone(group_ctx, stages[intersection_idx].nir); 2633bf215546Sopenharmony_ci 2634bf215546Sopenharmony_ci brw_nir_lower_combined_intersection_any_hit(intersection, any_hit, 2635bf215546Sopenharmony_ci devinfo); 2636bf215546Sopenharmony_ci 2637bf215546Sopenharmony_ci result = compile_upload_rt_shader(pipeline, cache, 2638bf215546Sopenharmony_ci intersection, 2639bf215546Sopenharmony_ci &stages[intersection_idx], 2640bf215546Sopenharmony_ci &group->intersection, 2641bf215546Sopenharmony_ci group_ctx); 2642bf215546Sopenharmony_ci ralloc_free(group_ctx); 2643bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2644bf215546Sopenharmony_ci return result; 2645bf215546Sopenharmony_ci } else { 2646bf215546Sopenharmony_ci group->intersection = stages[intersection_idx].bin; 2647bf215546Sopenharmony_ci } 2648bf215546Sopenharmony_ci 2649bf215546Sopenharmony_ci uint32_t stack_size = 2650bf215546Sopenharmony_ci brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size; 2651bf215546Sopenharmony_ci stack_max[MESA_SHADER_INTERSECTION] = 2652bf215546Sopenharmony_ci MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size); 2653bf215546Sopenharmony_ci 2654bf215546Sopenharmony_ci break; 2655bf215546Sopenharmony_ci } 2656bf215546Sopenharmony_ci 2657bf215546Sopenharmony_ci default: 2658bf215546Sopenharmony_ci unreachable("Invalid ray tracing shader group type"); 2659bf215546Sopenharmony_ci } 2660bf215546Sopenharmony_ci } 2661bf215546Sopenharmony_ci 2662bf215546Sopenharmony_ci done: 2663bf215546Sopenharmony_ci ralloc_free(pipeline_ctx); 2664bf215546Sopenharmony_ci 2665bf215546Sopenharmony_ci anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max); 2666bf215546Sopenharmony_ci 2667bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 2668bf215546Sopenharmony_ci 2669bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *create_feedback = 2670bf215546Sopenharmony_ci vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 2671bf215546Sopenharmony_ci if (create_feedback) { 2672bf215546Sopenharmony_ci *create_feedback->pPipelineCreationFeedback = pipeline_feedback; 2673bf215546Sopenharmony_ci 2674bf215546Sopenharmony_ci uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount; 2675bf215546Sopenharmony_ci assert(stage_count == 0 || info->stageCount == stage_count); 2676bf215546Sopenharmony_ci for (uint32_t i = 0; i < stage_count; i++) { 2677bf215546Sopenharmony_ci gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage); 2678bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback; 2679bf215546Sopenharmony_ci } 2680bf215546Sopenharmony_ci } 2681bf215546Sopenharmony_ci 2682bf215546Sopenharmony_ci return VK_SUCCESS; 2683bf215546Sopenharmony_ci} 2684bf215546Sopenharmony_ci 2685bf215546Sopenharmony_ciVkResult 2686bf215546Sopenharmony_cianv_device_init_rt_shaders(struct anv_device *device) 2687bf215546Sopenharmony_ci{ 2688bf215546Sopenharmony_ci if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) 2689bf215546Sopenharmony_ci return VK_SUCCESS; 2690bf215546Sopenharmony_ci 2691bf215546Sopenharmony_ci bool cache_hit; 2692bf215546Sopenharmony_ci 2693bf215546Sopenharmony_ci struct brw_rt_trampoline { 2694bf215546Sopenharmony_ci char name[16]; 2695bf215546Sopenharmony_ci struct brw_cs_prog_key key; 2696bf215546Sopenharmony_ci } trampoline_key = { 2697bf215546Sopenharmony_ci .name = "rt-trampoline", 2698bf215546Sopenharmony_ci }; 2699bf215546Sopenharmony_ci device->rt_trampoline = 2700bf215546Sopenharmony_ci anv_device_search_for_kernel(device, device->internal_cache, 2701bf215546Sopenharmony_ci &trampoline_key, sizeof(trampoline_key), 2702bf215546Sopenharmony_ci &cache_hit); 2703bf215546Sopenharmony_ci if (device->rt_trampoline == NULL) { 2704bf215546Sopenharmony_ci 2705bf215546Sopenharmony_ci void *tmp_ctx = ralloc_context(NULL); 2706bf215546Sopenharmony_ci nir_shader *trampoline_nir = 2707bf215546Sopenharmony_ci brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx); 2708bf215546Sopenharmony_ci 2709bf215546Sopenharmony_ci trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8; 2710bf215546Sopenharmony_ci 2711bf215546Sopenharmony_ci struct anv_pipeline_bind_map bind_map = { 2712bf215546Sopenharmony_ci .surface_count = 0, 2713bf215546Sopenharmony_ci .sampler_count = 0, 2714bf215546Sopenharmony_ci }; 2715bf215546Sopenharmony_ci uint32_t dummy_params[4] = { 0, }; 2716bf215546Sopenharmony_ci struct brw_cs_prog_data trampoline_prog_data = { 2717bf215546Sopenharmony_ci .base.nr_params = 4, 2718bf215546Sopenharmony_ci .base.param = dummy_params, 2719bf215546Sopenharmony_ci .uses_inline_data = true, 2720bf215546Sopenharmony_ci .uses_btd_stack_ids = true, 2721bf215546Sopenharmony_ci }; 2722bf215546Sopenharmony_ci struct brw_compile_cs_params params = { 2723bf215546Sopenharmony_ci .nir = trampoline_nir, 2724bf215546Sopenharmony_ci .key = &trampoline_key.key, 2725bf215546Sopenharmony_ci .prog_data = &trampoline_prog_data, 2726bf215546Sopenharmony_ci .log_data = device, 2727bf215546Sopenharmony_ci }; 2728bf215546Sopenharmony_ci const unsigned *tramp_data = 2729bf215546Sopenharmony_ci brw_compile_cs(device->physical->compiler, tmp_ctx, ¶ms); 2730bf215546Sopenharmony_ci 2731bf215546Sopenharmony_ci device->rt_trampoline = 2732bf215546Sopenharmony_ci anv_device_upload_kernel(device, device->internal_cache, 2733bf215546Sopenharmony_ci MESA_SHADER_COMPUTE, 2734bf215546Sopenharmony_ci &trampoline_key, sizeof(trampoline_key), 2735bf215546Sopenharmony_ci tramp_data, 2736bf215546Sopenharmony_ci trampoline_prog_data.base.program_size, 2737bf215546Sopenharmony_ci &trampoline_prog_data.base, 2738bf215546Sopenharmony_ci sizeof(trampoline_prog_data), 2739bf215546Sopenharmony_ci NULL, 0, NULL, &bind_map); 2740bf215546Sopenharmony_ci 2741bf215546Sopenharmony_ci ralloc_free(tmp_ctx); 2742bf215546Sopenharmony_ci 2743bf215546Sopenharmony_ci if (device->rt_trampoline == NULL) 2744bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2745bf215546Sopenharmony_ci } 2746bf215546Sopenharmony_ci 2747bf215546Sopenharmony_ci /* The cache already has a reference and it's not going anywhere so there 2748bf215546Sopenharmony_ci * is no need to hold a second reference. 2749bf215546Sopenharmony_ci */ 2750bf215546Sopenharmony_ci anv_shader_bin_unref(device, device->rt_trampoline); 2751bf215546Sopenharmony_ci 2752bf215546Sopenharmony_ci struct brw_rt_trivial_return { 2753bf215546Sopenharmony_ci char name[16]; 2754bf215546Sopenharmony_ci struct brw_bs_prog_key key; 2755bf215546Sopenharmony_ci } return_key = { 2756bf215546Sopenharmony_ci .name = "rt-trivial-ret", 2757bf215546Sopenharmony_ci }; 2758bf215546Sopenharmony_ci device->rt_trivial_return = 2759bf215546Sopenharmony_ci anv_device_search_for_kernel(device, device->internal_cache, 2760bf215546Sopenharmony_ci &return_key, sizeof(return_key), 2761bf215546Sopenharmony_ci &cache_hit); 2762bf215546Sopenharmony_ci if (device->rt_trivial_return == NULL) { 2763bf215546Sopenharmony_ci void *tmp_ctx = ralloc_context(NULL); 2764bf215546Sopenharmony_ci nir_shader *trivial_return_nir = 2765bf215546Sopenharmony_ci brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx); 2766bf215546Sopenharmony_ci 2767bf215546Sopenharmony_ci NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info); 2768bf215546Sopenharmony_ci 2769bf215546Sopenharmony_ci struct anv_pipeline_bind_map bind_map = { 2770bf215546Sopenharmony_ci .surface_count = 0, 2771bf215546Sopenharmony_ci .sampler_count = 0, 2772bf215546Sopenharmony_ci }; 2773bf215546Sopenharmony_ci struct brw_bs_prog_data return_prog_data = { 0, }; 2774bf215546Sopenharmony_ci struct brw_compile_bs_params params = { 2775bf215546Sopenharmony_ci .nir = trivial_return_nir, 2776bf215546Sopenharmony_ci .key = &return_key.key, 2777bf215546Sopenharmony_ci .prog_data = &return_prog_data, 2778bf215546Sopenharmony_ci 2779bf215546Sopenharmony_ci .log_data = device, 2780bf215546Sopenharmony_ci }; 2781bf215546Sopenharmony_ci const unsigned *return_data = 2782bf215546Sopenharmony_ci brw_compile_bs(device->physical->compiler, tmp_ctx, ¶ms); 2783bf215546Sopenharmony_ci 2784bf215546Sopenharmony_ci device->rt_trivial_return = 2785bf215546Sopenharmony_ci anv_device_upload_kernel(device, device->internal_cache, 2786bf215546Sopenharmony_ci MESA_SHADER_CALLABLE, 2787bf215546Sopenharmony_ci &return_key, sizeof(return_key), 2788bf215546Sopenharmony_ci return_data, return_prog_data.base.program_size, 2789bf215546Sopenharmony_ci &return_prog_data.base, sizeof(return_prog_data), 2790bf215546Sopenharmony_ci NULL, 0, NULL, &bind_map); 2791bf215546Sopenharmony_ci 2792bf215546Sopenharmony_ci ralloc_free(tmp_ctx); 2793bf215546Sopenharmony_ci 2794bf215546Sopenharmony_ci if (device->rt_trivial_return == NULL) 2795bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2796bf215546Sopenharmony_ci } 2797bf215546Sopenharmony_ci 2798bf215546Sopenharmony_ci /* The cache already has a reference and it's not going anywhere so there 2799bf215546Sopenharmony_ci * is no need to hold a second reference. 2800bf215546Sopenharmony_ci */ 2801bf215546Sopenharmony_ci anv_shader_bin_unref(device, device->rt_trivial_return); 2802bf215546Sopenharmony_ci 2803bf215546Sopenharmony_ci return VK_SUCCESS; 2804bf215546Sopenharmony_ci} 2805bf215546Sopenharmony_ci 2806bf215546Sopenharmony_civoid 2807bf215546Sopenharmony_cianv_device_finish_rt_shaders(struct anv_device *device) 2808bf215546Sopenharmony_ci{ 2809bf215546Sopenharmony_ci if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) 2810bf215546Sopenharmony_ci return; 2811bf215546Sopenharmony_ci} 2812bf215546Sopenharmony_ci 2813bf215546Sopenharmony_cistatic VkResult 2814bf215546Sopenharmony_cianv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, 2815bf215546Sopenharmony_ci struct anv_device *device, 2816bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 2817bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 2818bf215546Sopenharmony_ci const VkAllocationCallbacks *alloc) 2819bf215546Sopenharmony_ci{ 2820bf215546Sopenharmony_ci VkResult result; 2821bf215546Sopenharmony_ci 2822bf215546Sopenharmony_ci util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx); 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_ci result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo); 2825bf215546Sopenharmony_ci if (result != VK_SUCCESS) 2826bf215546Sopenharmony_ci goto fail; 2827bf215546Sopenharmony_ci 2828bf215546Sopenharmony_ci anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false); 2829bf215546Sopenharmony_ci 2830bf215546Sopenharmony_ci return VK_SUCCESS; 2831bf215546Sopenharmony_ci 2832bf215546Sopenharmony_cifail: 2833bf215546Sopenharmony_ci util_dynarray_foreach(&pipeline->shaders, 2834bf215546Sopenharmony_ci struct anv_shader_bin *, shader) { 2835bf215546Sopenharmony_ci anv_shader_bin_unref(device, *shader); 2836bf215546Sopenharmony_ci } 2837bf215546Sopenharmony_ci return result; 2838bf215546Sopenharmony_ci} 2839bf215546Sopenharmony_ci 2840bf215546Sopenharmony_cistatic void 2841bf215546Sopenharmony_ciassert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, 2842bf215546Sopenharmony_ci uint32_t stage_idx, 2843bf215546Sopenharmony_ci VkShaderStageFlags valid_stages) 2844bf215546Sopenharmony_ci{ 2845bf215546Sopenharmony_ci if (stage_idx == VK_SHADER_UNUSED_KHR) 2846bf215546Sopenharmony_ci return; 2847bf215546Sopenharmony_ci 2848bf215546Sopenharmony_ci assert(stage_idx <= pCreateInfo->stageCount); 2849bf215546Sopenharmony_ci assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1); 2850bf215546Sopenharmony_ci assert(pCreateInfo->pStages[stage_idx].stage & valid_stages); 2851bf215546Sopenharmony_ci} 2852bf215546Sopenharmony_ci 2853bf215546Sopenharmony_cistatic VkResult 2854bf215546Sopenharmony_cianv_ray_tracing_pipeline_create( 2855bf215546Sopenharmony_ci VkDevice _device, 2856bf215546Sopenharmony_ci struct vk_pipeline_cache * cache, 2857bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, 2858bf215546Sopenharmony_ci const VkAllocationCallbacks* pAllocator, 2859bf215546Sopenharmony_ci VkPipeline* pPipeline) 2860bf215546Sopenharmony_ci{ 2861bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_device, device, _device); 2862bf215546Sopenharmony_ci VkResult result; 2863bf215546Sopenharmony_ci 2864bf215546Sopenharmony_ci assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR); 2865bf215546Sopenharmony_ci 2866bf215546Sopenharmony_ci VK_MULTIALLOC(ma); 2867bf215546Sopenharmony_ci VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1); 2868bf215546Sopenharmony_ci VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount); 2869bf215546Sopenharmony_ci if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, 2870bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) 2871bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2872bf215546Sopenharmony_ci 2873bf215546Sopenharmony_ci result = anv_pipeline_init(&pipeline->base, device, 2874bf215546Sopenharmony_ci ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags, 2875bf215546Sopenharmony_ci pAllocator); 2876bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2877bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pipeline); 2878bf215546Sopenharmony_ci return result; 2879bf215546Sopenharmony_ci } 2880bf215546Sopenharmony_ci 2881bf215546Sopenharmony_ci pipeline->group_count = pCreateInfo->groupCount; 2882bf215546Sopenharmony_ci pipeline->groups = groups; 2883bf215546Sopenharmony_ci 2884bf215546Sopenharmony_ci ASSERTED const VkShaderStageFlags ray_tracing_stages = 2885bf215546Sopenharmony_ci VK_SHADER_STAGE_RAYGEN_BIT_KHR | 2886bf215546Sopenharmony_ci VK_SHADER_STAGE_ANY_HIT_BIT_KHR | 2887bf215546Sopenharmony_ci VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | 2888bf215546Sopenharmony_ci VK_SHADER_STAGE_MISS_BIT_KHR | 2889bf215546Sopenharmony_ci VK_SHADER_STAGE_INTERSECTION_BIT_KHR | 2890bf215546Sopenharmony_ci VK_SHADER_STAGE_CALLABLE_BIT_KHR; 2891bf215546Sopenharmony_ci 2892bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) 2893bf215546Sopenharmony_ci assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0); 2894bf215546Sopenharmony_ci 2895bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) { 2896bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *ginfo = 2897bf215546Sopenharmony_ci &pCreateInfo->pGroups[i]; 2898bf215546Sopenharmony_ci assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader, 2899bf215546Sopenharmony_ci VK_SHADER_STAGE_RAYGEN_BIT_KHR | 2900bf215546Sopenharmony_ci VK_SHADER_STAGE_MISS_BIT_KHR | 2901bf215546Sopenharmony_ci VK_SHADER_STAGE_CALLABLE_BIT_KHR); 2902bf215546Sopenharmony_ci assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader, 2903bf215546Sopenharmony_ci VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR); 2904bf215546Sopenharmony_ci assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader, 2905bf215546Sopenharmony_ci VK_SHADER_STAGE_ANY_HIT_BIT_KHR); 2906bf215546Sopenharmony_ci assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader, 2907bf215546Sopenharmony_ci VK_SHADER_STAGE_INTERSECTION_BIT_KHR); 2908bf215546Sopenharmony_ci switch (ginfo->type) { 2909bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 2910bf215546Sopenharmony_ci assert(ginfo->generalShader < pCreateInfo->stageCount); 2911bf215546Sopenharmony_ci assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR); 2912bf215546Sopenharmony_ci assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR); 2913bf215546Sopenharmony_ci assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); 2914bf215546Sopenharmony_ci break; 2915bf215546Sopenharmony_ci 2916bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 2917bf215546Sopenharmony_ci assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); 2918bf215546Sopenharmony_ci assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); 2919bf215546Sopenharmony_ci break; 2920bf215546Sopenharmony_ci 2921bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: 2922bf215546Sopenharmony_ci assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); 2923bf215546Sopenharmony_ci break; 2924bf215546Sopenharmony_ci 2925bf215546Sopenharmony_ci default: 2926bf215546Sopenharmony_ci unreachable("Invalid ray-tracing shader group type"); 2927bf215546Sopenharmony_ci } 2928bf215546Sopenharmony_ci } 2929bf215546Sopenharmony_ci 2930bf215546Sopenharmony_ci result = anv_ray_tracing_pipeline_init(pipeline, device, cache, 2931bf215546Sopenharmony_ci pCreateInfo, pAllocator); 2932bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2933bf215546Sopenharmony_ci anv_pipeline_finish(&pipeline->base, device, pAllocator); 2934bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pipeline); 2935bf215546Sopenharmony_ci return result; 2936bf215546Sopenharmony_ci } 2937bf215546Sopenharmony_ci 2938bf215546Sopenharmony_ci anv_genX(&device->info, ray_tracing_pipeline_emit)(pipeline); 2939bf215546Sopenharmony_ci 2940bf215546Sopenharmony_ci *pPipeline = anv_pipeline_to_handle(&pipeline->base); 2941bf215546Sopenharmony_ci 2942bf215546Sopenharmony_ci return pipeline->base.batch.status; 2943bf215546Sopenharmony_ci} 2944bf215546Sopenharmony_ci 2945bf215546Sopenharmony_ciVkResult 2946bf215546Sopenharmony_cianv_CreateRayTracingPipelinesKHR( 2947bf215546Sopenharmony_ci VkDevice _device, 2948bf215546Sopenharmony_ci VkDeferredOperationKHR deferredOperation, 2949bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 2950bf215546Sopenharmony_ci uint32_t createInfoCount, 2951bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, 2952bf215546Sopenharmony_ci const VkAllocationCallbacks* pAllocator, 2953bf215546Sopenharmony_ci VkPipeline* pPipelines) 2954bf215546Sopenharmony_ci{ 2955bf215546Sopenharmony_ci ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); 2956bf215546Sopenharmony_ci 2957bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 2958bf215546Sopenharmony_ci 2959bf215546Sopenharmony_ci unsigned i; 2960bf215546Sopenharmony_ci for (i = 0; i < createInfoCount; i++) { 2961bf215546Sopenharmony_ci VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache, 2962bf215546Sopenharmony_ci &pCreateInfos[i], 2963bf215546Sopenharmony_ci pAllocator, &pPipelines[i]); 2964bf215546Sopenharmony_ci 2965bf215546Sopenharmony_ci if (res == VK_SUCCESS) 2966bf215546Sopenharmony_ci continue; 2967bf215546Sopenharmony_ci 2968bf215546Sopenharmony_ci /* Bail out on the first error as it is not obvious what error should be 2969bf215546Sopenharmony_ci * report upon 2 different failures. */ 2970bf215546Sopenharmony_ci result = res; 2971bf215546Sopenharmony_ci if (result != VK_PIPELINE_COMPILE_REQUIRED) 2972bf215546Sopenharmony_ci break; 2973bf215546Sopenharmony_ci 2974bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 2975bf215546Sopenharmony_ci 2976bf215546Sopenharmony_ci if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 2977bf215546Sopenharmony_ci break; 2978bf215546Sopenharmony_ci } 2979bf215546Sopenharmony_ci 2980bf215546Sopenharmony_ci for (; i < createInfoCount; i++) 2981bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 2982bf215546Sopenharmony_ci 2983bf215546Sopenharmony_ci return result; 2984bf215546Sopenharmony_ci} 2985bf215546Sopenharmony_ci 2986bf215546Sopenharmony_ci#define WRITE_STR(field, ...) ({ \ 2987bf215546Sopenharmony_ci memset(field, 0, sizeof(field)); \ 2988bf215546Sopenharmony_ci UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \ 2989bf215546Sopenharmony_ci assert(i > 0 && i < sizeof(field)); \ 2990bf215546Sopenharmony_ci}) 2991bf215546Sopenharmony_ci 2992bf215546Sopenharmony_ciVkResult anv_GetPipelineExecutablePropertiesKHR( 2993bf215546Sopenharmony_ci VkDevice device, 2994bf215546Sopenharmony_ci const VkPipelineInfoKHR* pPipelineInfo, 2995bf215546Sopenharmony_ci uint32_t* pExecutableCount, 2996bf215546Sopenharmony_ci VkPipelineExecutablePropertiesKHR* pProperties) 2997bf215546Sopenharmony_ci{ 2998bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline); 2999bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, 3000bf215546Sopenharmony_ci pProperties, pExecutableCount); 3001bf215546Sopenharmony_ci 3002bf215546Sopenharmony_ci util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) { 3003bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) { 3004bf215546Sopenharmony_ci gl_shader_stage stage = exe->stage; 3005bf215546Sopenharmony_ci props->stages = mesa_to_vk_shader_stage(stage); 3006bf215546Sopenharmony_ci 3007bf215546Sopenharmony_ci unsigned simd_width = exe->stats.dispatch_width; 3008bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT) { 3009bf215546Sopenharmony_ci WRITE_STR(props->name, "%s%d %s", 3010bf215546Sopenharmony_ci simd_width ? "SIMD" : "vec", 3011bf215546Sopenharmony_ci simd_width ? simd_width : 4, 3012bf215546Sopenharmony_ci _mesa_shader_stage_to_string(stage)); 3013bf215546Sopenharmony_ci } else { 3014bf215546Sopenharmony_ci WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage)); 3015bf215546Sopenharmony_ci } 3016bf215546Sopenharmony_ci WRITE_STR(props->description, "%s%d %s shader", 3017bf215546Sopenharmony_ci simd_width ? "SIMD" : "vec", 3018bf215546Sopenharmony_ci simd_width ? simd_width : 4, 3019bf215546Sopenharmony_ci _mesa_shader_stage_to_string(stage)); 3020bf215546Sopenharmony_ci 3021bf215546Sopenharmony_ci /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan 3022bf215546Sopenharmony_ci * wants a subgroup size of 1. 3023bf215546Sopenharmony_ci */ 3024bf215546Sopenharmony_ci props->subgroupSize = MAX2(simd_width, 1); 3025bf215546Sopenharmony_ci } 3026bf215546Sopenharmony_ci } 3027bf215546Sopenharmony_ci 3028bf215546Sopenharmony_ci return vk_outarray_status(&out); 3029bf215546Sopenharmony_ci} 3030bf215546Sopenharmony_ci 3031bf215546Sopenharmony_cistatic const struct anv_pipeline_executable * 3032bf215546Sopenharmony_cianv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index) 3033bf215546Sopenharmony_ci{ 3034bf215546Sopenharmony_ci assert(index < util_dynarray_num_elements(&pipeline->executables, 3035bf215546Sopenharmony_ci struct anv_pipeline_executable)); 3036bf215546Sopenharmony_ci return util_dynarray_element( 3037bf215546Sopenharmony_ci &pipeline->executables, struct anv_pipeline_executable, index); 3038bf215546Sopenharmony_ci} 3039bf215546Sopenharmony_ci 3040bf215546Sopenharmony_ciVkResult anv_GetPipelineExecutableStatisticsKHR( 3041bf215546Sopenharmony_ci VkDevice device, 3042bf215546Sopenharmony_ci const VkPipelineExecutableInfoKHR* pExecutableInfo, 3043bf215546Sopenharmony_ci uint32_t* pStatisticCount, 3044bf215546Sopenharmony_ci VkPipelineExecutableStatisticKHR* pStatistics) 3045bf215546Sopenharmony_ci{ 3046bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 3047bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, 3048bf215546Sopenharmony_ci pStatistics, pStatisticCount); 3049bf215546Sopenharmony_ci 3050bf215546Sopenharmony_ci const struct anv_pipeline_executable *exe = 3051bf215546Sopenharmony_ci anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3052bf215546Sopenharmony_ci 3053bf215546Sopenharmony_ci const struct brw_stage_prog_data *prog_data; 3054bf215546Sopenharmony_ci switch (pipeline->type) { 3055bf215546Sopenharmony_ci case ANV_PIPELINE_GRAPHICS: { 3056bf215546Sopenharmony_ci prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data; 3057bf215546Sopenharmony_ci break; 3058bf215546Sopenharmony_ci } 3059bf215546Sopenharmony_ci case ANV_PIPELINE_COMPUTE: { 3060bf215546Sopenharmony_ci prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data; 3061bf215546Sopenharmony_ci break; 3062bf215546Sopenharmony_ci } 3063bf215546Sopenharmony_ci case ANV_PIPELINE_RAY_TRACING: { 3064bf215546Sopenharmony_ci struct anv_shader_bin **shader = 3065bf215546Sopenharmony_ci util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders, 3066bf215546Sopenharmony_ci struct anv_shader_bin *, 3067bf215546Sopenharmony_ci pExecutableInfo->executableIndex); 3068bf215546Sopenharmony_ci prog_data = (*shader)->prog_data; 3069bf215546Sopenharmony_ci break; 3070bf215546Sopenharmony_ci } 3071bf215546Sopenharmony_ci default: 3072bf215546Sopenharmony_ci unreachable("invalid pipeline type"); 3073bf215546Sopenharmony_ci } 3074bf215546Sopenharmony_ci 3075bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3076bf215546Sopenharmony_ci WRITE_STR(stat->name, "Instruction Count"); 3077bf215546Sopenharmony_ci WRITE_STR(stat->description, 3078bf215546Sopenharmony_ci "Number of GEN instructions in the final generated " 3079bf215546Sopenharmony_ci "shader executable."); 3080bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3081bf215546Sopenharmony_ci stat->value.u64 = exe->stats.instructions; 3082bf215546Sopenharmony_ci } 3083bf215546Sopenharmony_ci 3084bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3085bf215546Sopenharmony_ci WRITE_STR(stat->name, "SEND Count"); 3086bf215546Sopenharmony_ci WRITE_STR(stat->description, 3087bf215546Sopenharmony_ci "Number of instructions in the final generated shader " 3088bf215546Sopenharmony_ci "executable which access external units such as the " 3089bf215546Sopenharmony_ci "constant cache or the sampler."); 3090bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3091bf215546Sopenharmony_ci stat->value.u64 = exe->stats.sends; 3092bf215546Sopenharmony_ci } 3093bf215546Sopenharmony_ci 3094bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3095bf215546Sopenharmony_ci WRITE_STR(stat->name, "Loop Count"); 3096bf215546Sopenharmony_ci WRITE_STR(stat->description, 3097bf215546Sopenharmony_ci "Number of loops (not unrolled) in the final generated " 3098bf215546Sopenharmony_ci "shader executable."); 3099bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3100bf215546Sopenharmony_ci stat->value.u64 = exe->stats.loops; 3101bf215546Sopenharmony_ci } 3102bf215546Sopenharmony_ci 3103bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3104bf215546Sopenharmony_ci WRITE_STR(stat->name, "Cycle Count"); 3105bf215546Sopenharmony_ci WRITE_STR(stat->description, 3106bf215546Sopenharmony_ci "Estimate of the number of EU cycles required to execute " 3107bf215546Sopenharmony_ci "the final generated executable. This is an estimate only " 3108bf215546Sopenharmony_ci "and may vary greatly from actual run-time performance."); 3109bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3110bf215546Sopenharmony_ci stat->value.u64 = exe->stats.cycles; 3111bf215546Sopenharmony_ci } 3112bf215546Sopenharmony_ci 3113bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3114bf215546Sopenharmony_ci WRITE_STR(stat->name, "Spill Count"); 3115bf215546Sopenharmony_ci WRITE_STR(stat->description, 3116bf215546Sopenharmony_ci "Number of scratch spill operations. This gives a rough " 3117bf215546Sopenharmony_ci "estimate of the cost incurred due to spilling temporary " 3118bf215546Sopenharmony_ci "values to memory. If this is non-zero, you may want to " 3119bf215546Sopenharmony_ci "adjust your shader to reduce register pressure."); 3120bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3121bf215546Sopenharmony_ci stat->value.u64 = exe->stats.spills; 3122bf215546Sopenharmony_ci } 3123bf215546Sopenharmony_ci 3124bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3125bf215546Sopenharmony_ci WRITE_STR(stat->name, "Fill Count"); 3126bf215546Sopenharmony_ci WRITE_STR(stat->description, 3127bf215546Sopenharmony_ci "Number of scratch fill operations. This gives a rough " 3128bf215546Sopenharmony_ci "estimate of the cost incurred due to spilling temporary " 3129bf215546Sopenharmony_ci "values to memory. If this is non-zero, you may want to " 3130bf215546Sopenharmony_ci "adjust your shader to reduce register pressure."); 3131bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3132bf215546Sopenharmony_ci stat->value.u64 = exe->stats.fills; 3133bf215546Sopenharmony_ci } 3134bf215546Sopenharmony_ci 3135bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3136bf215546Sopenharmony_ci WRITE_STR(stat->name, "Scratch Memory Size"); 3137bf215546Sopenharmony_ci WRITE_STR(stat->description, 3138bf215546Sopenharmony_ci "Number of bytes of scratch memory required by the " 3139bf215546Sopenharmony_ci "generated shader executable. If this is non-zero, you " 3140bf215546Sopenharmony_ci "may want to adjust your shader to reduce register " 3141bf215546Sopenharmony_ci "pressure."); 3142bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3143bf215546Sopenharmony_ci stat->value.u64 = prog_data->total_scratch; 3144bf215546Sopenharmony_ci } 3145bf215546Sopenharmony_ci 3146bf215546Sopenharmony_ci if (gl_shader_stage_uses_workgroup(exe->stage)) { 3147bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3148bf215546Sopenharmony_ci WRITE_STR(stat->name, "Workgroup Memory Size"); 3149bf215546Sopenharmony_ci WRITE_STR(stat->description, 3150bf215546Sopenharmony_ci "Number of bytes of workgroup shared memory used by this " 3151bf215546Sopenharmony_ci "shader including any padding."); 3152bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3153bf215546Sopenharmony_ci stat->value.u64 = prog_data->total_shared; 3154bf215546Sopenharmony_ci } 3155bf215546Sopenharmony_ci } 3156bf215546Sopenharmony_ci 3157bf215546Sopenharmony_ci return vk_outarray_status(&out); 3158bf215546Sopenharmony_ci} 3159bf215546Sopenharmony_ci 3160bf215546Sopenharmony_cistatic bool 3161bf215546Sopenharmony_ciwrite_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir, 3162bf215546Sopenharmony_ci const char *data) 3163bf215546Sopenharmony_ci{ 3164bf215546Sopenharmony_ci ir->isText = VK_TRUE; 3165bf215546Sopenharmony_ci 3166bf215546Sopenharmony_ci size_t data_len = strlen(data) + 1; 3167bf215546Sopenharmony_ci 3168bf215546Sopenharmony_ci if (ir->pData == NULL) { 3169bf215546Sopenharmony_ci ir->dataSize = data_len; 3170bf215546Sopenharmony_ci return true; 3171bf215546Sopenharmony_ci } 3172bf215546Sopenharmony_ci 3173bf215546Sopenharmony_ci strncpy(ir->pData, data, ir->dataSize); 3174bf215546Sopenharmony_ci if (ir->dataSize < data_len) 3175bf215546Sopenharmony_ci return false; 3176bf215546Sopenharmony_ci 3177bf215546Sopenharmony_ci ir->dataSize = data_len; 3178bf215546Sopenharmony_ci return true; 3179bf215546Sopenharmony_ci} 3180bf215546Sopenharmony_ci 3181bf215546Sopenharmony_ciVkResult anv_GetPipelineExecutableInternalRepresentationsKHR( 3182bf215546Sopenharmony_ci VkDevice device, 3183bf215546Sopenharmony_ci const VkPipelineExecutableInfoKHR* pExecutableInfo, 3184bf215546Sopenharmony_ci uint32_t* pInternalRepresentationCount, 3185bf215546Sopenharmony_ci VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations) 3186bf215546Sopenharmony_ci{ 3187bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 3188bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out, 3189bf215546Sopenharmony_ci pInternalRepresentations, pInternalRepresentationCount); 3190bf215546Sopenharmony_ci bool incomplete_text = false; 3191bf215546Sopenharmony_ci 3192bf215546Sopenharmony_ci const struct anv_pipeline_executable *exe = 3193bf215546Sopenharmony_ci anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3194bf215546Sopenharmony_ci 3195bf215546Sopenharmony_ci if (exe->nir) { 3196bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { 3197bf215546Sopenharmony_ci WRITE_STR(ir->name, "Final NIR"); 3198bf215546Sopenharmony_ci WRITE_STR(ir->description, 3199bf215546Sopenharmony_ci "Final NIR before going into the back-end compiler"); 3200bf215546Sopenharmony_ci 3201bf215546Sopenharmony_ci if (!write_ir_text(ir, exe->nir)) 3202bf215546Sopenharmony_ci incomplete_text = true; 3203bf215546Sopenharmony_ci } 3204bf215546Sopenharmony_ci } 3205bf215546Sopenharmony_ci 3206bf215546Sopenharmony_ci if (exe->disasm) { 3207bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { 3208bf215546Sopenharmony_ci WRITE_STR(ir->name, "GEN Assembly"); 3209bf215546Sopenharmony_ci WRITE_STR(ir->description, 3210bf215546Sopenharmony_ci "Final GEN assembly for the generated shader binary"); 3211bf215546Sopenharmony_ci 3212bf215546Sopenharmony_ci if (!write_ir_text(ir, exe->disasm)) 3213bf215546Sopenharmony_ci incomplete_text = true; 3214bf215546Sopenharmony_ci } 3215bf215546Sopenharmony_ci } 3216bf215546Sopenharmony_ci 3217bf215546Sopenharmony_ci return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out); 3218bf215546Sopenharmony_ci} 3219bf215546Sopenharmony_ci 3220bf215546Sopenharmony_ciVkResult 3221bf215546Sopenharmony_cianv_GetRayTracingShaderGroupHandlesKHR( 3222bf215546Sopenharmony_ci VkDevice _device, 3223bf215546Sopenharmony_ci VkPipeline _pipeline, 3224bf215546Sopenharmony_ci uint32_t firstGroup, 3225bf215546Sopenharmony_ci uint32_t groupCount, 3226bf215546Sopenharmony_ci size_t dataSize, 3227bf215546Sopenharmony_ci void* pData) 3228bf215546Sopenharmony_ci{ 3229bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_device, device, _device); 3230bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 3231bf215546Sopenharmony_ci 3232bf215546Sopenharmony_ci if (pipeline->type != ANV_PIPELINE_RAY_TRACING) 3233bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 3234bf215546Sopenharmony_ci 3235bf215546Sopenharmony_ci struct anv_ray_tracing_pipeline *rt_pipeline = 3236bf215546Sopenharmony_ci anv_pipeline_to_ray_tracing(pipeline); 3237bf215546Sopenharmony_ci 3238bf215546Sopenharmony_ci for (uint32_t i = 0; i < groupCount; i++) { 3239bf215546Sopenharmony_ci struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i]; 3240bf215546Sopenharmony_ci memcpy(pData, group->handle, sizeof(group->handle)); 3241bf215546Sopenharmony_ci pData += sizeof(group->handle); 3242bf215546Sopenharmony_ci } 3243bf215546Sopenharmony_ci 3244bf215546Sopenharmony_ci return VK_SUCCESS; 3245bf215546Sopenharmony_ci} 3246bf215546Sopenharmony_ci 3247bf215546Sopenharmony_ciVkResult 3248bf215546Sopenharmony_cianv_GetRayTracingCaptureReplayShaderGroupHandlesKHR( 3249bf215546Sopenharmony_ci VkDevice _device, 3250bf215546Sopenharmony_ci VkPipeline pipeline, 3251bf215546Sopenharmony_ci uint32_t firstGroup, 3252bf215546Sopenharmony_ci uint32_t groupCount, 3253bf215546Sopenharmony_ci size_t dataSize, 3254bf215546Sopenharmony_ci void* pData) 3255bf215546Sopenharmony_ci{ 3256bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_device, device, _device); 3257bf215546Sopenharmony_ci unreachable("Unimplemented"); 3258bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 3259bf215546Sopenharmony_ci} 3260bf215546Sopenharmony_ci 3261bf215546Sopenharmony_ciVkDeviceSize 3262bf215546Sopenharmony_cianv_GetRayTracingShaderGroupStackSizeKHR( 3263bf215546Sopenharmony_ci VkDevice device, 3264bf215546Sopenharmony_ci VkPipeline _pipeline, 3265bf215546Sopenharmony_ci uint32_t group, 3266bf215546Sopenharmony_ci VkShaderGroupShaderKHR groupShader) 3267bf215546Sopenharmony_ci{ 3268bf215546Sopenharmony_ci ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 3269bf215546Sopenharmony_ci assert(pipeline->type == ANV_PIPELINE_RAY_TRACING); 3270bf215546Sopenharmony_ci 3271bf215546Sopenharmony_ci struct anv_ray_tracing_pipeline *rt_pipeline = 3272bf215546Sopenharmony_ci anv_pipeline_to_ray_tracing(pipeline); 3273bf215546Sopenharmony_ci 3274bf215546Sopenharmony_ci assert(group < rt_pipeline->group_count); 3275bf215546Sopenharmony_ci 3276bf215546Sopenharmony_ci struct anv_shader_bin *bin; 3277bf215546Sopenharmony_ci switch (groupShader) { 3278bf215546Sopenharmony_ci case VK_SHADER_GROUP_SHADER_GENERAL_KHR: 3279bf215546Sopenharmony_ci bin = rt_pipeline->groups[group].general; 3280bf215546Sopenharmony_ci break; 3281bf215546Sopenharmony_ci 3282bf215546Sopenharmony_ci case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR: 3283bf215546Sopenharmony_ci bin = rt_pipeline->groups[group].closest_hit; 3284bf215546Sopenharmony_ci break; 3285bf215546Sopenharmony_ci 3286bf215546Sopenharmony_ci case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR: 3287bf215546Sopenharmony_ci bin = rt_pipeline->groups[group].any_hit; 3288bf215546Sopenharmony_ci break; 3289bf215546Sopenharmony_ci 3290bf215546Sopenharmony_ci case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR: 3291bf215546Sopenharmony_ci bin = rt_pipeline->groups[group].intersection; 3292bf215546Sopenharmony_ci break; 3293bf215546Sopenharmony_ci 3294bf215546Sopenharmony_ci default: 3295bf215546Sopenharmony_ci unreachable("Invalid VkShaderGroupShader enum"); 3296bf215546Sopenharmony_ci } 3297bf215546Sopenharmony_ci 3298bf215546Sopenharmony_ci if (bin == NULL) 3299bf215546Sopenharmony_ci return 0; 3300bf215546Sopenharmony_ci 3301bf215546Sopenharmony_ci return brw_bs_prog_data_const(bin->prog_data)->max_stack_size; 3302bf215546Sopenharmony_ci} 3303