1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Raspberry Pi Ltd 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "vk_util.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "v3dv_debug.h" 27bf215546Sopenharmony_ci#include "v3dv_private.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "common/v3d_debug.h" 30bf215546Sopenharmony_ci#include "qpu/qpu_disasm.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 33bf215546Sopenharmony_ci#include "nir/nir_serialize.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "util/u_atomic.h" 36bf215546Sopenharmony_ci#include "util/u_prim.h" 37bf215546Sopenharmony_ci#include "util/os_time.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#include "vk_pipeline.h" 40bf215546Sopenharmony_ci#include "vulkan/util/vk_format.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistatic VkResult 43bf215546Sopenharmony_cicompute_vpm_config(struct v3dv_pipeline *pipeline); 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_civoid 46bf215546Sopenharmony_civ3dv_print_v3d_key(struct v3d_key *key, 47bf215546Sopenharmony_ci uint32_t v3d_key_size) 48bf215546Sopenharmony_ci{ 49bf215546Sopenharmony_ci struct mesa_sha1 ctx; 50bf215546Sopenharmony_ci unsigned char sha1[20]; 51bf215546Sopenharmony_ci char sha1buf[41]; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, key, v3d_key_size); 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, sha1); 58bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, sha1); 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci fprintf(stderr, "key %p: %s\n", key, sha1buf); 61bf215546Sopenharmony_ci} 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_cistatic void 64bf215546Sopenharmony_cipipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage) 65bf215546Sopenharmony_ci{ 66bf215546Sopenharmony_ci VkPipelineShaderStageCreateInfo info = { 67bf215546Sopenharmony_ci .module = vk_shader_module_handle_from_nir(p_stage->nir), 68bf215546Sopenharmony_ci .pName = p_stage->entrypoint, 69bf215546Sopenharmony_ci .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage), 70bf215546Sopenharmony_ci }; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci vk_pipeline_hash_shader_stage(&info, p_stage->shader_sha1); 73bf215546Sopenharmony_ci} 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_civoid 76bf215546Sopenharmony_civ3dv_shader_variant_destroy(struct v3dv_device *device, 77bf215546Sopenharmony_ci struct v3dv_shader_variant *variant) 78bf215546Sopenharmony_ci{ 79bf215546Sopenharmony_ci /* The assembly BO is shared by all variants in the pipeline, so it can't 80bf215546Sopenharmony_ci * be freed here and should be freed with the pipeline 81bf215546Sopenharmony_ci */ 82bf215546Sopenharmony_ci if (variant->qpu_insts) 83bf215546Sopenharmony_ci free(variant->qpu_insts); 84bf215546Sopenharmony_ci ralloc_free(variant->prog_data.base); 85bf215546Sopenharmony_ci vk_free(&device->vk.alloc, variant); 86bf215546Sopenharmony_ci} 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistatic void 89bf215546Sopenharmony_cidestroy_pipeline_stage(struct v3dv_device *device, 90bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage, 91bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci if (!p_stage) 94bf215546Sopenharmony_ci return; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci ralloc_free(p_stage->nir); 97bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, p_stage); 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_cistatic void 101bf215546Sopenharmony_cipipeline_free_stages(struct v3dv_device *device, 102bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline, 103bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 104bf215546Sopenharmony_ci{ 105bf215546Sopenharmony_ci assert(pipeline); 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci /* FIXME: we can't just use a loop over mesa stage due the bin, would be 108bf215546Sopenharmony_ci * good to find an alternative. 109bf215546Sopenharmony_ci */ 110bf215546Sopenharmony_ci destroy_pipeline_stage(device, pipeline->vs, pAllocator); 111bf215546Sopenharmony_ci destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); 112bf215546Sopenharmony_ci destroy_pipeline_stage(device, pipeline->gs, pAllocator); 113bf215546Sopenharmony_ci destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator); 114bf215546Sopenharmony_ci destroy_pipeline_stage(device, pipeline->fs, pAllocator); 115bf215546Sopenharmony_ci destroy_pipeline_stage(device, pipeline->cs, pAllocator); 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci pipeline->vs = NULL; 118bf215546Sopenharmony_ci pipeline->vs_bin = NULL; 119bf215546Sopenharmony_ci pipeline->gs = NULL; 120bf215546Sopenharmony_ci pipeline->gs_bin = NULL; 121bf215546Sopenharmony_ci pipeline->fs = NULL; 122bf215546Sopenharmony_ci pipeline->cs = NULL; 123bf215546Sopenharmony_ci} 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_cistatic void 126bf215546Sopenharmony_civ3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, 127bf215546Sopenharmony_ci struct v3dv_device *device, 128bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 129bf215546Sopenharmony_ci{ 130bf215546Sopenharmony_ci if (!pipeline) 131bf215546Sopenharmony_ci return; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci pipeline_free_stages(device, pipeline, pAllocator); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci if (pipeline->shared_data) { 136bf215546Sopenharmony_ci v3dv_pipeline_shared_data_unref(device, pipeline->shared_data); 137bf215546Sopenharmony_ci pipeline->shared_data = NULL; 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (pipeline->spill.bo) { 141bf215546Sopenharmony_ci assert(pipeline->spill.size_per_thread > 0); 142bf215546Sopenharmony_ci v3dv_bo_free(device, pipeline->spill.bo); 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci if (pipeline->default_attribute_values) { 146bf215546Sopenharmony_ci v3dv_bo_free(device, pipeline->default_attribute_values); 147bf215546Sopenharmony_ci pipeline->default_attribute_values = NULL; 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci if (pipeline->executables.mem_ctx) 151bf215546Sopenharmony_ci ralloc_free(pipeline->executables.mem_ctx); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci vk_object_free(&device->vk, pAllocator, pipeline); 154bf215546Sopenharmony_ci} 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 157bf215546Sopenharmony_civ3dv_DestroyPipeline(VkDevice _device, 158bf215546Sopenharmony_ci VkPipeline _pipeline, 159bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 160bf215546Sopenharmony_ci{ 161bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 162bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci if (!pipeline) 165bf215546Sopenharmony_ci return; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci v3dv_destroy_pipeline(pipeline, device, pAllocator); 168bf215546Sopenharmony_ci} 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_cistatic const struct spirv_to_nir_options default_spirv_options = { 171bf215546Sopenharmony_ci .caps = { 172bf215546Sopenharmony_ci .device_group = true, 173bf215546Sopenharmony_ci .float_controls = true, 174bf215546Sopenharmony_ci .multiview = true, 175bf215546Sopenharmony_ci .storage_8bit = true, 176bf215546Sopenharmony_ci .storage_16bit = true, 177bf215546Sopenharmony_ci .subgroup_basic = true, 178bf215546Sopenharmony_ci .variable_pointers = true, 179bf215546Sopenharmony_ci .vk_memory_model = true, 180bf215546Sopenharmony_ci .vk_memory_model_device_scope = true, 181bf215546Sopenharmony_ci .physical_storage_buffer_address = true, 182bf215546Sopenharmony_ci }, 183bf215546Sopenharmony_ci .ubo_addr_format = nir_address_format_32bit_index_offset, 184bf215546Sopenharmony_ci .ssbo_addr_format = nir_address_format_32bit_index_offset, 185bf215546Sopenharmony_ci .phys_ssbo_addr_format = nir_address_format_2x32bit_global, 186bf215546Sopenharmony_ci .push_const_addr_format = nir_address_format_logical, 187bf215546Sopenharmony_ci .shared_addr_format = nir_address_format_32bit_offset, 188bf215546Sopenharmony_ci}; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ciconst nir_shader_compiler_options v3dv_nir_options = { 191bf215546Sopenharmony_ci .lower_uadd_sat = true, 192bf215546Sopenharmony_ci .lower_usub_sat = true, 193bf215546Sopenharmony_ci .lower_iadd_sat = true, 194bf215546Sopenharmony_ci .lower_all_io_to_temps = true, 195bf215546Sopenharmony_ci .lower_extract_byte = true, 196bf215546Sopenharmony_ci .lower_extract_word = true, 197bf215546Sopenharmony_ci .lower_insert_byte = true, 198bf215546Sopenharmony_ci .lower_insert_word = true, 199bf215546Sopenharmony_ci .lower_bitfield_insert_to_shifts = true, 200bf215546Sopenharmony_ci .lower_bitfield_extract_to_shifts = true, 201bf215546Sopenharmony_ci .lower_bitfield_reverse = true, 202bf215546Sopenharmony_ci .lower_bit_count = true, 203bf215546Sopenharmony_ci .lower_cs_local_id_to_index = true, 204bf215546Sopenharmony_ci .lower_ffract = true, 205bf215546Sopenharmony_ci .lower_fmod = true, 206bf215546Sopenharmony_ci .lower_pack_unorm_2x16 = true, 207bf215546Sopenharmony_ci .lower_pack_snorm_2x16 = true, 208bf215546Sopenharmony_ci .lower_unpack_unorm_2x16 = true, 209bf215546Sopenharmony_ci .lower_unpack_snorm_2x16 = true, 210bf215546Sopenharmony_ci .lower_pack_unorm_4x8 = true, 211bf215546Sopenharmony_ci .lower_pack_snorm_4x8 = true, 212bf215546Sopenharmony_ci .lower_unpack_unorm_4x8 = true, 213bf215546Sopenharmony_ci .lower_unpack_snorm_4x8 = true, 214bf215546Sopenharmony_ci .lower_pack_half_2x16 = true, 215bf215546Sopenharmony_ci .lower_unpack_half_2x16 = true, 216bf215546Sopenharmony_ci .lower_pack_32_2x16 = true, 217bf215546Sopenharmony_ci .lower_pack_32_2x16_split = true, 218bf215546Sopenharmony_ci .lower_unpack_32_2x16_split = true, 219bf215546Sopenharmony_ci .lower_mul_2x32_64 = true, 220bf215546Sopenharmony_ci .lower_fdiv = true, 221bf215546Sopenharmony_ci .lower_find_lsb = true, 222bf215546Sopenharmony_ci .lower_ffma16 = true, 223bf215546Sopenharmony_ci .lower_ffma32 = true, 224bf215546Sopenharmony_ci .lower_ffma64 = true, 225bf215546Sopenharmony_ci .lower_flrp32 = true, 226bf215546Sopenharmony_ci .lower_fpow = true, 227bf215546Sopenharmony_ci .lower_fsat = true, 228bf215546Sopenharmony_ci .lower_fsqrt = true, 229bf215546Sopenharmony_ci .lower_ifind_msb = true, 230bf215546Sopenharmony_ci .lower_isign = true, 231bf215546Sopenharmony_ci .lower_ldexp = true, 232bf215546Sopenharmony_ci .lower_mul_high = true, 233bf215546Sopenharmony_ci .lower_wpos_pntc = true, 234bf215546Sopenharmony_ci .lower_rotate = true, 235bf215546Sopenharmony_ci .lower_to_scalar = true, 236bf215546Sopenharmony_ci .lower_device_index_to_zero = true, 237bf215546Sopenharmony_ci .has_fsub = true, 238bf215546Sopenharmony_ci .has_isub = true, 239bf215546Sopenharmony_ci .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic 240bf215546Sopenharmony_ci * needs to be supported */ 241bf215546Sopenharmony_ci .lower_interpolate_at = true, 242bf215546Sopenharmony_ci .max_unroll_iterations = 16, 243bf215546Sopenharmony_ci .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp), 244bf215546Sopenharmony_ci .divergence_analysis_options = 245bf215546Sopenharmony_ci nir_divergence_multiple_workgroup_per_compute_subgroup 246bf215546Sopenharmony_ci}; 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ciconst nir_shader_compiler_options * 249bf215546Sopenharmony_civ3dv_pipeline_get_nir_options(void) 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci return &v3dv_nir_options; 252bf215546Sopenharmony_ci} 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci#define OPT(pass, ...) ({ \ 255bf215546Sopenharmony_ci bool this_progress = false; \ 256bf215546Sopenharmony_ci NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 257bf215546Sopenharmony_ci if (this_progress) \ 258bf215546Sopenharmony_ci progress = true; \ 259bf215546Sopenharmony_ci this_progress; \ 260bf215546Sopenharmony_ci}) 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_cistatic void 263bf215546Sopenharmony_cinir_optimize(nir_shader *nir, bool allow_copies) 264bf215546Sopenharmony_ci{ 265bf215546Sopenharmony_ci bool progress; 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci do { 268bf215546Sopenharmony_ci progress = false; 269bf215546Sopenharmony_ci OPT(nir_split_array_vars, nir_var_function_temp); 270bf215546Sopenharmony_ci OPT(nir_shrink_vec_array_vars, nir_var_function_temp); 271bf215546Sopenharmony_ci OPT(nir_opt_deref); 272bf215546Sopenharmony_ci OPT(nir_lower_vars_to_ssa); 273bf215546Sopenharmony_ci if (allow_copies) { 274bf215546Sopenharmony_ci /* Only run this pass in the first call to nir_optimize. Later calls 275bf215546Sopenharmony_ci * assume that we've lowered away any copy_deref instructions and we 276bf215546Sopenharmony_ci * don't want to introduce any more. 277bf215546Sopenharmony_ci */ 278bf215546Sopenharmony_ci OPT(nir_opt_find_array_copies); 279bf215546Sopenharmony_ci } 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci OPT(nir_remove_dead_variables, 282bf215546Sopenharmony_ci (nir_variable_mode)(nir_var_function_temp | 283bf215546Sopenharmony_ci nir_var_shader_temp | 284bf215546Sopenharmony_ci nir_var_mem_shared), 285bf215546Sopenharmony_ci NULL); 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci OPT(nir_opt_copy_prop_vars); 288bf215546Sopenharmony_ci OPT(nir_opt_dead_write_vars); 289bf215546Sopenharmony_ci OPT(nir_opt_combine_stores, nir_var_all); 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci OPT(nir_lower_alu_to_scalar, NULL, NULL); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci OPT(nir_copy_prop); 294bf215546Sopenharmony_ci OPT(nir_lower_phis_to_scalar, false); 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci OPT(nir_copy_prop); 297bf215546Sopenharmony_ci OPT(nir_opt_dce); 298bf215546Sopenharmony_ci OPT(nir_opt_cse); 299bf215546Sopenharmony_ci OPT(nir_opt_combine_stores, nir_var_all); 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci /* Passing 0 to the peephole select pass causes it to convert 302bf215546Sopenharmony_ci * if-statements that contain only move instructions in the branches 303bf215546Sopenharmony_ci * regardless of the count. 304bf215546Sopenharmony_ci * 305bf215546Sopenharmony_ci * Passing 1 to the peephole select pass causes it to convert 306bf215546Sopenharmony_ci * if-statements that contain at most a single ALU instruction (total) 307bf215546Sopenharmony_ci * in both branches. 308bf215546Sopenharmony_ci */ 309bf215546Sopenharmony_ci OPT(nir_opt_peephole_select, 0, false, false); 310bf215546Sopenharmony_ci OPT(nir_opt_peephole_select, 8, false, true); 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci OPT(nir_opt_intrinsics); 313bf215546Sopenharmony_ci OPT(nir_opt_idiv_const, 32); 314bf215546Sopenharmony_ci OPT(nir_opt_algebraic); 315bf215546Sopenharmony_ci OPT(nir_lower_alu); 316bf215546Sopenharmony_ci OPT(nir_opt_constant_folding); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci OPT(nir_opt_dead_cf); 319bf215546Sopenharmony_ci if (nir_opt_trivial_continues(nir)) { 320bf215546Sopenharmony_ci progress = true; 321bf215546Sopenharmony_ci OPT(nir_copy_prop); 322bf215546Sopenharmony_ci OPT(nir_opt_dce); 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci OPT(nir_opt_conditional_discard); 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci OPT(nir_opt_remove_phis); 327bf215546Sopenharmony_ci OPT(nir_opt_gcm, false); 328bf215546Sopenharmony_ci OPT(nir_opt_if, nir_opt_if_optimize_phi_true_false); 329bf215546Sopenharmony_ci OPT(nir_opt_undef); 330bf215546Sopenharmony_ci OPT(nir_lower_pack); 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci /* There are two optimizations that we don't do here, and we rely on the 333bf215546Sopenharmony_ci * backend: 334bf215546Sopenharmony_ci * 335bf215546Sopenharmony_ci * nir_lower_flrp only needs to be called once, as nothing should 336bf215546Sopenharmony_ci * rematerialize any flrps. As we are already calling it on the backend 337bf215546Sopenharmony_ci * compiler, we don't call it again. 338bf215546Sopenharmony_ci * 339bf215546Sopenharmony_ci * nir_opt_loop_unroll: as the backend includes custom strategies in 340bf215546Sopenharmony_ci * order to get the lowest spill/fills possible, and some of them 341bf215546Sopenharmony_ci * include disable loop unrolling. 342bf215546Sopenharmony_ci * 343bf215546Sopenharmony_ci * FIXME: ideally we would like to just remove this method and 344bf215546Sopenharmony_ci * v3d_optimize_nir. But: 345bf215546Sopenharmony_ci * 346bf215546Sopenharmony_ci * * Using it leads to some regressions on Vulkan CTS tests, due to 347bf215546Sopenharmony_ci * some lowering use there 348bf215546Sopenharmony_ci * * We would need to move to the backend some additional 349bf215546Sopenharmony_ci * lowerings/optimizations that are used on the Vulkan 350bf215546Sopenharmony_ci * frontend. That would require to check that we are not getting any 351bf215546Sopenharmony_ci * regression or performance drop on OpenGL 352bf215546Sopenharmony_ci * 353bf215546Sopenharmony_ci * For now we would keep this Vulkan fronted nir_optimize 354bf215546Sopenharmony_ci */ 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci } while (progress); 357bf215546Sopenharmony_ci} 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_cistatic void 360bf215546Sopenharmony_cipreprocess_nir(nir_shader *nir) 361bf215546Sopenharmony_ci{ 362bf215546Sopenharmony_ci const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { 363bf215546Sopenharmony_ci .frag_coord = true, 364bf215546Sopenharmony_ci .point_coord = true, 365bf215546Sopenharmony_ci }; 366bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci /* Vulkan uses the separate-shader linking model */ 369bf215546Sopenharmony_ci nir->info.separate_shader = true; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci /* Make sure we lower variable initializers on output variables so that 372bf215546Sopenharmony_ci * nir_remove_dead_variables below sees the corresponding stores 373bf215546Sopenharmony_ci */ 374bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT) 377bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out); 378bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT) { 379bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_input_attachments, 380bf215546Sopenharmony_ci &(nir_input_attachment_options) { 381bf215546Sopenharmony_ci .use_fragcoord_sysval = false, 382bf215546Sopenharmony_ci }); 383bf215546Sopenharmony_ci } 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_to_temporaries, 386bf215546Sopenharmony_ci nir_shader_get_entrypoint(nir), true, false); 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_system_values); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL); 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_normalize_cubemap_coords); 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_global_vars_to_local); 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_split_var_copies); 397bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp); 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci nir_optimize(nir, true); 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, 402bf215546Sopenharmony_ci nir_var_mem_push_const, 403bf215546Sopenharmony_ci nir_address_format_32bit_offset); 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, 406bf215546Sopenharmony_ci nir_var_mem_ubo | nir_var_mem_ssbo, 407bf215546Sopenharmony_ci nir_address_format_32bit_index_offset); 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, 410bf215546Sopenharmony_ci nir_var_mem_global, 411bf215546Sopenharmony_ci nir_address_format_2x32bit_global); 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_load_const_to_scalar); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci /* Lower a bunch of stuff */ 416bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_var_copies); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_indirect_derefs, 421bf215546Sopenharmony_ci nir_var_function_temp, 2); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_array_deref_of_vec, 424bf215546Sopenharmony_ci nir_var_mem_ubo | nir_var_mem_ssbo, 425bf215546Sopenharmony_ci nir_lower_direct_array_deref_of_vec_load); 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_frexp); 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci /* Get rid of split copies */ 430bf215546Sopenharmony_ci nir_optimize(nir, false); 431bf215546Sopenharmony_ci} 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_cistatic nir_shader * 434bf215546Sopenharmony_cishader_module_compile_to_nir(struct v3dv_device *device, 435bf215546Sopenharmony_ci struct v3dv_pipeline_stage *stage) 436bf215546Sopenharmony_ci{ 437bf215546Sopenharmony_ci nir_shader *nir; 438bf215546Sopenharmony_ci const nir_shader_compiler_options *nir_options = &v3dv_nir_options; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) && stage->module->nir == NULL) 442bf215546Sopenharmony_ci v3dv_print_spirv(stage->module->data, stage->module->size, stderr); 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci /* vk_shader_module_to_nir also handles internal shaders, when module->nir 445bf215546Sopenharmony_ci * != NULL. It also calls nir_validate_shader on both cases, so we don't 446bf215546Sopenharmony_ci * call it again here. 447bf215546Sopenharmony_ci */ 448bf215546Sopenharmony_ci VkResult result = vk_shader_module_to_nir(&device->vk, stage->module, 449bf215546Sopenharmony_ci broadcom_shader_stage_to_gl(stage->stage), 450bf215546Sopenharmony_ci stage->entrypoint, 451bf215546Sopenharmony_ci stage->spec_info, 452bf215546Sopenharmony_ci &default_spirv_options, 453bf215546Sopenharmony_ci nir_options, 454bf215546Sopenharmony_ci NULL, &nir); 455bf215546Sopenharmony_ci if (result != VK_SUCCESS) 456bf215546Sopenharmony_ci return NULL; 457bf215546Sopenharmony_ci assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage)); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERDB) && stage->module->nir == NULL) { 460bf215546Sopenharmony_ci char sha1buf[41]; 461bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, stage->pipeline->sha1); 462bf215546Sopenharmony_ci nir->info.name = ralloc_strdup(nir, sha1buf); 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 466bf215546Sopenharmony_ci v3d_debug_flag_for_shader_stage( 467bf215546Sopenharmony_ci broadcom_shader_stage_to_gl(stage->stage))))) { 468bf215546Sopenharmony_ci fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n", 469bf215546Sopenharmony_ci broadcom_shader_stage_name(stage->stage), 470bf215546Sopenharmony_ci stage->program_id); 471bf215546Sopenharmony_ci nir_print_shader(nir, stderr); 472bf215546Sopenharmony_ci fprintf(stderr, "\n"); 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci preprocess_nir(nir); 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci return nir; 478bf215546Sopenharmony_ci} 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_cistatic int 481bf215546Sopenharmony_citype_size_vec4(const struct glsl_type *type, bool bindless) 482bf215546Sopenharmony_ci{ 483bf215546Sopenharmony_ci return glsl_count_attribute_slots(type, false); 484bf215546Sopenharmony_ci} 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci/* FIXME: the number of parameters for this method is somewhat big. Perhaps 487bf215546Sopenharmony_ci * rethink. 488bf215546Sopenharmony_ci */ 489bf215546Sopenharmony_cistatic unsigned 490bf215546Sopenharmony_cidescriptor_map_add(struct v3dv_descriptor_map *map, 491bf215546Sopenharmony_ci int set, 492bf215546Sopenharmony_ci int binding, 493bf215546Sopenharmony_ci int array_index, 494bf215546Sopenharmony_ci int array_size, 495bf215546Sopenharmony_ci int start_index, 496bf215546Sopenharmony_ci uint8_t return_size) 497bf215546Sopenharmony_ci{ 498bf215546Sopenharmony_ci assert(array_index < array_size); 499bf215546Sopenharmony_ci assert(return_size == 16 || return_size == 32); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci unsigned index = start_index; 502bf215546Sopenharmony_ci for (; index < map->num_desc; index++) { 503bf215546Sopenharmony_ci if (map->used[index] && 504bf215546Sopenharmony_ci set == map->set[index] && 505bf215546Sopenharmony_ci binding == map->binding[index] && 506bf215546Sopenharmony_ci array_index == map->array_index[index]) { 507bf215546Sopenharmony_ci assert(array_size == map->array_size[index]); 508bf215546Sopenharmony_ci if (return_size != map->return_size[index]) { 509bf215546Sopenharmony_ci /* It the return_size is different it means that the same sampler 510bf215546Sopenharmony_ci * was used for operations with different precision 511bf215546Sopenharmony_ci * requirement. In this case we need to ensure that we use the 512bf215546Sopenharmony_ci * larger one. 513bf215546Sopenharmony_ci */ 514bf215546Sopenharmony_ci map->return_size[index] = 32; 515bf215546Sopenharmony_ci } 516bf215546Sopenharmony_ci return index; 517bf215546Sopenharmony_ci } else if (!map->used[index]) { 518bf215546Sopenharmony_ci break; 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci } 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci assert(index < DESCRIPTOR_MAP_SIZE); 523bf215546Sopenharmony_ci assert(!map->used[index]); 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci map->used[index] = true; 526bf215546Sopenharmony_ci map->set[index] = set; 527bf215546Sopenharmony_ci map->binding[index] = binding; 528bf215546Sopenharmony_ci map->array_index[index] = array_index; 529bf215546Sopenharmony_ci map->array_size[index] = array_size; 530bf215546Sopenharmony_ci map->return_size[index] = return_size; 531bf215546Sopenharmony_ci map->num_desc = MAX2(map->num_desc, index + 1); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci return index; 534bf215546Sopenharmony_ci} 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_cistruct lower_pipeline_layout_state { 537bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline; 538bf215546Sopenharmony_ci const struct v3dv_pipeline_layout *layout; 539bf215546Sopenharmony_ci bool needs_default_sampler_state; 540bf215546Sopenharmony_ci}; 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_cistatic void 544bf215546Sopenharmony_cilower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, 545bf215546Sopenharmony_ci struct lower_pipeline_layout_state *state) 546bf215546Sopenharmony_ci{ 547bf215546Sopenharmony_ci assert(instr->intrinsic == nir_intrinsic_load_push_constant); 548bf215546Sopenharmony_ci instr->intrinsic = nir_intrinsic_load_uniform; 549bf215546Sopenharmony_ci} 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_cistatic struct v3dv_descriptor_map* 552bf215546Sopenharmony_cipipeline_get_descriptor_map(struct v3dv_pipeline *pipeline, 553bf215546Sopenharmony_ci VkDescriptorType desc_type, 554bf215546Sopenharmony_ci gl_shader_stage gl_stage, 555bf215546Sopenharmony_ci bool is_sampler) 556bf215546Sopenharmony_ci{ 557bf215546Sopenharmony_ci enum broadcom_shader_stage broadcom_stage = 558bf215546Sopenharmony_ci gl_shader_stage_to_broadcom(gl_stage); 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci assert(pipeline->shared_data && 561bf215546Sopenharmony_ci pipeline->shared_data->maps[broadcom_stage]); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci switch(desc_type) { 564bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_SAMPLER: 565bf215546Sopenharmony_ci return &pipeline->shared_data->maps[broadcom_stage]->sampler_map; 566bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 567bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 568bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 569bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 570bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 571bf215546Sopenharmony_ci return &pipeline->shared_data->maps[broadcom_stage]->texture_map; 572bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 573bf215546Sopenharmony_ci return is_sampler ? 574bf215546Sopenharmony_ci &pipeline->shared_data->maps[broadcom_stage]->sampler_map : 575bf215546Sopenharmony_ci &pipeline->shared_data->maps[broadcom_stage]->texture_map; 576bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 577bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 578bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: 579bf215546Sopenharmony_ci return &pipeline->shared_data->maps[broadcom_stage]->ubo_map; 580bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 581bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 582bf215546Sopenharmony_ci return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map; 583bf215546Sopenharmony_ci default: 584bf215546Sopenharmony_ci unreachable("Descriptor type unknown or not having a descriptor map"); 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci} 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci/* Gathers info from the intrinsic (set and binding) and then lowers it so it 589bf215546Sopenharmony_ci * could be used by the v3d_compiler */ 590bf215546Sopenharmony_cistatic void 591bf215546Sopenharmony_cilower_vulkan_resource_index(nir_builder *b, 592bf215546Sopenharmony_ci nir_intrinsic_instr *instr, 593bf215546Sopenharmony_ci struct lower_pipeline_layout_state *state) 594bf215546Sopenharmony_ci{ 595bf215546Sopenharmony_ci assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index); 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci unsigned set = nir_intrinsic_desc_set(instr); 600bf215546Sopenharmony_ci unsigned binding = nir_intrinsic_binding(instr); 601bf215546Sopenharmony_ci struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; 602bf215546Sopenharmony_ci struct v3dv_descriptor_set_binding_layout *binding_layout = 603bf215546Sopenharmony_ci &set_layout->binding[binding]; 604bf215546Sopenharmony_ci unsigned index = 0; 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci switch (binding_layout->type) { 607bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 608bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 609bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 610bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 611bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: { 612bf215546Sopenharmony_ci struct v3dv_descriptor_map *descriptor_map = 613bf215546Sopenharmony_ci pipeline_get_descriptor_map(state->pipeline, binding_layout->type, 614bf215546Sopenharmony_ci b->shader->info.stage, false); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci if (!const_val) 617bf215546Sopenharmony_ci unreachable("non-constant vulkan_resource_index array index"); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci /* At compile-time we will need to know if we are processing a UBO load 620bf215546Sopenharmony_ci * for an inline or a regular UBO so we can handle inline loads like 621bf215546Sopenharmony_ci * push constants. At the level of NIR level however, the inline 622bf215546Sopenharmony_ci * information is gone, so we rely on the index to make this distinction. 623bf215546Sopenharmony_ci * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for 624bf215546Sopenharmony_ci * inline buffers. This means that at the descriptor map level 625bf215546Sopenharmony_ci * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1, 626bf215546Sopenharmony_ci * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS. 627bf215546Sopenharmony_ci */ 628bf215546Sopenharmony_ci uint32_t start_index = 0; 629bf215546Sopenharmony_ci if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || 630bf215546Sopenharmony_ci binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { 631bf215546Sopenharmony_ci start_index = MAX_INLINE_UNIFORM_BUFFERS; 632bf215546Sopenharmony_ci } 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci index = descriptor_map_add(descriptor_map, set, binding, 635bf215546Sopenharmony_ci const_val->u32, 636bf215546Sopenharmony_ci binding_layout->array_size, 637bf215546Sopenharmony_ci start_index, 638bf215546Sopenharmony_ci 32 /* return_size: doesn't really apply for this case */); 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci /* We always reserve index 0 for push constants */ 641bf215546Sopenharmony_ci if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || 642bf215546Sopenharmony_ci binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || 643bf215546Sopenharmony_ci binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { 644bf215546Sopenharmony_ci index++; 645bf215546Sopenharmony_ci } 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci break; 648bf215546Sopenharmony_ci } 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci default: 651bf215546Sopenharmony_ci unreachable("unsupported descriptor type for vulkan_resource_index"); 652bf215546Sopenharmony_ci break; 653bf215546Sopenharmony_ci } 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci /* Since we use the deref pass, both vulkan_resource_index and 656bf215546Sopenharmony_ci * vulkan_load_descriptor return a vec2 providing an index and 657bf215546Sopenharmony_ci * offset. Our backend compiler only cares about the index part. 658bf215546Sopenharmony_ci */ 659bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&instr->dest.ssa, 660bf215546Sopenharmony_ci nir_imm_ivec2(b, index, 0)); 661bf215546Sopenharmony_ci nir_instr_remove(&instr->instr); 662bf215546Sopenharmony_ci} 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci/* Returns return_size, so it could be used for the case of not having a 665bf215546Sopenharmony_ci * sampler object 666bf215546Sopenharmony_ci */ 667bf215546Sopenharmony_cistatic uint8_t 668bf215546Sopenharmony_cilower_tex_src_to_offset(nir_builder *b, 669bf215546Sopenharmony_ci nir_tex_instr *instr, 670bf215546Sopenharmony_ci unsigned src_idx, 671bf215546Sopenharmony_ci struct lower_pipeline_layout_state *state) 672bf215546Sopenharmony_ci{ 673bf215546Sopenharmony_ci nir_ssa_def *index = NULL; 674bf215546Sopenharmony_ci unsigned base_index = 0; 675bf215546Sopenharmony_ci unsigned array_elements = 1; 676bf215546Sopenharmony_ci nir_tex_src *src = &instr->src[src_idx]; 677bf215546Sopenharmony_ci bool is_sampler = src->src_type == nir_tex_src_sampler_deref; 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci /* We compute first the offsets */ 680bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr); 681bf215546Sopenharmony_ci while (deref->deref_type != nir_deref_type_var) { 682bf215546Sopenharmony_ci assert(deref->parent.is_ssa); 683bf215546Sopenharmony_ci nir_deref_instr *parent = 684bf215546Sopenharmony_ci nir_instr_as_deref(deref->parent.ssa->parent_instr); 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci assert(deref->deref_type == nir_deref_type_array); 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci if (nir_src_is_const(deref->arr.index) && index == NULL) { 689bf215546Sopenharmony_ci /* We're still building a direct index */ 690bf215546Sopenharmony_ci base_index += nir_src_as_uint(deref->arr.index) * array_elements; 691bf215546Sopenharmony_ci } else { 692bf215546Sopenharmony_ci if (index == NULL) { 693bf215546Sopenharmony_ci /* We used to be direct but not anymore */ 694bf215546Sopenharmony_ci index = nir_imm_int(b, base_index); 695bf215546Sopenharmony_ci base_index = 0; 696bf215546Sopenharmony_ci } 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci index = nir_iadd(b, index, 699bf215546Sopenharmony_ci nir_imul(b, nir_imm_int(b, array_elements), 700bf215546Sopenharmony_ci nir_ssa_for_src(b, deref->arr.index, 1))); 701bf215546Sopenharmony_ci } 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci array_elements *= glsl_get_length(parent->type); 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci deref = parent; 706bf215546Sopenharmony_ci } 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci if (index) 709bf215546Sopenharmony_ci index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci /* We have the offsets, we apply them, rewriting the source or removing 712bf215546Sopenharmony_ci * instr if needed 713bf215546Sopenharmony_ci */ 714bf215546Sopenharmony_ci if (index) { 715bf215546Sopenharmony_ci nir_instr_rewrite_src(&instr->instr, &src->src, 716bf215546Sopenharmony_ci nir_src_for_ssa(index)); 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci src->src_type = is_sampler ? 719bf215546Sopenharmony_ci nir_tex_src_sampler_offset : 720bf215546Sopenharmony_ci nir_tex_src_texture_offset; 721bf215546Sopenharmony_ci } else { 722bf215546Sopenharmony_ci nir_tex_instr_remove_src(instr, src_idx); 723bf215546Sopenharmony_ci } 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci uint32_t set = deref->var->data.descriptor_set; 726bf215546Sopenharmony_ci uint32_t binding = deref->var->data.binding; 727bf215546Sopenharmony_ci /* FIXME: this is a really simplified check for the precision to be used 728bf215546Sopenharmony_ci * for the sampling. Right now we are ony checking for the variables used 729bf215546Sopenharmony_ci * on the operation itself, but there are other cases that we could use to 730bf215546Sopenharmony_ci * infer the precision requirement. 731bf215546Sopenharmony_ci */ 732bf215546Sopenharmony_ci bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM || 733bf215546Sopenharmony_ci deref->var->data.precision == GLSL_PRECISION_LOW; 734bf215546Sopenharmony_ci struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; 735bf215546Sopenharmony_ci struct v3dv_descriptor_set_binding_layout *binding_layout = 736bf215546Sopenharmony_ci &set_layout->binding[binding]; 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci /* For input attachments, the shader includes the attachment_idx. As we are 739bf215546Sopenharmony_ci * treating them as a texture, we only want the base_index 740bf215546Sopenharmony_ci */ 741bf215546Sopenharmony_ci uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ? 742bf215546Sopenharmony_ci deref->var->data.index + base_index : 743bf215546Sopenharmony_ci base_index; 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci uint8_t return_size; 746bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT)) 747bf215546Sopenharmony_ci return_size = 16; 748bf215546Sopenharmony_ci else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT)) 749bf215546Sopenharmony_ci return_size = 32; 750bf215546Sopenharmony_ci else 751bf215546Sopenharmony_ci return_size = relaxed_precision || instr->is_shadow ? 16 : 32; 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci struct v3dv_descriptor_map *map = 754bf215546Sopenharmony_ci pipeline_get_descriptor_map(state->pipeline, binding_layout->type, 755bf215546Sopenharmony_ci b->shader->info.stage, is_sampler); 756bf215546Sopenharmony_ci int desc_index = 757bf215546Sopenharmony_ci descriptor_map_add(map, 758bf215546Sopenharmony_ci deref->var->data.descriptor_set, 759bf215546Sopenharmony_ci deref->var->data.binding, 760bf215546Sopenharmony_ci array_index, 761bf215546Sopenharmony_ci binding_layout->array_size, 762bf215546Sopenharmony_ci 0, 763bf215546Sopenharmony_ci return_size); 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci if (is_sampler) 766bf215546Sopenharmony_ci instr->sampler_index = desc_index; 767bf215546Sopenharmony_ci else 768bf215546Sopenharmony_ci instr->texture_index = desc_index; 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci return return_size; 771bf215546Sopenharmony_ci} 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_cistatic bool 774bf215546Sopenharmony_cilower_sampler(nir_builder *b, 775bf215546Sopenharmony_ci nir_tex_instr *instr, 776bf215546Sopenharmony_ci struct lower_pipeline_layout_state *state) 777bf215546Sopenharmony_ci{ 778bf215546Sopenharmony_ci uint8_t return_size = 0; 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci int texture_idx = 781bf215546Sopenharmony_ci nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci if (texture_idx >= 0) 784bf215546Sopenharmony_ci return_size = lower_tex_src_to_offset(b, instr, texture_idx, state); 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci int sampler_idx = 787bf215546Sopenharmony_ci nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci if (sampler_idx >= 0) 790bf215546Sopenharmony_ci lower_tex_src_to_offset(b, instr, sampler_idx, state); 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci if (texture_idx < 0 && sampler_idx < 0) 793bf215546Sopenharmony_ci return false; 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci /* If we don't have a sampler, we assign it the idx we reserve for this 796bf215546Sopenharmony_ci * case, and we ensure that it is using the correct return size. 797bf215546Sopenharmony_ci */ 798bf215546Sopenharmony_ci if (sampler_idx < 0) { 799bf215546Sopenharmony_ci state->needs_default_sampler_state = true; 800bf215546Sopenharmony_ci instr->sampler_index = return_size == 16 ? 801bf215546Sopenharmony_ci V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX; 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci return true; 805bf215546Sopenharmony_ci} 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */ 808bf215546Sopenharmony_cistatic void 809bf215546Sopenharmony_cilower_image_deref(nir_builder *b, 810bf215546Sopenharmony_ci nir_intrinsic_instr *instr, 811bf215546Sopenharmony_ci struct lower_pipeline_layout_state *state) 812bf215546Sopenharmony_ci{ 813bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); 814bf215546Sopenharmony_ci nir_ssa_def *index = NULL; 815bf215546Sopenharmony_ci unsigned array_elements = 1; 816bf215546Sopenharmony_ci unsigned base_index = 0; 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci while (deref->deref_type != nir_deref_type_var) { 819bf215546Sopenharmony_ci assert(deref->parent.is_ssa); 820bf215546Sopenharmony_ci nir_deref_instr *parent = 821bf215546Sopenharmony_ci nir_instr_as_deref(deref->parent.ssa->parent_instr); 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci assert(deref->deref_type == nir_deref_type_array); 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci if (nir_src_is_const(deref->arr.index) && index == NULL) { 826bf215546Sopenharmony_ci /* We're still building a direct index */ 827bf215546Sopenharmony_ci base_index += nir_src_as_uint(deref->arr.index) * array_elements; 828bf215546Sopenharmony_ci } else { 829bf215546Sopenharmony_ci if (index == NULL) { 830bf215546Sopenharmony_ci /* We used to be direct but not anymore */ 831bf215546Sopenharmony_ci index = nir_imm_int(b, base_index); 832bf215546Sopenharmony_ci base_index = 0; 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci index = nir_iadd(b, index, 836bf215546Sopenharmony_ci nir_imul(b, nir_imm_int(b, array_elements), 837bf215546Sopenharmony_ci nir_ssa_for_src(b, deref->arr.index, 1))); 838bf215546Sopenharmony_ci } 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci array_elements *= glsl_get_length(parent->type); 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci deref = parent; 843bf215546Sopenharmony_ci } 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci if (index) 846bf215546Sopenharmony_ci index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci uint32_t set = deref->var->data.descriptor_set; 849bf215546Sopenharmony_ci uint32_t binding = deref->var->data.binding; 850bf215546Sopenharmony_ci struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; 851bf215546Sopenharmony_ci struct v3dv_descriptor_set_binding_layout *binding_layout = 852bf215546Sopenharmony_ci &set_layout->binding[binding]; 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci uint32_t array_index = deref->var->data.index + base_index; 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || 857bf215546Sopenharmony_ci binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci struct v3dv_descriptor_map *map = 860bf215546Sopenharmony_ci pipeline_get_descriptor_map(state->pipeline, binding_layout->type, 861bf215546Sopenharmony_ci b->shader->info.stage, false); 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_ci int desc_index = 864bf215546Sopenharmony_ci descriptor_map_add(map, 865bf215546Sopenharmony_ci deref->var->data.descriptor_set, 866bf215546Sopenharmony_ci deref->var->data.binding, 867bf215546Sopenharmony_ci array_index, 868bf215546Sopenharmony_ci binding_layout->array_size, 869bf215546Sopenharmony_ci 0, 870bf215546Sopenharmony_ci 32 /* return_size: doesn't apply for textures */); 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci /* Note: we don't need to do anything here in relation to the precision and 873bf215546Sopenharmony_ci * the output size because for images we can infer that info from the image 874bf215546Sopenharmony_ci * intrinsic, that includes the image format (see 875bf215546Sopenharmony_ci * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler. 876bf215546Sopenharmony_ci */ 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_ci index = nir_imm_int(b, desc_index); 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci nir_rewrite_image_intrinsic(instr, index, false); 881bf215546Sopenharmony_ci} 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_cistatic bool 884bf215546Sopenharmony_cilower_intrinsic(nir_builder *b, 885bf215546Sopenharmony_ci nir_intrinsic_instr *instr, 886bf215546Sopenharmony_ci struct lower_pipeline_layout_state *state) 887bf215546Sopenharmony_ci{ 888bf215546Sopenharmony_ci switch (instr->intrinsic) { 889bf215546Sopenharmony_ci case nir_intrinsic_load_push_constant: 890bf215546Sopenharmony_ci lower_load_push_constant(b, instr, state); 891bf215546Sopenharmony_ci return true; 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci case nir_intrinsic_vulkan_resource_index: 894bf215546Sopenharmony_ci lower_vulkan_resource_index(b, instr, state); 895bf215546Sopenharmony_ci return true; 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci case nir_intrinsic_load_vulkan_descriptor: { 898bf215546Sopenharmony_ci /* Loading the descriptor happens as part of load/store instructions, 899bf215546Sopenharmony_ci * so for us this is a no-op. 900bf215546Sopenharmony_ci */ 901bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa); 902bf215546Sopenharmony_ci nir_instr_remove(&instr->instr); 903bf215546Sopenharmony_ci return true; 904bf215546Sopenharmony_ci } 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci case nir_intrinsic_image_deref_load: 907bf215546Sopenharmony_ci case nir_intrinsic_image_deref_store: 908bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_add: 909bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_imin: 910bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_umin: 911bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_imax: 912bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_umax: 913bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_and: 914bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_or: 915bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_xor: 916bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_exchange: 917bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_comp_swap: 918bf215546Sopenharmony_ci case nir_intrinsic_image_deref_size: 919bf215546Sopenharmony_ci case nir_intrinsic_image_deref_samples: 920bf215546Sopenharmony_ci lower_image_deref(b, instr, state); 921bf215546Sopenharmony_ci return true; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci default: 924bf215546Sopenharmony_ci return false; 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci} 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_cistatic bool 929bf215546Sopenharmony_cilower_pipeline_layout_cb(nir_builder *b, 930bf215546Sopenharmony_ci nir_instr *instr, 931bf215546Sopenharmony_ci void *_state) 932bf215546Sopenharmony_ci{ 933bf215546Sopenharmony_ci bool progress = false; 934bf215546Sopenharmony_ci struct lower_pipeline_layout_state *state = _state; 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 937bf215546Sopenharmony_ci switch (instr->type) { 938bf215546Sopenharmony_ci case nir_instr_type_tex: 939bf215546Sopenharmony_ci progress |= lower_sampler(b, nir_instr_as_tex(instr), state); 940bf215546Sopenharmony_ci break; 941bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 942bf215546Sopenharmony_ci progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state); 943bf215546Sopenharmony_ci break; 944bf215546Sopenharmony_ci default: 945bf215546Sopenharmony_ci break; 946bf215546Sopenharmony_ci } 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci return progress; 949bf215546Sopenharmony_ci} 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_cistatic bool 952bf215546Sopenharmony_cilower_pipeline_layout_info(nir_shader *shader, 953bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline, 954bf215546Sopenharmony_ci const struct v3dv_pipeline_layout *layout, 955bf215546Sopenharmony_ci bool *needs_default_sampler_state) 956bf215546Sopenharmony_ci{ 957bf215546Sopenharmony_ci bool progress = false; 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci struct lower_pipeline_layout_state state = { 960bf215546Sopenharmony_ci .pipeline = pipeline, 961bf215546Sopenharmony_ci .layout = layout, 962bf215546Sopenharmony_ci .needs_default_sampler_state = false, 963bf215546Sopenharmony_ci }; 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb, 966bf215546Sopenharmony_ci nir_metadata_block_index | 967bf215546Sopenharmony_ci nir_metadata_dominance, 968bf215546Sopenharmony_ci &state); 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci *needs_default_sampler_state = state.needs_default_sampler_state; 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci return progress; 973bf215546Sopenharmony_ci} 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_cistatic void 977bf215546Sopenharmony_cilower_fs_io(nir_shader *nir) 978bf215546Sopenharmony_ci{ 979bf215546Sopenharmony_ci /* Our backend doesn't handle array fragment shader outputs */ 980bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 981bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL); 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 984bf215546Sopenharmony_ci MESA_SHADER_FRAGMENT); 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 987bf215546Sopenharmony_ci MESA_SHADER_FRAGMENT); 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 990bf215546Sopenharmony_ci type_size_vec4, 0); 991bf215546Sopenharmony_ci} 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_cistatic void 994bf215546Sopenharmony_cilower_gs_io(struct nir_shader *nir) 995bf215546Sopenharmony_ci{ 996bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 999bf215546Sopenharmony_ci MESA_SHADER_GEOMETRY); 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 1002bf215546Sopenharmony_ci MESA_SHADER_GEOMETRY); 1003bf215546Sopenharmony_ci} 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_cistatic void 1006bf215546Sopenharmony_cilower_vs_io(struct nir_shader *nir) 1007bf215546Sopenharmony_ci{ 1008bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 1011bf215546Sopenharmony_ci MESA_SHADER_VERTEX); 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 1014bf215546Sopenharmony_ci MESA_SHADER_VERTEX); 1015bf215546Sopenharmony_ci 1016bf215546Sopenharmony_ci /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it 1017bf215546Sopenharmony_ci * overlaps with v3d_nir_lower_io. Need further research though. 1018bf215546Sopenharmony_ci */ 1019bf215546Sopenharmony_ci} 1020bf215546Sopenharmony_ci 1021bf215546Sopenharmony_cistatic void 1022bf215546Sopenharmony_cishader_debug_output(const char *message, void *data) 1023bf215546Sopenharmony_ci{ 1024bf215546Sopenharmony_ci /* FIXME: We probably don't want to debug anything extra here, and in fact 1025bf215546Sopenharmony_ci * the compiler is not using this callback too much, only as an alternative 1026bf215546Sopenharmony_ci * way to debug out the shaderdb stats, that you can already get using 1027bf215546Sopenharmony_ci * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d 1028bf215546Sopenharmony_ci * compiler to remove that callback. 1029bf215546Sopenharmony_ci */ 1030bf215546Sopenharmony_ci} 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_cistatic void 1033bf215546Sopenharmony_cipipeline_populate_v3d_key(struct v3d_key *key, 1034bf215546Sopenharmony_ci const struct v3dv_pipeline_stage *p_stage, 1035bf215546Sopenharmony_ci uint32_t ucp_enables, 1036bf215546Sopenharmony_ci bool robust_buffer_access) 1037bf215546Sopenharmony_ci{ 1038bf215546Sopenharmony_ci assert(p_stage->pipeline->shared_data && 1039bf215546Sopenharmony_ci p_stage->pipeline->shared_data->maps[p_stage->stage]); 1040bf215546Sopenharmony_ci 1041bf215546Sopenharmony_ci /* The following values are default values used at pipeline create. We use 1042bf215546Sopenharmony_ci * there 32 bit as default return size. 1043bf215546Sopenharmony_ci */ 1044bf215546Sopenharmony_ci struct v3dv_descriptor_map *sampler_map = 1045bf215546Sopenharmony_ci &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map; 1046bf215546Sopenharmony_ci struct v3dv_descriptor_map *texture_map = 1047bf215546Sopenharmony_ci &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map; 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci key->num_tex_used = texture_map->num_desc; 1050bf215546Sopenharmony_ci assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS); 1051bf215546Sopenharmony_ci for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) { 1052bf215546Sopenharmony_ci key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X; 1053bf215546Sopenharmony_ci key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y; 1054bf215546Sopenharmony_ci key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z; 1055bf215546Sopenharmony_ci key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W; 1056bf215546Sopenharmony_ci } 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci key->num_samplers_used = sampler_map->num_desc; 1059bf215546Sopenharmony_ci assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS); 1060bf215546Sopenharmony_ci for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc; 1061bf215546Sopenharmony_ci sampler_idx++) { 1062bf215546Sopenharmony_ci key->sampler[sampler_idx].return_size = 1063bf215546Sopenharmony_ci sampler_map->return_size[sampler_idx]; 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci key->sampler[sampler_idx].return_channels = 1066bf215546Sopenharmony_ci key->sampler[sampler_idx].return_size == 32 ? 4 : 2; 1067bf215546Sopenharmony_ci } 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci switch (p_stage->stage) { 1070bf215546Sopenharmony_ci case BROADCOM_SHADER_VERTEX: 1071bf215546Sopenharmony_ci case BROADCOM_SHADER_VERTEX_BIN: 1072bf215546Sopenharmony_ci key->is_last_geometry_stage = p_stage->pipeline->gs == NULL; 1073bf215546Sopenharmony_ci break; 1074bf215546Sopenharmony_ci case BROADCOM_SHADER_GEOMETRY: 1075bf215546Sopenharmony_ci case BROADCOM_SHADER_GEOMETRY_BIN: 1076bf215546Sopenharmony_ci /* FIXME: while we don't implement tessellation shaders */ 1077bf215546Sopenharmony_ci key->is_last_geometry_stage = true; 1078bf215546Sopenharmony_ci break; 1079bf215546Sopenharmony_ci case BROADCOM_SHADER_FRAGMENT: 1080bf215546Sopenharmony_ci case BROADCOM_SHADER_COMPUTE: 1081bf215546Sopenharmony_ci key->is_last_geometry_stage = false; 1082bf215546Sopenharmony_ci break; 1083bf215546Sopenharmony_ci default: 1084bf215546Sopenharmony_ci unreachable("unsupported shader stage"); 1085bf215546Sopenharmony_ci } 1086bf215546Sopenharmony_ci 1087bf215546Sopenharmony_ci /* Vulkan doesn't have fixed function state for user clip planes. Instead, 1088bf215546Sopenharmony_ci * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler 1089bf215546Sopenharmony_ci * takes care of adding a single compact array variable at 1090bf215546Sopenharmony_ci * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering. 1091bf215546Sopenharmony_ci * 1092bf215546Sopenharmony_ci * The only lowering we are interested is specific to the fragment shader, 1093bf215546Sopenharmony_ci * where we want to emit discards to honor writes to gl_ClipDistance[] in 1094bf215546Sopenharmony_ci * previous stages. This is done via nir_lower_clip_fs() so we only set up 1095bf215546Sopenharmony_ci * the ucp enable mask for that stage. 1096bf215546Sopenharmony_ci */ 1097bf215546Sopenharmony_ci key->ucp_enables = ucp_enables; 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci key->robust_buffer_access = robust_buffer_access; 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_ci key->environment = V3D_ENVIRONMENT_VULKAN; 1102bf215546Sopenharmony_ci} 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_ci/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the 1105bf215546Sopenharmony_ci * same. For not using prim_mode that is the one already used on v3d 1106bf215546Sopenharmony_ci */ 1107bf215546Sopenharmony_cistatic const enum pipe_prim_type vk_to_pipe_prim_type[] = { 1108bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS, 1109bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES, 1110bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP, 1111bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES, 1112bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP, 1113bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN, 1114bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY, 1115bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY, 1116bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY, 1117bf215546Sopenharmony_ci [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY, 1118bf215546Sopenharmony_ci}; 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_cistatic const enum pipe_logicop vk_to_pipe_logicop[] = { 1121bf215546Sopenharmony_ci [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR, 1122bf215546Sopenharmony_ci [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND, 1123bf215546Sopenharmony_ci [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE, 1124bf215546Sopenharmony_ci [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY, 1125bf215546Sopenharmony_ci [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED, 1126bf215546Sopenharmony_ci [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP, 1127bf215546Sopenharmony_ci [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR, 1128bf215546Sopenharmony_ci [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR, 1129bf215546Sopenharmony_ci [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR, 1130bf215546Sopenharmony_ci [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV, 1131bf215546Sopenharmony_ci [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT, 1132bf215546Sopenharmony_ci [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE, 1133bf215546Sopenharmony_ci [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED, 1134bf215546Sopenharmony_ci [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED, 1135bf215546Sopenharmony_ci [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND, 1136bf215546Sopenharmony_ci [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET, 1137bf215546Sopenharmony_ci}; 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_cistatic void 1140bf215546Sopenharmony_cipipeline_populate_v3d_fs_key(struct v3d_fs_key *key, 1141bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 1142bf215546Sopenharmony_ci const struct v3dv_pipeline_stage *p_stage, 1143bf215546Sopenharmony_ci bool has_geometry_shader, 1144bf215546Sopenharmony_ci uint32_t ucp_enables) 1145bf215546Sopenharmony_ci{ 1146bf215546Sopenharmony_ci assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT); 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_ci const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 1151bf215546Sopenharmony_ci pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba); 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci const VkPipelineInputAssemblyStateCreateInfo *ia_info = 1154bf215546Sopenharmony_ci pCreateInfo->pInputAssemblyState; 1155bf215546Sopenharmony_ci uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci key->is_points = (topology == PIPE_PRIM_POINTS); 1158bf215546Sopenharmony_ci key->is_lines = (topology >= PIPE_PRIM_LINES && 1159bf215546Sopenharmony_ci topology <= PIPE_PRIM_LINE_STRIP); 1160bf215546Sopenharmony_ci key->has_gs = has_geometry_shader; 1161bf215546Sopenharmony_ci 1162bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *cb_info = 1163bf215546Sopenharmony_ci !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? 1164bf215546Sopenharmony_ci pCreateInfo->pColorBlendState : NULL; 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? 1167bf215546Sopenharmony_ci vk_to_pipe_logicop[cb_info->logicOp] : 1168bf215546Sopenharmony_ci PIPE_LOGICOP_COPY; 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_ci const bool raster_enabled = 1171bf215546Sopenharmony_ci !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci /* Multisample rasterization state must be ignored if rasterization 1174bf215546Sopenharmony_ci * is disabled. 1175bf215546Sopenharmony_ci */ 1176bf215546Sopenharmony_ci const VkPipelineMultisampleStateCreateInfo *ms_info = 1177bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pMultisampleState : NULL; 1178bf215546Sopenharmony_ci if (ms_info) { 1179bf215546Sopenharmony_ci assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || 1180bf215546Sopenharmony_ci ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); 1181bf215546Sopenharmony_ci key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_ci if (key->msaa) { 1184bf215546Sopenharmony_ci key->sample_coverage = 1185bf215546Sopenharmony_ci p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1; 1186bf215546Sopenharmony_ci key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; 1187bf215546Sopenharmony_ci key->sample_alpha_to_one = ms_info->alphaToOneEnable; 1188bf215546Sopenharmony_ci } 1189bf215546Sopenharmony_ci } 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_ci /* This is intended for V3D versions before 4.1, otherwise we just use the 1192bf215546Sopenharmony_ci * tile buffer load/store swap R/B bit. 1193bf215546Sopenharmony_ci */ 1194bf215546Sopenharmony_ci key->swap_color_rb = 0; 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = 1197bf215546Sopenharmony_ci v3dv_render_pass_from_handle(pCreateInfo->renderPass); 1198bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = p_stage->pipeline->subpass; 1199bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 1200bf215546Sopenharmony_ci const uint32_t att_idx = subpass->color_attachments[i].attachment; 1201bf215546Sopenharmony_ci if (att_idx == VK_ATTACHMENT_UNUSED) 1202bf215546Sopenharmony_ci continue; 1203bf215546Sopenharmony_ci 1204bf215546Sopenharmony_ci key->cbufs |= 1 << i; 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci VkFormat fb_format = pass->attachments[att_idx].desc.format; 1207bf215546Sopenharmony_ci enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); 1208bf215546Sopenharmony_ci 1209bf215546Sopenharmony_ci /* If logic operations are enabled then we might emit color reads and we 1210bf215546Sopenharmony_ci * need to know the color buffer format and swizzle for that 1211bf215546Sopenharmony_ci */ 1212bf215546Sopenharmony_ci if (key->logicop_func != PIPE_LOGICOP_COPY) { 1213bf215546Sopenharmony_ci key->color_fmt[i].format = fb_pipe_format; 1214bf215546Sopenharmony_ci memcpy(key->color_fmt[i].swizzle, 1215bf215546Sopenharmony_ci v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format), 1216bf215546Sopenharmony_ci sizeof(key->color_fmt[i].swizzle)); 1217bf215546Sopenharmony_ci } 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci const struct util_format_description *desc = 1220bf215546Sopenharmony_ci vk_format_description(fb_format); 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_ci if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 1223bf215546Sopenharmony_ci desc->channel[0].size == 32) { 1224bf215546Sopenharmony_ci key->f32_color_rb |= 1 << i; 1225bf215546Sopenharmony_ci } 1226bf215546Sopenharmony_ci 1227bf215546Sopenharmony_ci if (p_stage->nir->info.fs.untyped_color_outputs) { 1228bf215546Sopenharmony_ci if (util_format_is_pure_uint(fb_pipe_format)) 1229bf215546Sopenharmony_ci key->uint_color_rb |= 1 << i; 1230bf215546Sopenharmony_ci else if (util_format_is_pure_sint(fb_pipe_format)) 1231bf215546Sopenharmony_ci key->int_color_rb |= 1 << i; 1232bf215546Sopenharmony_ci } 1233bf215546Sopenharmony_ci 1234bf215546Sopenharmony_ci if (key->is_points) { 1235bf215546Sopenharmony_ci /* This mask represents state for GL_ARB_point_sprite which is not 1236bf215546Sopenharmony_ci * relevant to Vulkan. 1237bf215546Sopenharmony_ci */ 1238bf215546Sopenharmony_ci key->point_sprite_mask = 0; 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_ci /* Vulkan mandates upper left. */ 1241bf215546Sopenharmony_ci key->point_coord_upper_left = true; 1242bf215546Sopenharmony_ci } 1243bf215546Sopenharmony_ci } 1244bf215546Sopenharmony_ci} 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_cistatic void 1247bf215546Sopenharmony_cisetup_stage_outputs_from_next_stage_inputs( 1248bf215546Sopenharmony_ci uint8_t next_stage_num_inputs, 1249bf215546Sopenharmony_ci struct v3d_varying_slot *next_stage_input_slots, 1250bf215546Sopenharmony_ci uint8_t *num_used_outputs, 1251bf215546Sopenharmony_ci struct v3d_varying_slot *used_output_slots, 1252bf215546Sopenharmony_ci uint32_t size_of_used_output_slots) 1253bf215546Sopenharmony_ci{ 1254bf215546Sopenharmony_ci *num_used_outputs = next_stage_num_inputs; 1255bf215546Sopenharmony_ci memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots); 1256bf215546Sopenharmony_ci} 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_cistatic void 1259bf215546Sopenharmony_cipipeline_populate_v3d_gs_key(struct v3d_gs_key *key, 1260bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 1261bf215546Sopenharmony_ci const struct v3dv_pipeline_stage *p_stage) 1262bf215546Sopenharmony_ci{ 1263bf215546Sopenharmony_ci assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY || 1264bf215546Sopenharmony_ci p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN); 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 1267bf215546Sopenharmony_ci 1268bf215546Sopenharmony_ci const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 1269bf215546Sopenharmony_ci pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = p_stage->pipeline; 1272bf215546Sopenharmony_ci 1273bf215546Sopenharmony_ci key->per_vertex_point_size = 1274bf215546Sopenharmony_ci p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ); 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci assert(key->base.is_last_geometry_stage); 1279bf215546Sopenharmony_ci if (key->is_coord) { 1280bf215546Sopenharmony_ci /* Output varyings in the last binning shader are only used for transform 1281bf215546Sopenharmony_ci * feedback. Set to 0 as VK_EXT_transform_feedback is not supported. 1282bf215546Sopenharmony_ci */ 1283bf215546Sopenharmony_ci key->num_used_outputs = 0; 1284bf215546Sopenharmony_ci } else { 1285bf215546Sopenharmony_ci struct v3dv_shader_variant *fs_variant = 1286bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 1287bf215546Sopenharmony_ci 1288bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(key->used_outputs) == 1289bf215546Sopenharmony_ci sizeof(fs_variant->prog_data.fs->input_slots)); 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_ci setup_stage_outputs_from_next_stage_inputs( 1292bf215546Sopenharmony_ci fs_variant->prog_data.fs->num_inputs, 1293bf215546Sopenharmony_ci fs_variant->prog_data.fs->input_slots, 1294bf215546Sopenharmony_ci &key->num_used_outputs, 1295bf215546Sopenharmony_ci key->used_outputs, 1296bf215546Sopenharmony_ci sizeof(key->used_outputs)); 1297bf215546Sopenharmony_ci } 1298bf215546Sopenharmony_ci} 1299bf215546Sopenharmony_ci 1300bf215546Sopenharmony_cistatic void 1301bf215546Sopenharmony_cipipeline_populate_v3d_vs_key(struct v3d_vs_key *key, 1302bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 1303bf215546Sopenharmony_ci const struct v3dv_pipeline_stage *p_stage) 1304bf215546Sopenharmony_ci{ 1305bf215546Sopenharmony_ci assert(p_stage->stage == BROADCOM_SHADER_VERTEX || 1306bf215546Sopenharmony_ci p_stage->stage == BROADCOM_SHADER_VERTEX_BIN); 1307bf215546Sopenharmony_ci 1308bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 1311bf215546Sopenharmony_ci pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); 1312bf215546Sopenharmony_ci 1313bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = p_stage->pipeline; 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci /* Vulkan specifies a point size per vertex, so true for if the prim are 1316bf215546Sopenharmony_ci * points, like on ES2) 1317bf215546Sopenharmony_ci */ 1318bf215546Sopenharmony_ci const VkPipelineInputAssemblyStateCreateInfo *ia_info = 1319bf215546Sopenharmony_ci pCreateInfo->pInputAssemblyState; 1320bf215546Sopenharmony_ci uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_ci /* FIXME: PRIM_POINTS is not enough, in gallium the full check is 1323bf215546Sopenharmony_ci * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */ 1324bf215546Sopenharmony_ci key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS); 1325bf215546Sopenharmony_ci 1326bf215546Sopenharmony_ci key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); 1327bf215546Sopenharmony_ci 1328bf215546Sopenharmony_ci if (key->is_coord) { /* Binning VS*/ 1329bf215546Sopenharmony_ci if (key->base.is_last_geometry_stage) { 1330bf215546Sopenharmony_ci /* Output varyings in the last binning shader are only used for 1331bf215546Sopenharmony_ci * transform feedback. Set to 0 as VK_EXT_transform_feedback is not 1332bf215546Sopenharmony_ci * supported. 1333bf215546Sopenharmony_ci */ 1334bf215546Sopenharmony_ci key->num_used_outputs = 0; 1335bf215546Sopenharmony_ci } else { 1336bf215546Sopenharmony_ci /* Linking against GS binning program */ 1337bf215546Sopenharmony_ci assert(pipeline->gs); 1338bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_bin_variant = 1339bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 1340bf215546Sopenharmony_ci 1341bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(key->used_outputs) == 1342bf215546Sopenharmony_ci sizeof(gs_bin_variant->prog_data.gs->input_slots)); 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci setup_stage_outputs_from_next_stage_inputs( 1345bf215546Sopenharmony_ci gs_bin_variant->prog_data.gs->num_inputs, 1346bf215546Sopenharmony_ci gs_bin_variant->prog_data.gs->input_slots, 1347bf215546Sopenharmony_ci &key->num_used_outputs, 1348bf215546Sopenharmony_ci key->used_outputs, 1349bf215546Sopenharmony_ci sizeof(key->used_outputs)); 1350bf215546Sopenharmony_ci } 1351bf215546Sopenharmony_ci } else { /* Render VS */ 1352bf215546Sopenharmony_ci if (pipeline->gs) { 1353bf215546Sopenharmony_ci /* Linking against GS render program */ 1354bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_variant = 1355bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 1356bf215546Sopenharmony_ci 1357bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(key->used_outputs) == 1358bf215546Sopenharmony_ci sizeof(gs_variant->prog_data.gs->input_slots)); 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci setup_stage_outputs_from_next_stage_inputs( 1361bf215546Sopenharmony_ci gs_variant->prog_data.gs->num_inputs, 1362bf215546Sopenharmony_ci gs_variant->prog_data.gs->input_slots, 1363bf215546Sopenharmony_ci &key->num_used_outputs, 1364bf215546Sopenharmony_ci key->used_outputs, 1365bf215546Sopenharmony_ci sizeof(key->used_outputs)); 1366bf215546Sopenharmony_ci } else { 1367bf215546Sopenharmony_ci /* Linking against FS program */ 1368bf215546Sopenharmony_ci struct v3dv_shader_variant *fs_variant = 1369bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 1370bf215546Sopenharmony_ci 1371bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(key->used_outputs) == 1372bf215546Sopenharmony_ci sizeof(fs_variant->prog_data.fs->input_slots)); 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci setup_stage_outputs_from_next_stage_inputs( 1375bf215546Sopenharmony_ci fs_variant->prog_data.fs->num_inputs, 1376bf215546Sopenharmony_ci fs_variant->prog_data.fs->input_slots, 1377bf215546Sopenharmony_ci &key->num_used_outputs, 1378bf215546Sopenharmony_ci key->used_outputs, 1379bf215546Sopenharmony_ci sizeof(key->used_outputs)); 1380bf215546Sopenharmony_ci } 1381bf215546Sopenharmony_ci } 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci const VkPipelineVertexInputStateCreateInfo *vi_info = 1384bf215546Sopenharmony_ci pCreateInfo->pVertexInputState; 1385bf215546Sopenharmony_ci for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 1386bf215546Sopenharmony_ci const VkVertexInputAttributeDescription *desc = 1387bf215546Sopenharmony_ci &vi_info->pVertexAttributeDescriptions[i]; 1388bf215546Sopenharmony_ci assert(desc->location < MAX_VERTEX_ATTRIBS); 1389bf215546Sopenharmony_ci if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) 1390bf215546Sopenharmony_ci key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); 1391bf215546Sopenharmony_ci } 1392bf215546Sopenharmony_ci} 1393bf215546Sopenharmony_ci 1394bf215546Sopenharmony_ci/** 1395bf215546Sopenharmony_ci * Creates the initial form of the pipeline stage for a binning shader by 1396bf215546Sopenharmony_ci * cloning the render shader and flagging it as a coordinate shader. 1397bf215546Sopenharmony_ci * 1398bf215546Sopenharmony_ci * Returns NULL if it was not able to allocate the object, so it should be 1399bf215546Sopenharmony_ci * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error. 1400bf215546Sopenharmony_ci */ 1401bf215546Sopenharmony_cistatic struct v3dv_pipeline_stage * 1402bf215546Sopenharmony_cipipeline_stage_create_binning(const struct v3dv_pipeline_stage *src, 1403bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 1404bf215546Sopenharmony_ci{ 1405bf215546Sopenharmony_ci struct v3dv_device *device = src->pipeline->device; 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage = 1408bf215546Sopenharmony_ci vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 1409bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_ci if (p_stage == NULL) 1412bf215546Sopenharmony_ci return NULL; 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_ci assert(src->stage == BROADCOM_SHADER_VERTEX || 1415bf215546Sopenharmony_ci src->stage == BROADCOM_SHADER_GEOMETRY); 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ci enum broadcom_shader_stage bin_stage = 1418bf215546Sopenharmony_ci src->stage == BROADCOM_SHADER_VERTEX ? 1419bf215546Sopenharmony_ci BROADCOM_SHADER_VERTEX_BIN : 1420bf215546Sopenharmony_ci BROADCOM_SHADER_GEOMETRY_BIN; 1421bf215546Sopenharmony_ci 1422bf215546Sopenharmony_ci p_stage->pipeline = src->pipeline; 1423bf215546Sopenharmony_ci p_stage->stage = bin_stage; 1424bf215546Sopenharmony_ci p_stage->entrypoint = src->entrypoint; 1425bf215546Sopenharmony_ci p_stage->module = src->module; 1426bf215546Sopenharmony_ci /* For binning shaders we will clone the NIR code from the corresponding 1427bf215546Sopenharmony_ci * render shader later, when we call pipeline_compile_xxx_shader. This way 1428bf215546Sopenharmony_ci * we only have to run the relevant NIR lowerings once for render shaders 1429bf215546Sopenharmony_ci */ 1430bf215546Sopenharmony_ci p_stage->nir = NULL; 1431bf215546Sopenharmony_ci p_stage->spec_info = src->spec_info; 1432bf215546Sopenharmony_ci p_stage->feedback = (VkPipelineCreationFeedback) { 0 }; 1433bf215546Sopenharmony_ci memcpy(p_stage->shader_sha1, src->shader_sha1, 20); 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_ci return p_stage; 1436bf215546Sopenharmony_ci} 1437bf215546Sopenharmony_ci 1438bf215546Sopenharmony_ci/** 1439bf215546Sopenharmony_ci * Returns false if it was not able to allocate or map the assembly bo memory. 1440bf215546Sopenharmony_ci */ 1441bf215546Sopenharmony_cistatic bool 1442bf215546Sopenharmony_ciupload_assembly(struct v3dv_pipeline *pipeline) 1443bf215546Sopenharmony_ci{ 1444bf215546Sopenharmony_ci uint32_t total_size = 0; 1445bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 1446bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = 1447bf215546Sopenharmony_ci pipeline->shared_data->variants[stage]; 1448bf215546Sopenharmony_ci 1449bf215546Sopenharmony_ci if (variant != NULL) 1450bf215546Sopenharmony_ci total_size += variant->qpu_insts_size; 1451bf215546Sopenharmony_ci } 1452bf215546Sopenharmony_ci 1453bf215546Sopenharmony_ci struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size, 1454bf215546Sopenharmony_ci "pipeline shader assembly", true); 1455bf215546Sopenharmony_ci if (!bo) { 1456bf215546Sopenharmony_ci fprintf(stderr, "failed to allocate memory for shader\n"); 1457bf215546Sopenharmony_ci return false; 1458bf215546Sopenharmony_ci } 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci bool ok = v3dv_bo_map(pipeline->device, bo, total_size); 1461bf215546Sopenharmony_ci if (!ok) { 1462bf215546Sopenharmony_ci fprintf(stderr, "failed to map source shader buffer\n"); 1463bf215546Sopenharmony_ci return false; 1464bf215546Sopenharmony_ci } 1465bf215546Sopenharmony_ci 1466bf215546Sopenharmony_ci uint32_t offset = 0; 1467bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 1468bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = 1469bf215546Sopenharmony_ci pipeline->shared_data->variants[stage]; 1470bf215546Sopenharmony_ci 1471bf215546Sopenharmony_ci if (variant != NULL) { 1472bf215546Sopenharmony_ci variant->assembly_offset = offset; 1473bf215546Sopenharmony_ci 1474bf215546Sopenharmony_ci memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size); 1475bf215546Sopenharmony_ci offset += variant->qpu_insts_size; 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci /* We dont need qpu_insts anymore. */ 1478bf215546Sopenharmony_ci free(variant->qpu_insts); 1479bf215546Sopenharmony_ci variant->qpu_insts = NULL; 1480bf215546Sopenharmony_ci } 1481bf215546Sopenharmony_ci } 1482bf215546Sopenharmony_ci assert(total_size == offset); 1483bf215546Sopenharmony_ci 1484bf215546Sopenharmony_ci pipeline->shared_data->assembly_bo = bo; 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci return true; 1487bf215546Sopenharmony_ci} 1488bf215546Sopenharmony_ci 1489bf215546Sopenharmony_cistatic void 1490bf215546Sopenharmony_cipipeline_hash_graphics(const struct v3dv_pipeline *pipeline, 1491bf215546Sopenharmony_ci struct v3dv_pipeline_key *key, 1492bf215546Sopenharmony_ci unsigned char *sha1_out) 1493bf215546Sopenharmony_ci{ 1494bf215546Sopenharmony_ci struct mesa_sha1 ctx; 1495bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_ci if (pipeline->layout) { 1498bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &pipeline->layout->sha1, 1499bf215546Sopenharmony_ci sizeof(pipeline->layout->sha1)); 1500bf215546Sopenharmony_ci } 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci /* We need to include all shader stages in the sha1 key as linking may modify 1503bf215546Sopenharmony_ci * the shader code in any stage. An alternative would be to use the 1504bf215546Sopenharmony_ci * serialized NIR, but that seems like an overkill. 1505bf215546Sopenharmony_ci */ 1506bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1, 1507bf215546Sopenharmony_ci sizeof(pipeline->vs->shader_sha1)); 1508bf215546Sopenharmony_ci 1509bf215546Sopenharmony_ci if (pipeline->gs) { 1510bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1, 1511bf215546Sopenharmony_ci sizeof(pipeline->gs->shader_sha1)); 1512bf215546Sopenharmony_ci } 1513bf215546Sopenharmony_ci 1514bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1, 1515bf215546Sopenharmony_ci sizeof(pipeline->fs->shader_sha1)); 1516bf215546Sopenharmony_ci 1517bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key)); 1518bf215546Sopenharmony_ci 1519bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, sha1_out); 1520bf215546Sopenharmony_ci} 1521bf215546Sopenharmony_ci 1522bf215546Sopenharmony_cistatic void 1523bf215546Sopenharmony_cipipeline_hash_compute(const struct v3dv_pipeline *pipeline, 1524bf215546Sopenharmony_ci struct v3dv_pipeline_key *key, 1525bf215546Sopenharmony_ci unsigned char *sha1_out) 1526bf215546Sopenharmony_ci{ 1527bf215546Sopenharmony_ci struct mesa_sha1 ctx; 1528bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci if (pipeline->layout) { 1531bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, &pipeline->layout->sha1, 1532bf215546Sopenharmony_ci sizeof(pipeline->layout->sha1)); 1533bf215546Sopenharmony_ci } 1534bf215546Sopenharmony_ci 1535bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1, 1536bf215546Sopenharmony_ci sizeof(pipeline->cs->shader_sha1)); 1537bf215546Sopenharmony_ci 1538bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key)); 1539bf215546Sopenharmony_ci 1540bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, sha1_out); 1541bf215546Sopenharmony_ci} 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci/* Checks that the pipeline has enough spill size to use for any of their 1544bf215546Sopenharmony_ci * variants 1545bf215546Sopenharmony_ci */ 1546bf215546Sopenharmony_cistatic void 1547bf215546Sopenharmony_cipipeline_check_spill_size(struct v3dv_pipeline *pipeline) 1548bf215546Sopenharmony_ci{ 1549bf215546Sopenharmony_ci uint32_t max_spill_size = 0; 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_ci for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 1552bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = 1553bf215546Sopenharmony_ci pipeline->shared_data->variants[stage]; 1554bf215546Sopenharmony_ci 1555bf215546Sopenharmony_ci if (variant != NULL) { 1556bf215546Sopenharmony_ci max_spill_size = MAX2(variant->prog_data.base->spill_size, 1557bf215546Sopenharmony_ci max_spill_size); 1558bf215546Sopenharmony_ci } 1559bf215546Sopenharmony_ci } 1560bf215546Sopenharmony_ci 1561bf215546Sopenharmony_ci if (max_spill_size > 0) { 1562bf215546Sopenharmony_ci struct v3dv_device *device = pipeline->device; 1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_ci /* The TIDX register we use for choosing the area to access 1565bf215546Sopenharmony_ci * for scratch space is: (core << 6) | (qpu << 2) | thread. 1566bf215546Sopenharmony_ci * Even at minimum threadcount in a particular shader, that 1567bf215546Sopenharmony_ci * means we still multiply by qpus by 4. 1568bf215546Sopenharmony_ci */ 1569bf215546Sopenharmony_ci const uint32_t total_spill_size = 1570bf215546Sopenharmony_ci 4 * device->devinfo.qpu_count * max_spill_size; 1571bf215546Sopenharmony_ci if (pipeline->spill.bo) { 1572bf215546Sopenharmony_ci assert(pipeline->spill.size_per_thread > 0); 1573bf215546Sopenharmony_ci v3dv_bo_free(device, pipeline->spill.bo); 1574bf215546Sopenharmony_ci } 1575bf215546Sopenharmony_ci pipeline->spill.bo = 1576bf215546Sopenharmony_ci v3dv_bo_alloc(device, total_spill_size, "spill", true); 1577bf215546Sopenharmony_ci pipeline->spill.size_per_thread = max_spill_size; 1578bf215546Sopenharmony_ci } 1579bf215546Sopenharmony_ci} 1580bf215546Sopenharmony_ci 1581bf215546Sopenharmony_ci/** 1582bf215546Sopenharmony_ci * Creates a new shader_variant_create. Note that for prog_data is not const, 1583bf215546Sopenharmony_ci * so it is assumed that the caller will prove a pointer that the 1584bf215546Sopenharmony_ci * shader_variant will own. 1585bf215546Sopenharmony_ci * 1586bf215546Sopenharmony_ci * Creation doesn't include allocate a BO to store the content of qpu_insts, 1587bf215546Sopenharmony_ci * as we will try to share the same bo for several shader variants. Also note 1588bf215546Sopenharmony_ci * that qpu_ints being NULL is valid, for example if we are creating the 1589bf215546Sopenharmony_ci * shader_variants from the cache, so we can just upload the assembly of all 1590bf215546Sopenharmony_ci * the shader stages at once. 1591bf215546Sopenharmony_ci */ 1592bf215546Sopenharmony_cistruct v3dv_shader_variant * 1593bf215546Sopenharmony_civ3dv_shader_variant_create(struct v3dv_device *device, 1594bf215546Sopenharmony_ci enum broadcom_shader_stage stage, 1595bf215546Sopenharmony_ci struct v3d_prog_data *prog_data, 1596bf215546Sopenharmony_ci uint32_t prog_data_size, 1597bf215546Sopenharmony_ci uint32_t assembly_offset, 1598bf215546Sopenharmony_ci uint64_t *qpu_insts, 1599bf215546Sopenharmony_ci uint32_t qpu_insts_size, 1600bf215546Sopenharmony_ci VkResult *out_vk_result) 1601bf215546Sopenharmony_ci{ 1602bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = 1603bf215546Sopenharmony_ci vk_zalloc(&device->vk.alloc, sizeof(*variant), 8, 1604bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1605bf215546Sopenharmony_ci 1606bf215546Sopenharmony_ci if (variant == NULL) { 1607bf215546Sopenharmony_ci *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY; 1608bf215546Sopenharmony_ci return NULL; 1609bf215546Sopenharmony_ci } 1610bf215546Sopenharmony_ci 1611bf215546Sopenharmony_ci variant->stage = stage; 1612bf215546Sopenharmony_ci variant->prog_data_size = prog_data_size; 1613bf215546Sopenharmony_ci variant->prog_data.base = prog_data; 1614bf215546Sopenharmony_ci 1615bf215546Sopenharmony_ci variant->assembly_offset = assembly_offset; 1616bf215546Sopenharmony_ci variant->qpu_insts_size = qpu_insts_size; 1617bf215546Sopenharmony_ci variant->qpu_insts = qpu_insts; 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci *out_vk_result = VK_SUCCESS; 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci return variant; 1622bf215546Sopenharmony_ci} 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_ci/* For a given key, it returns the compiled version of the shader. Returns a 1625bf215546Sopenharmony_ci * new reference to the shader_variant to the caller, or NULL. 1626bf215546Sopenharmony_ci * 1627bf215546Sopenharmony_ci * If the method returns NULL it means that something wrong happened: 1628bf215546Sopenharmony_ci * * Not enough memory: this is one of the possible outcomes defined by 1629bf215546Sopenharmony_ci * vkCreateXXXPipelines. out_vk_result will return the proper oom error. 1630bf215546Sopenharmony_ci * * Compilation error: hypothetically this shouldn't happen, as the spec 1631bf215546Sopenharmony_ci * states that vkShaderModule needs to be created with a valid SPIR-V, so 1632bf215546Sopenharmony_ci * any compilation failure is a driver bug. In the practice, something as 1633bf215546Sopenharmony_ci * common as failing to register allocate can lead to a compilation 1634bf215546Sopenharmony_ci * failure. In that case the only option (for any driver) is 1635bf215546Sopenharmony_ci * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler 1636bf215546Sopenharmony_ci * error. 1637bf215546Sopenharmony_ci */ 1638bf215546Sopenharmony_cistatic struct v3dv_shader_variant * 1639bf215546Sopenharmony_cipipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage, 1640bf215546Sopenharmony_ci struct v3d_key *key, 1641bf215546Sopenharmony_ci size_t key_size, 1642bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 1643bf215546Sopenharmony_ci VkResult *out_vk_result) 1644bf215546Sopenharmony_ci{ 1645bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1646bf215546Sopenharmony_ci 1647bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = p_stage->pipeline; 1648bf215546Sopenharmony_ci struct v3dv_physical_device *physical_device = 1649bf215546Sopenharmony_ci &pipeline->device->instance->physicalDevice; 1650bf215546Sopenharmony_ci const struct v3d_compiler *compiler = physical_device->compiler; 1651bf215546Sopenharmony_ci 1652bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 1653bf215546Sopenharmony_ci v3d_debug_flag_for_shader_stage 1654bf215546Sopenharmony_ci (broadcom_shader_stage_to_gl(p_stage->stage))))) { 1655bf215546Sopenharmony_ci fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n", 1656bf215546Sopenharmony_ci broadcom_shader_stage_name(p_stage->stage), 1657bf215546Sopenharmony_ci p_stage->program_id); 1658bf215546Sopenharmony_ci nir_print_shader(p_stage->nir, stderr); 1659bf215546Sopenharmony_ci fprintf(stderr, "\n"); 1660bf215546Sopenharmony_ci } 1661bf215546Sopenharmony_ci 1662bf215546Sopenharmony_ci uint64_t *qpu_insts; 1663bf215546Sopenharmony_ci uint32_t qpu_insts_size; 1664bf215546Sopenharmony_ci struct v3d_prog_data *prog_data; 1665bf215546Sopenharmony_ci uint32_t prog_data_size = 1666bf215546Sopenharmony_ci v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage)); 1667bf215546Sopenharmony_ci 1668bf215546Sopenharmony_ci qpu_insts = v3d_compile(compiler, 1669bf215546Sopenharmony_ci key, &prog_data, 1670bf215546Sopenharmony_ci p_stage->nir, 1671bf215546Sopenharmony_ci shader_debug_output, NULL, 1672bf215546Sopenharmony_ci p_stage->program_id, 0, 1673bf215546Sopenharmony_ci &qpu_insts_size); 1674bf215546Sopenharmony_ci 1675bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = NULL; 1676bf215546Sopenharmony_ci 1677bf215546Sopenharmony_ci if (!qpu_insts) { 1678bf215546Sopenharmony_ci fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n", 1679bf215546Sopenharmony_ci gl_shader_stage_name(p_stage->stage), 1680bf215546Sopenharmony_ci p_stage->program_id); 1681bf215546Sopenharmony_ci *out_vk_result = VK_ERROR_UNKNOWN; 1682bf215546Sopenharmony_ci } else { 1683bf215546Sopenharmony_ci variant = 1684bf215546Sopenharmony_ci v3dv_shader_variant_create(pipeline->device, p_stage->stage, 1685bf215546Sopenharmony_ci prog_data, prog_data_size, 1686bf215546Sopenharmony_ci 0, /* assembly_offset, no final value yet */ 1687bf215546Sopenharmony_ci qpu_insts, qpu_insts_size, 1688bf215546Sopenharmony_ci out_vk_result); 1689bf215546Sopenharmony_ci } 1690bf215546Sopenharmony_ci /* At this point we don't need anymore the nir shader, but we are freeing 1691bf215546Sopenharmony_ci * all the temporary p_stage structs used during the pipeline creation when 1692bf215546Sopenharmony_ci * we finish it, so let's not worry about freeing the nir here. 1693bf215546Sopenharmony_ci */ 1694bf215546Sopenharmony_ci 1695bf215546Sopenharmony_ci p_stage->feedback.duration += os_time_get_nano() - stage_start; 1696bf215546Sopenharmony_ci 1697bf215546Sopenharmony_ci return variant; 1698bf215546Sopenharmony_ci} 1699bf215546Sopenharmony_ci 1700bf215546Sopenharmony_cistatic void 1701bf215546Sopenharmony_cilink_shaders(nir_shader *producer, nir_shader *consumer) 1702bf215546Sopenharmony_ci{ 1703bf215546Sopenharmony_ci assert(producer); 1704bf215546Sopenharmony_ci assert(consumer); 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci if (producer->options->lower_to_scalar) { 1707bf215546Sopenharmony_ci NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out); 1708bf215546Sopenharmony_ci NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); 1709bf215546Sopenharmony_ci } 1710bf215546Sopenharmony_ci 1711bf215546Sopenharmony_ci nir_lower_io_arrays_to_elements(producer, consumer); 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci nir_optimize(producer, false); 1714bf215546Sopenharmony_ci nir_optimize(consumer, false); 1715bf215546Sopenharmony_ci 1716bf215546Sopenharmony_ci if (nir_link_opt_varyings(producer, consumer)) 1717bf215546Sopenharmony_ci nir_optimize(consumer, false); 1718bf215546Sopenharmony_ci 1719bf215546Sopenharmony_ci NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); 1720bf215546Sopenharmony_ci NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci if (nir_remove_unused_varyings(producer, consumer)) { 1723bf215546Sopenharmony_ci NIR_PASS(_, producer, nir_lower_global_vars_to_local); 1724bf215546Sopenharmony_ci NIR_PASS(_, consumer, nir_lower_global_vars_to_local); 1725bf215546Sopenharmony_ci 1726bf215546Sopenharmony_ci nir_optimize(producer, false); 1727bf215546Sopenharmony_ci nir_optimize(consumer, false); 1728bf215546Sopenharmony_ci 1729bf215546Sopenharmony_ci /* Optimizations can cause varyings to become unused. 1730bf215546Sopenharmony_ci * nir_compact_varyings() depends on all dead varyings being removed so 1731bf215546Sopenharmony_ci * we need to call nir_remove_dead_variables() again here. 1732bf215546Sopenharmony_ci */ 1733bf215546Sopenharmony_ci NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); 1734bf215546Sopenharmony_ci NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); 1735bf215546Sopenharmony_ci } 1736bf215546Sopenharmony_ci} 1737bf215546Sopenharmony_ci 1738bf215546Sopenharmony_cistatic void 1739bf215546Sopenharmony_cipipeline_lower_nir(struct v3dv_pipeline *pipeline, 1740bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage, 1741bf215546Sopenharmony_ci struct v3dv_pipeline_layout *layout) 1742bf215546Sopenharmony_ci{ 1743bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci assert(pipeline->shared_data && 1746bf215546Sopenharmony_ci pipeline->shared_data->maps[p_stage->stage]); 1747bf215546Sopenharmony_ci 1748bf215546Sopenharmony_ci nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_ci /* We add this because we need a valid sampler for nir_lower_tex to do 1751bf215546Sopenharmony_ci * unpacking of the texture operation result, even for the case where there 1752bf215546Sopenharmony_ci * is no sampler state. 1753bf215546Sopenharmony_ci * 1754bf215546Sopenharmony_ci * We add two of those, one for the case we need a 16bit return_size, and 1755bf215546Sopenharmony_ci * another for the case we need a 32bit return size. 1756bf215546Sopenharmony_ci */ 1757bf215546Sopenharmony_ci struct v3dv_descriptor_maps *maps = 1758bf215546Sopenharmony_ci pipeline->shared_data->maps[p_stage->stage]; 1759bf215546Sopenharmony_ci 1760bf215546Sopenharmony_ci UNUSED unsigned index; 1761bf215546Sopenharmony_ci index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16); 1762bf215546Sopenharmony_ci assert(index == V3DV_NO_SAMPLER_16BIT_IDX); 1763bf215546Sopenharmony_ci 1764bf215546Sopenharmony_ci index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32); 1765bf215546Sopenharmony_ci assert(index == V3DV_NO_SAMPLER_32BIT_IDX); 1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ 1768bf215546Sopenharmony_ci bool needs_default_sampler_state = false; 1769bf215546Sopenharmony_ci NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout, 1770bf215546Sopenharmony_ci &needs_default_sampler_state); 1771bf215546Sopenharmony_ci 1772bf215546Sopenharmony_ci /* If in the end we didn't need to use the default sampler states and the 1773bf215546Sopenharmony_ci * shader doesn't need any other samplers, get rid of them so we can 1774bf215546Sopenharmony_ci * recognize that this program doesn't use any samplers at all. 1775bf215546Sopenharmony_ci */ 1776bf215546Sopenharmony_ci if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2) 1777bf215546Sopenharmony_ci maps->sampler_map.num_desc = 0; 1778bf215546Sopenharmony_ci 1779bf215546Sopenharmony_ci p_stage->feedback.duration += os_time_get_nano() - stage_start; 1780bf215546Sopenharmony_ci} 1781bf215546Sopenharmony_ci 1782bf215546Sopenharmony_ci/** 1783bf215546Sopenharmony_ci * The SPIR-V compiler will insert a sized compact array for 1784bf215546Sopenharmony_ci * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[], 1785bf215546Sopenharmony_ci * where the size of the array determines the number of active clip planes. 1786bf215546Sopenharmony_ci */ 1787bf215546Sopenharmony_cistatic uint32_t 1788bf215546Sopenharmony_ciget_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage) 1789bf215546Sopenharmony_ci{ 1790bf215546Sopenharmony_ci assert(p_stage->stage == BROADCOM_SHADER_VERTEX); 1791bf215546Sopenharmony_ci const nir_shader *shader = p_stage->nir; 1792bf215546Sopenharmony_ci assert(shader); 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { 1795bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_CLIP_DIST0) { 1796bf215546Sopenharmony_ci assert(var->data.compact); 1797bf215546Sopenharmony_ci return (1 << glsl_get_length(var->type)) - 1; 1798bf215546Sopenharmony_ci } 1799bf215546Sopenharmony_ci } 1800bf215546Sopenharmony_ci return 0; 1801bf215546Sopenharmony_ci} 1802bf215546Sopenharmony_ci 1803bf215546Sopenharmony_cistatic nir_shader * 1804bf215546Sopenharmony_cipipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, 1805bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline, 1806bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache) 1807bf215546Sopenharmony_ci{ 1808bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 1809bf215546Sopenharmony_ci 1810bf215546Sopenharmony_ci nir_shader *nir = NULL; 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_ci nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache, 1813bf215546Sopenharmony_ci &v3dv_nir_options, 1814bf215546Sopenharmony_ci p_stage->shader_sha1); 1815bf215546Sopenharmony_ci 1816bf215546Sopenharmony_ci if (nir) { 1817bf215546Sopenharmony_ci assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage)); 1818bf215546Sopenharmony_ci 1819bf215546Sopenharmony_ci /* A NIR cach hit doesn't avoid the large majority of pipeline stage 1820bf215546Sopenharmony_ci * creation so the cache hit is not recorded in the pipeline feedback 1821bf215546Sopenharmony_ci * flags 1822bf215546Sopenharmony_ci */ 1823bf215546Sopenharmony_ci 1824bf215546Sopenharmony_ci p_stage->feedback.duration += os_time_get_nano() - stage_start; 1825bf215546Sopenharmony_ci 1826bf215546Sopenharmony_ci return nir; 1827bf215546Sopenharmony_ci } 1828bf215546Sopenharmony_ci 1829bf215546Sopenharmony_ci nir = shader_module_compile_to_nir(pipeline->device, p_stage); 1830bf215546Sopenharmony_ci 1831bf215546Sopenharmony_ci if (nir) { 1832bf215546Sopenharmony_ci struct v3dv_pipeline_cache *default_cache = 1833bf215546Sopenharmony_ci &pipeline->device->default_pipeline_cache; 1834bf215546Sopenharmony_ci 1835bf215546Sopenharmony_ci v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, 1836bf215546Sopenharmony_ci p_stage->shader_sha1); 1837bf215546Sopenharmony_ci 1838bf215546Sopenharmony_ci /* Ensure that the variant is on the default cache, as cmd_buffer could 1839bf215546Sopenharmony_ci * need to change the current variant 1840bf215546Sopenharmony_ci */ 1841bf215546Sopenharmony_ci if (default_cache != cache) { 1842bf215546Sopenharmony_ci v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir, 1843bf215546Sopenharmony_ci p_stage->shader_sha1); 1844bf215546Sopenharmony_ci } 1845bf215546Sopenharmony_ci 1846bf215546Sopenharmony_ci p_stage->feedback.duration += os_time_get_nano() - stage_start; 1847bf215546Sopenharmony_ci 1848bf215546Sopenharmony_ci return nir; 1849bf215546Sopenharmony_ci } 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci /* FIXME: this shouldn't happen, raise error? */ 1852bf215546Sopenharmony_ci return NULL; 1853bf215546Sopenharmony_ci} 1854bf215546Sopenharmony_ci 1855bf215546Sopenharmony_cistatic VkResult 1856bf215546Sopenharmony_cipipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, 1857bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 1858bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1859bf215546Sopenharmony_ci{ 1860bf215546Sopenharmony_ci assert(pipeline->vs_bin != NULL); 1861bf215546Sopenharmony_ci if (pipeline->vs_bin->nir == NULL) { 1862bf215546Sopenharmony_ci assert(pipeline->vs->nir); 1863bf215546Sopenharmony_ci pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir); 1864bf215546Sopenharmony_ci } 1865bf215546Sopenharmony_ci 1866bf215546Sopenharmony_ci VkResult vk_result; 1867bf215546Sopenharmony_ci struct v3d_vs_key key; 1868bf215546Sopenharmony_ci pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs); 1869bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] = 1870bf215546Sopenharmony_ci pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key), 1871bf215546Sopenharmony_ci pAllocator, &vk_result); 1872bf215546Sopenharmony_ci if (vk_result != VK_SUCCESS) 1873bf215546Sopenharmony_ci return vk_result; 1874bf215546Sopenharmony_ci 1875bf215546Sopenharmony_ci pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin); 1876bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] = 1877bf215546Sopenharmony_ci pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key), 1878bf215546Sopenharmony_ci pAllocator, &vk_result); 1879bf215546Sopenharmony_ci 1880bf215546Sopenharmony_ci return vk_result; 1881bf215546Sopenharmony_ci} 1882bf215546Sopenharmony_ci 1883bf215546Sopenharmony_cistatic VkResult 1884bf215546Sopenharmony_cipipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline, 1885bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 1886bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1887bf215546Sopenharmony_ci{ 1888bf215546Sopenharmony_ci assert(pipeline->gs); 1889bf215546Sopenharmony_ci 1890bf215546Sopenharmony_ci assert(pipeline->gs_bin != NULL); 1891bf215546Sopenharmony_ci if (pipeline->gs_bin->nir == NULL) { 1892bf215546Sopenharmony_ci assert(pipeline->gs->nir); 1893bf215546Sopenharmony_ci pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir); 1894bf215546Sopenharmony_ci } 1895bf215546Sopenharmony_ci 1896bf215546Sopenharmony_ci VkResult vk_result; 1897bf215546Sopenharmony_ci struct v3d_gs_key key; 1898bf215546Sopenharmony_ci pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs); 1899bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] = 1900bf215546Sopenharmony_ci pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key), 1901bf215546Sopenharmony_ci pAllocator, &vk_result); 1902bf215546Sopenharmony_ci if (vk_result != VK_SUCCESS) 1903bf215546Sopenharmony_ci return vk_result; 1904bf215546Sopenharmony_ci 1905bf215546Sopenharmony_ci pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin); 1906bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] = 1907bf215546Sopenharmony_ci pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key), 1908bf215546Sopenharmony_ci pAllocator, &vk_result); 1909bf215546Sopenharmony_ci 1910bf215546Sopenharmony_ci return vk_result; 1911bf215546Sopenharmony_ci} 1912bf215546Sopenharmony_ci 1913bf215546Sopenharmony_cistatic VkResult 1914bf215546Sopenharmony_cipipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline, 1915bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 1916bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1917bf215546Sopenharmony_ci{ 1918bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage = pipeline->vs; 1919bf215546Sopenharmony_ci 1920bf215546Sopenharmony_ci p_stage = pipeline->fs; 1921bf215546Sopenharmony_ci 1922bf215546Sopenharmony_ci struct v3d_fs_key key; 1923bf215546Sopenharmony_ci 1924bf215546Sopenharmony_ci pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage, 1925bf215546Sopenharmony_ci pipeline->gs != NULL, 1926bf215546Sopenharmony_ci get_ucp_enable_mask(pipeline->vs)); 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_ci VkResult vk_result; 1929bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] = 1930bf215546Sopenharmony_ci pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key), 1931bf215546Sopenharmony_ci pAllocator, &vk_result); 1932bf215546Sopenharmony_ci 1933bf215546Sopenharmony_ci return vk_result; 1934bf215546Sopenharmony_ci} 1935bf215546Sopenharmony_ci 1936bf215546Sopenharmony_cistatic void 1937bf215546Sopenharmony_cipipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, 1938bf215546Sopenharmony_ci struct v3dv_pipeline_key *key, 1939bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1940bf215546Sopenharmony_ci{ 1941bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 1942bf215546Sopenharmony_ci key->robust_buffer_access = 1943bf215546Sopenharmony_ci pipeline->device->features.robustBufferAccess; 1944bf215546Sopenharmony_ci 1945bf215546Sopenharmony_ci const bool raster_enabled = 1946bf215546Sopenharmony_ci !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 1947bf215546Sopenharmony_ci 1948bf215546Sopenharmony_ci const VkPipelineInputAssemblyStateCreateInfo *ia_info = 1949bf215546Sopenharmony_ci pCreateInfo->pInputAssemblyState; 1950bf215546Sopenharmony_ci key->topology = vk_to_pipe_prim_type[ia_info->topology]; 1951bf215546Sopenharmony_ci 1952bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *cb_info = 1953bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pColorBlendState : NULL; 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_ci key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? 1956bf215546Sopenharmony_ci vk_to_pipe_logicop[cb_info->logicOp] : 1957bf215546Sopenharmony_ci PIPE_LOGICOP_COPY; 1958bf215546Sopenharmony_ci 1959bf215546Sopenharmony_ci /* Multisample rasterization state must be ignored if rasterization 1960bf215546Sopenharmony_ci * is disabled. 1961bf215546Sopenharmony_ci */ 1962bf215546Sopenharmony_ci const VkPipelineMultisampleStateCreateInfo *ms_info = 1963bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pMultisampleState : NULL; 1964bf215546Sopenharmony_ci if (ms_info) { 1965bf215546Sopenharmony_ci assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || 1966bf215546Sopenharmony_ci ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); 1967bf215546Sopenharmony_ci key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; 1968bf215546Sopenharmony_ci 1969bf215546Sopenharmony_ci if (key->msaa) { 1970bf215546Sopenharmony_ci key->sample_coverage = 1971bf215546Sopenharmony_ci pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1; 1972bf215546Sopenharmony_ci key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; 1973bf215546Sopenharmony_ci key->sample_alpha_to_one = ms_info->alphaToOneEnable; 1974bf215546Sopenharmony_ci } 1975bf215546Sopenharmony_ci } 1976bf215546Sopenharmony_ci 1977bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = 1978bf215546Sopenharmony_ci v3dv_render_pass_from_handle(pCreateInfo->renderPass); 1979bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = pipeline->subpass; 1980bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 1981bf215546Sopenharmony_ci const uint32_t att_idx = subpass->color_attachments[i].attachment; 1982bf215546Sopenharmony_ci if (att_idx == VK_ATTACHMENT_UNUSED) 1983bf215546Sopenharmony_ci continue; 1984bf215546Sopenharmony_ci 1985bf215546Sopenharmony_ci key->cbufs |= 1 << i; 1986bf215546Sopenharmony_ci 1987bf215546Sopenharmony_ci VkFormat fb_format = pass->attachments[att_idx].desc.format; 1988bf215546Sopenharmony_ci enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); 1989bf215546Sopenharmony_ci 1990bf215546Sopenharmony_ci /* If logic operations are enabled then we might emit color reads and we 1991bf215546Sopenharmony_ci * need to know the color buffer format and swizzle for that 1992bf215546Sopenharmony_ci */ 1993bf215546Sopenharmony_ci if (key->logicop_func != PIPE_LOGICOP_COPY) { 1994bf215546Sopenharmony_ci key->color_fmt[i].format = fb_pipe_format; 1995bf215546Sopenharmony_ci memcpy(key->color_fmt[i].swizzle, 1996bf215546Sopenharmony_ci v3dv_get_format_swizzle(pipeline->device, fb_format), 1997bf215546Sopenharmony_ci sizeof(key->color_fmt[i].swizzle)); 1998bf215546Sopenharmony_ci } 1999bf215546Sopenharmony_ci 2000bf215546Sopenharmony_ci const struct util_format_description *desc = 2001bf215546Sopenharmony_ci vk_format_description(fb_format); 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_ci if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 2004bf215546Sopenharmony_ci desc->channel[0].size == 32) { 2005bf215546Sopenharmony_ci key->f32_color_rb |= 1 << i; 2006bf215546Sopenharmony_ci } 2007bf215546Sopenharmony_ci } 2008bf215546Sopenharmony_ci 2009bf215546Sopenharmony_ci const VkPipelineVertexInputStateCreateInfo *vi_info = 2010bf215546Sopenharmony_ci pCreateInfo->pVertexInputState; 2011bf215546Sopenharmony_ci for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 2012bf215546Sopenharmony_ci const VkVertexInputAttributeDescription *desc = 2013bf215546Sopenharmony_ci &vi_info->pVertexAttributeDescriptions[i]; 2014bf215546Sopenharmony_ci assert(desc->location < MAX_VERTEX_ATTRIBS); 2015bf215546Sopenharmony_ci if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) 2016bf215546Sopenharmony_ci key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); 2017bf215546Sopenharmony_ci } 2018bf215546Sopenharmony_ci 2019bf215546Sopenharmony_ci assert(pipeline->subpass); 2020bf215546Sopenharmony_ci key->has_multiview = pipeline->subpass->view_mask != 0; 2021bf215546Sopenharmony_ci} 2022bf215546Sopenharmony_ci 2023bf215546Sopenharmony_cistatic void 2024bf215546Sopenharmony_cipipeline_populate_compute_key(struct v3dv_pipeline *pipeline, 2025bf215546Sopenharmony_ci struct v3dv_pipeline_key *key, 2026bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfo) 2027bf215546Sopenharmony_ci{ 2028bf215546Sopenharmony_ci /* We use the same pipeline key for graphics and compute, but we don't need 2029bf215546Sopenharmony_ci * to add a field to flag compute keys because this key is not used alone 2030bf215546Sopenharmony_ci * to search in the cache, we also use the SPIR-V or the serialized NIR for 2031bf215546Sopenharmony_ci * example, which already flags compute shaders. 2032bf215546Sopenharmony_ci */ 2033bf215546Sopenharmony_ci memset(key, 0, sizeof(*key)); 2034bf215546Sopenharmony_ci key->robust_buffer_access = 2035bf215546Sopenharmony_ci pipeline->device->features.robustBufferAccess; 2036bf215546Sopenharmony_ci} 2037bf215546Sopenharmony_ci 2038bf215546Sopenharmony_cistatic struct v3dv_pipeline_shared_data * 2039bf215546Sopenharmony_civ3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20], 2040bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline, 2041bf215546Sopenharmony_ci bool is_graphics_pipeline) 2042bf215546Sopenharmony_ci{ 2043bf215546Sopenharmony_ci /* We create new_entry using the device alloc. Right now shared_data is ref 2044bf215546Sopenharmony_ci * and unref by both the pipeline and the pipeline cache, so we can't 2045bf215546Sopenharmony_ci * ensure that the cache or pipeline alloc will be available on the last 2046bf215546Sopenharmony_ci * unref. 2047bf215546Sopenharmony_ci */ 2048bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *new_entry = 2049bf215546Sopenharmony_ci vk_zalloc2(&pipeline->device->vk.alloc, NULL, 2050bf215546Sopenharmony_ci sizeof(struct v3dv_pipeline_shared_data), 8, 2051bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2052bf215546Sopenharmony_ci 2053bf215546Sopenharmony_ci if (new_entry == NULL) 2054bf215546Sopenharmony_ci return NULL; 2055bf215546Sopenharmony_ci 2056bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 2057bf215546Sopenharmony_ci /* We don't need specific descriptor maps for binning stages we use the 2058bf215546Sopenharmony_ci * map for the render stage. 2059bf215546Sopenharmony_ci */ 2060bf215546Sopenharmony_ci if (broadcom_shader_stage_is_binning(stage)) 2061bf215546Sopenharmony_ci continue; 2062bf215546Sopenharmony_ci 2063bf215546Sopenharmony_ci if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) || 2064bf215546Sopenharmony_ci (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) { 2065bf215546Sopenharmony_ci continue; 2066bf215546Sopenharmony_ci } 2067bf215546Sopenharmony_ci 2068bf215546Sopenharmony_ci if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) { 2069bf215546Sopenharmony_ci /* We always inject a custom GS if we have multiview */ 2070bf215546Sopenharmony_ci if (!pipeline->subpass->view_mask) 2071bf215546Sopenharmony_ci continue; 2072bf215546Sopenharmony_ci } 2073bf215546Sopenharmony_ci 2074bf215546Sopenharmony_ci struct v3dv_descriptor_maps *new_maps = 2075bf215546Sopenharmony_ci vk_zalloc2(&pipeline->device->vk.alloc, NULL, 2076bf215546Sopenharmony_ci sizeof(struct v3dv_descriptor_maps), 8, 2077bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2078bf215546Sopenharmony_ci 2079bf215546Sopenharmony_ci if (new_maps == NULL) 2080bf215546Sopenharmony_ci goto fail; 2081bf215546Sopenharmony_ci 2082bf215546Sopenharmony_ci new_entry->maps[stage] = new_maps; 2083bf215546Sopenharmony_ci } 2084bf215546Sopenharmony_ci 2085bf215546Sopenharmony_ci new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] = 2086bf215546Sopenharmony_ci new_entry->maps[BROADCOM_SHADER_VERTEX]; 2087bf215546Sopenharmony_ci 2088bf215546Sopenharmony_ci new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] = 2089bf215546Sopenharmony_ci new_entry->maps[BROADCOM_SHADER_GEOMETRY]; 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_ci new_entry->ref_cnt = 1; 2092bf215546Sopenharmony_ci memcpy(new_entry->sha1_key, sha1_key, 20); 2093bf215546Sopenharmony_ci 2094bf215546Sopenharmony_ci return new_entry; 2095bf215546Sopenharmony_ci 2096bf215546Sopenharmony_cifail: 2097bf215546Sopenharmony_ci if (new_entry != NULL) { 2098bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 2099bf215546Sopenharmony_ci if (new_entry->maps[stage] != NULL) 2100bf215546Sopenharmony_ci vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]); 2101bf215546Sopenharmony_ci } 2102bf215546Sopenharmony_ci } 2103bf215546Sopenharmony_ci 2104bf215546Sopenharmony_ci vk_free(&pipeline->device->vk.alloc, new_entry); 2105bf215546Sopenharmony_ci 2106bf215546Sopenharmony_ci return NULL; 2107bf215546Sopenharmony_ci} 2108bf215546Sopenharmony_ci 2109bf215546Sopenharmony_cistatic void 2110bf215546Sopenharmony_ciwrite_creation_feedback(struct v3dv_pipeline *pipeline, 2111bf215546Sopenharmony_ci const void *next, 2112bf215546Sopenharmony_ci const VkPipelineCreationFeedback *pipeline_feedback, 2113bf215546Sopenharmony_ci uint32_t stage_count, 2114bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stages) 2115bf215546Sopenharmony_ci{ 2116bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *create_feedback = 2117bf215546Sopenharmony_ci vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 2118bf215546Sopenharmony_ci 2119bf215546Sopenharmony_ci if (create_feedback) { 2120bf215546Sopenharmony_ci typed_memcpy(create_feedback->pPipelineCreationFeedback, 2121bf215546Sopenharmony_ci pipeline_feedback, 2122bf215546Sopenharmony_ci 1); 2123bf215546Sopenharmony_ci 2124bf215546Sopenharmony_ci assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount); 2125bf215546Sopenharmony_ci 2126bf215546Sopenharmony_ci for (uint32_t i = 0; i < stage_count; i++) { 2127bf215546Sopenharmony_ci gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage); 2128bf215546Sopenharmony_ci switch (s) { 2129bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 2130bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i] = 2131bf215546Sopenharmony_ci pipeline->vs->feedback; 2132bf215546Sopenharmony_ci 2133bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i].duration += 2134bf215546Sopenharmony_ci pipeline->vs_bin->feedback.duration; 2135bf215546Sopenharmony_ci break; 2136bf215546Sopenharmony_ci 2137bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 2138bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i] = 2139bf215546Sopenharmony_ci pipeline->gs->feedback; 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i].duration += 2142bf215546Sopenharmony_ci pipeline->gs_bin->feedback.duration; 2143bf215546Sopenharmony_ci break; 2144bf215546Sopenharmony_ci 2145bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 2146bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i] = 2147bf215546Sopenharmony_ci pipeline->fs->feedback; 2148bf215546Sopenharmony_ci break; 2149bf215546Sopenharmony_ci 2150bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 2151bf215546Sopenharmony_ci create_feedback->pPipelineStageCreationFeedbacks[i] = 2152bf215546Sopenharmony_ci pipeline->cs->feedback; 2153bf215546Sopenharmony_ci break; 2154bf215546Sopenharmony_ci 2155bf215546Sopenharmony_ci default: 2156bf215546Sopenharmony_ci unreachable("not supported shader stage"); 2157bf215546Sopenharmony_ci } 2158bf215546Sopenharmony_ci } 2159bf215546Sopenharmony_ci } 2160bf215546Sopenharmony_ci} 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_cistatic enum shader_prim 2163bf215546Sopenharmony_cimultiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline) 2164bf215546Sopenharmony_ci{ 2165bf215546Sopenharmony_ci switch (pipeline->topology) { 2166bf215546Sopenharmony_ci case PIPE_PRIM_POINTS: 2167bf215546Sopenharmony_ci return SHADER_PRIM_POINTS; 2168bf215546Sopenharmony_ci case PIPE_PRIM_LINES: 2169bf215546Sopenharmony_ci case PIPE_PRIM_LINE_STRIP: 2170bf215546Sopenharmony_ci return SHADER_PRIM_LINES; 2171bf215546Sopenharmony_ci case PIPE_PRIM_TRIANGLES: 2172bf215546Sopenharmony_ci case PIPE_PRIM_TRIANGLE_STRIP: 2173bf215546Sopenharmony_ci case PIPE_PRIM_TRIANGLE_FAN: 2174bf215546Sopenharmony_ci return SHADER_PRIM_TRIANGLES; 2175bf215546Sopenharmony_ci default: 2176bf215546Sopenharmony_ci /* Since we don't allow GS with multiview, we can only see non-adjacency 2177bf215546Sopenharmony_ci * primitives. 2178bf215546Sopenharmony_ci */ 2179bf215546Sopenharmony_ci unreachable("Unexpected pipeline primitive type"); 2180bf215546Sopenharmony_ci } 2181bf215546Sopenharmony_ci} 2182bf215546Sopenharmony_ci 2183bf215546Sopenharmony_cistatic enum shader_prim 2184bf215546Sopenharmony_cimultiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline) 2185bf215546Sopenharmony_ci{ 2186bf215546Sopenharmony_ci switch (pipeline->topology) { 2187bf215546Sopenharmony_ci case PIPE_PRIM_POINTS: 2188bf215546Sopenharmony_ci return SHADER_PRIM_POINTS; 2189bf215546Sopenharmony_ci case PIPE_PRIM_LINES: 2190bf215546Sopenharmony_ci case PIPE_PRIM_LINE_STRIP: 2191bf215546Sopenharmony_ci return SHADER_PRIM_LINE_STRIP; 2192bf215546Sopenharmony_ci case PIPE_PRIM_TRIANGLES: 2193bf215546Sopenharmony_ci case PIPE_PRIM_TRIANGLE_STRIP: 2194bf215546Sopenharmony_ci case PIPE_PRIM_TRIANGLE_FAN: 2195bf215546Sopenharmony_ci return SHADER_PRIM_TRIANGLE_STRIP; 2196bf215546Sopenharmony_ci default: 2197bf215546Sopenharmony_ci /* Since we don't allow GS with multiview, we can only see non-adjacency 2198bf215546Sopenharmony_ci * primitives. 2199bf215546Sopenharmony_ci */ 2200bf215546Sopenharmony_ci unreachable("Unexpected pipeline primitive type"); 2201bf215546Sopenharmony_ci } 2202bf215546Sopenharmony_ci} 2203bf215546Sopenharmony_ci 2204bf215546Sopenharmony_cistatic bool 2205bf215546Sopenharmony_cipipeline_add_multiview_gs(struct v3dv_pipeline *pipeline, 2206bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache, 2207bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 2208bf215546Sopenharmony_ci{ 2209bf215546Sopenharmony_ci /* Create the passthrough GS from the VS output interface */ 2210bf215546Sopenharmony_ci pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); 2211bf215546Sopenharmony_ci nir_shader *vs_nir = pipeline->vs->nir; 2212bf215546Sopenharmony_ci 2213bf215546Sopenharmony_ci const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); 2214bf215546Sopenharmony_ci nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, 2215bf215546Sopenharmony_ci "multiview broadcast gs"); 2216bf215546Sopenharmony_ci nir_shader *nir = b.shader; 2217bf215546Sopenharmony_ci nir->info.inputs_read = vs_nir->info.outputs_written; 2218bf215546Sopenharmony_ci nir->info.outputs_written = vs_nir->info.outputs_written | 2219bf215546Sopenharmony_ci (1ull << VARYING_SLOT_LAYER); 2220bf215546Sopenharmony_ci 2221bf215546Sopenharmony_ci uint32_t vertex_count = u_vertices_per_prim(pipeline->topology); 2222bf215546Sopenharmony_ci nir->info.gs.input_primitive = 2223bf215546Sopenharmony_ci multiview_gs_input_primitive_from_pipeline(pipeline); 2224bf215546Sopenharmony_ci nir->info.gs.output_primitive = 2225bf215546Sopenharmony_ci multiview_gs_output_primitive_from_pipeline(pipeline); 2226bf215546Sopenharmony_ci nir->info.gs.vertices_in = vertex_count; 2227bf215546Sopenharmony_ci nir->info.gs.vertices_out = nir->info.gs.vertices_in; 2228bf215546Sopenharmony_ci nir->info.gs.invocations = 1; 2229bf215546Sopenharmony_ci nir->info.gs.active_stream_mask = 0x1; 2230bf215546Sopenharmony_ci 2231bf215546Sopenharmony_ci /* Make a list of GS input/output variables from the VS outputs */ 2232bf215546Sopenharmony_ci nir_variable *in_vars[100]; 2233bf215546Sopenharmony_ci nir_variable *out_vars[100]; 2234bf215546Sopenharmony_ci uint32_t var_count = 0; 2235bf215546Sopenharmony_ci nir_foreach_shader_out_variable(out_vs_var, vs_nir) { 2236bf215546Sopenharmony_ci char name[8]; 2237bf215546Sopenharmony_ci snprintf(name, ARRAY_SIZE(name), "in_%d", var_count); 2238bf215546Sopenharmony_ci 2239bf215546Sopenharmony_ci in_vars[var_count] = 2240bf215546Sopenharmony_ci nir_variable_create(nir, nir_var_shader_in, 2241bf215546Sopenharmony_ci glsl_array_type(out_vs_var->type, vertex_count, 0), 2242bf215546Sopenharmony_ci name); 2243bf215546Sopenharmony_ci in_vars[var_count]->data.location = out_vs_var->data.location; 2244bf215546Sopenharmony_ci in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac; 2245bf215546Sopenharmony_ci in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; 2246bf215546Sopenharmony_ci 2247bf215546Sopenharmony_ci snprintf(name, ARRAY_SIZE(name), "out_%d", var_count); 2248bf215546Sopenharmony_ci out_vars[var_count] = 2249bf215546Sopenharmony_ci nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name); 2250bf215546Sopenharmony_ci out_vars[var_count]->data.location = out_vs_var->data.location; 2251bf215546Sopenharmony_ci out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; 2252bf215546Sopenharmony_ci 2253bf215546Sopenharmony_ci var_count++; 2254bf215546Sopenharmony_ci } 2255bf215546Sopenharmony_ci 2256bf215546Sopenharmony_ci /* Add the gl_Layer output variable */ 2257bf215546Sopenharmony_ci nir_variable *out_layer = 2258bf215546Sopenharmony_ci nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), 2259bf215546Sopenharmony_ci "out_Layer"); 2260bf215546Sopenharmony_ci out_layer->data.location = VARYING_SLOT_LAYER; 2261bf215546Sopenharmony_ci 2262bf215546Sopenharmony_ci /* Get the view index value that we will write to gl_Layer */ 2263bf215546Sopenharmony_ci nir_ssa_def *layer = 2264bf215546Sopenharmony_ci nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32); 2265bf215546Sopenharmony_ci 2266bf215546Sopenharmony_ci /* Emit all output vertices */ 2267bf215546Sopenharmony_ci for (uint32_t vi = 0; vi < vertex_count; vi++) { 2268bf215546Sopenharmony_ci /* Emit all output varyings */ 2269bf215546Sopenharmony_ci for (uint32_t i = 0; i < var_count; i++) { 2270bf215546Sopenharmony_ci nir_deref_instr *in_value = 2271bf215546Sopenharmony_ci nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi); 2272bf215546Sopenharmony_ci nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value); 2273bf215546Sopenharmony_ci } 2274bf215546Sopenharmony_ci 2275bf215546Sopenharmony_ci /* Emit gl_Layer write */ 2276bf215546Sopenharmony_ci nir_store_var(&b, out_layer, layer, 0x1); 2277bf215546Sopenharmony_ci 2278bf215546Sopenharmony_ci nir_emit_vertex(&b, 0); 2279bf215546Sopenharmony_ci } 2280bf215546Sopenharmony_ci nir_end_primitive(&b, 0); 2281bf215546Sopenharmony_ci 2282bf215546Sopenharmony_ci /* Make sure we run our pre-process NIR passes so we produce NIR compatible 2283bf215546Sopenharmony_ci * with what we expect from SPIR-V modules. 2284bf215546Sopenharmony_ci */ 2285bf215546Sopenharmony_ci preprocess_nir(nir); 2286bf215546Sopenharmony_ci 2287bf215546Sopenharmony_ci /* Attach the geometry shader to the pipeline */ 2288bf215546Sopenharmony_ci struct v3dv_device *device = pipeline->device; 2289bf215546Sopenharmony_ci struct v3dv_physical_device *physical_device = 2290bf215546Sopenharmony_ci &device->instance->physicalDevice; 2291bf215546Sopenharmony_ci 2292bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage = 2293bf215546Sopenharmony_ci vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 2294bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2295bf215546Sopenharmony_ci 2296bf215546Sopenharmony_ci if (p_stage == NULL) { 2297bf215546Sopenharmony_ci ralloc_free(nir); 2298bf215546Sopenharmony_ci return false; 2299bf215546Sopenharmony_ci } 2300bf215546Sopenharmony_ci 2301bf215546Sopenharmony_ci p_stage->pipeline = pipeline; 2302bf215546Sopenharmony_ci p_stage->stage = BROADCOM_SHADER_GEOMETRY; 2303bf215546Sopenharmony_ci p_stage->entrypoint = "main"; 2304bf215546Sopenharmony_ci p_stage->module = 0; 2305bf215546Sopenharmony_ci p_stage->nir = nir; 2306bf215546Sopenharmony_ci pipeline_compute_sha1_from_nir(p_stage); 2307bf215546Sopenharmony_ci p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); 2308bf215546Sopenharmony_ci 2309bf215546Sopenharmony_ci pipeline->has_gs = true; 2310bf215546Sopenharmony_ci pipeline->gs = p_stage; 2311bf215546Sopenharmony_ci pipeline->active_stages |= MESA_SHADER_GEOMETRY; 2312bf215546Sopenharmony_ci 2313bf215546Sopenharmony_ci pipeline->gs_bin = 2314bf215546Sopenharmony_ci pipeline_stage_create_binning(pipeline->gs, pAllocator); 2315bf215546Sopenharmony_ci if (pipeline->gs_bin == NULL) 2316bf215546Sopenharmony_ci return false; 2317bf215546Sopenharmony_ci 2318bf215546Sopenharmony_ci return true; 2319bf215546Sopenharmony_ci} 2320bf215546Sopenharmony_ci 2321bf215546Sopenharmony_cistatic void 2322bf215546Sopenharmony_cipipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline) 2323bf215546Sopenharmony_ci{ 2324bf215546Sopenharmony_ci for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) { 2325bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i]; 2326bf215546Sopenharmony_ci if (variant && variant->prog_data.base->has_global_address) { 2327bf215546Sopenharmony_ci pipeline->uses_buffer_device_address = true; 2328bf215546Sopenharmony_ci return; 2329bf215546Sopenharmony_ci } 2330bf215546Sopenharmony_ci } 2331bf215546Sopenharmony_ci 2332bf215546Sopenharmony_ci pipeline->uses_buffer_device_address = false; 2333bf215546Sopenharmony_ci} 2334bf215546Sopenharmony_ci 2335bf215546Sopenharmony_ci/* 2336bf215546Sopenharmony_ci * It compiles a pipeline. Note that it also allocate internal object, but if 2337bf215546Sopenharmony_ci * some allocations success, but other fails, the method is not freeing the 2338bf215546Sopenharmony_ci * successful ones. 2339bf215546Sopenharmony_ci * 2340bf215546Sopenharmony_ci * This is done to simplify the code, as what we do in this case is just call 2341bf215546Sopenharmony_ci * the pipeline destroy method, and this would handle freeing the internal 2342bf215546Sopenharmony_ci * objects allocated. We just need to be careful setting to NULL the objects 2343bf215546Sopenharmony_ci * not allocated. 2344bf215546Sopenharmony_ci */ 2345bf215546Sopenharmony_cistatic VkResult 2346bf215546Sopenharmony_cipipeline_compile_graphics(struct v3dv_pipeline *pipeline, 2347bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache, 2348bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 2349bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 2350bf215546Sopenharmony_ci{ 2351bf215546Sopenharmony_ci VkPipelineCreationFeedback pipeline_feedback = { 2352bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 2353bf215546Sopenharmony_ci }; 2354bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 2355bf215546Sopenharmony_ci 2356bf215546Sopenharmony_ci struct v3dv_device *device = pipeline->device; 2357bf215546Sopenharmony_ci struct v3dv_physical_device *physical_device = 2358bf215546Sopenharmony_ci &device->instance->physicalDevice; 2359bf215546Sopenharmony_ci 2360bf215546Sopenharmony_ci /* First pass to get some common info from the shader, and create the 2361bf215546Sopenharmony_ci * individual pipeline_stage objects 2362bf215546Sopenharmony_ci */ 2363bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { 2364bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; 2365bf215546Sopenharmony_ci gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); 2366bf215546Sopenharmony_ci 2367bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage = 2368bf215546Sopenharmony_ci vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 2369bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2370bf215546Sopenharmony_ci 2371bf215546Sopenharmony_ci if (p_stage == NULL) 2372bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 2373bf215546Sopenharmony_ci 2374bf215546Sopenharmony_ci /* Note that we are assigning program_id slightly differently that 2375bf215546Sopenharmony_ci * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin 2376bf215546Sopenharmony_ci * would have a different program_id, while v3d would have the same for 2377bf215546Sopenharmony_ci * both. For the case of v3dv, it is more natural to have an id this way, 2378bf215546Sopenharmony_ci * as right now we are using it for debugging, not for shader-db. 2379bf215546Sopenharmony_ci */ 2380bf215546Sopenharmony_ci p_stage->program_id = 2381bf215546Sopenharmony_ci p_atomic_inc_return(&physical_device->next_program_id); 2382bf215546Sopenharmony_ci 2383bf215546Sopenharmony_ci p_stage->pipeline = pipeline; 2384bf215546Sopenharmony_ci p_stage->stage = gl_shader_stage_to_broadcom(stage); 2385bf215546Sopenharmony_ci p_stage->entrypoint = sinfo->pName; 2386bf215546Sopenharmony_ci p_stage->module = vk_shader_module_from_handle(sinfo->module); 2387bf215546Sopenharmony_ci p_stage->spec_info = sinfo->pSpecializationInfo; 2388bf215546Sopenharmony_ci 2389bf215546Sopenharmony_ci vk_pipeline_hash_shader_stage(&pCreateInfo->pStages[i], p_stage->shader_sha1); 2390bf215546Sopenharmony_ci 2391bf215546Sopenharmony_ci pipeline->active_stages |= sinfo->stage; 2392bf215546Sopenharmony_ci 2393bf215546Sopenharmony_ci /* We will try to get directly the compiled shader variant, so let's not 2394bf215546Sopenharmony_ci * worry about getting the nir shader for now. 2395bf215546Sopenharmony_ci */ 2396bf215546Sopenharmony_ci p_stage->nir = NULL; 2397bf215546Sopenharmony_ci 2398bf215546Sopenharmony_ci switch(stage) { 2399bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 2400bf215546Sopenharmony_ci pipeline->vs = p_stage; 2401bf215546Sopenharmony_ci pipeline->vs_bin = 2402bf215546Sopenharmony_ci pipeline_stage_create_binning(pipeline->vs, pAllocator); 2403bf215546Sopenharmony_ci if (pipeline->vs_bin == NULL) 2404bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 2405bf215546Sopenharmony_ci break; 2406bf215546Sopenharmony_ci 2407bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 2408bf215546Sopenharmony_ci pipeline->has_gs = true; 2409bf215546Sopenharmony_ci pipeline->gs = p_stage; 2410bf215546Sopenharmony_ci pipeline->gs_bin = 2411bf215546Sopenharmony_ci pipeline_stage_create_binning(pipeline->gs, pAllocator); 2412bf215546Sopenharmony_ci if (pipeline->gs_bin == NULL) 2413bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 2414bf215546Sopenharmony_ci break; 2415bf215546Sopenharmony_ci 2416bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 2417bf215546Sopenharmony_ci pipeline->fs = p_stage; 2418bf215546Sopenharmony_ci break; 2419bf215546Sopenharmony_ci 2420bf215546Sopenharmony_ci default: 2421bf215546Sopenharmony_ci unreachable("not supported shader stage"); 2422bf215546Sopenharmony_ci } 2423bf215546Sopenharmony_ci } 2424bf215546Sopenharmony_ci 2425bf215546Sopenharmony_ci /* Add a no-op fragment shader if needed */ 2426bf215546Sopenharmony_ci if (!pipeline->fs) { 2427bf215546Sopenharmony_ci nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 2428bf215546Sopenharmony_ci &v3dv_nir_options, 2429bf215546Sopenharmony_ci "noop_fs"); 2430bf215546Sopenharmony_ci 2431bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage = 2432bf215546Sopenharmony_ci vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 2433bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2434bf215546Sopenharmony_ci 2435bf215546Sopenharmony_ci if (p_stage == NULL) 2436bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 2437bf215546Sopenharmony_ci 2438bf215546Sopenharmony_ci p_stage->pipeline = pipeline; 2439bf215546Sopenharmony_ci p_stage->stage = BROADCOM_SHADER_FRAGMENT; 2440bf215546Sopenharmony_ci p_stage->entrypoint = "main"; 2441bf215546Sopenharmony_ci p_stage->module = 0; 2442bf215546Sopenharmony_ci p_stage->nir = b.shader; 2443bf215546Sopenharmony_ci pipeline_compute_sha1_from_nir(p_stage); 2444bf215546Sopenharmony_ci p_stage->program_id = 2445bf215546Sopenharmony_ci p_atomic_inc_return(&physical_device->next_program_id); 2446bf215546Sopenharmony_ci 2447bf215546Sopenharmony_ci pipeline->fs = p_stage; 2448bf215546Sopenharmony_ci pipeline->active_stages |= MESA_SHADER_FRAGMENT; 2449bf215546Sopenharmony_ci } 2450bf215546Sopenharmony_ci 2451bf215546Sopenharmony_ci /* If multiview is enabled, we inject a custom passthrough geometry shader 2452bf215546Sopenharmony_ci * to broadcast draw calls to the appropriate views. 2453bf215546Sopenharmony_ci */ 2454bf215546Sopenharmony_ci assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs)); 2455bf215546Sopenharmony_ci if (pipeline->subpass->view_mask) { 2456bf215546Sopenharmony_ci if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator)) 2457bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 2458bf215546Sopenharmony_ci } 2459bf215546Sopenharmony_ci 2460bf215546Sopenharmony_ci /* First we try to get the variants from the pipeline cache (unless we are 2461bf215546Sopenharmony_ci * required to capture internal representations, since in that case we need 2462bf215546Sopenharmony_ci * compile). 2463bf215546Sopenharmony_ci */ 2464bf215546Sopenharmony_ci bool needs_executable_info = 2465bf215546Sopenharmony_ci pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 2466bf215546Sopenharmony_ci if (!needs_executable_info) { 2467bf215546Sopenharmony_ci struct v3dv_pipeline_key pipeline_key; 2468bf215546Sopenharmony_ci pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo); 2469bf215546Sopenharmony_ci pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1); 2470bf215546Sopenharmony_ci 2471bf215546Sopenharmony_ci bool cache_hit = false; 2472bf215546Sopenharmony_ci 2473bf215546Sopenharmony_ci pipeline->shared_data = 2474bf215546Sopenharmony_ci v3dv_pipeline_cache_search_for_pipeline(cache, 2475bf215546Sopenharmony_ci pipeline->sha1, 2476bf215546Sopenharmony_ci &cache_hit); 2477bf215546Sopenharmony_ci 2478bf215546Sopenharmony_ci if (pipeline->shared_data != NULL) { 2479bf215546Sopenharmony_ci /* A correct pipeline must have at least a VS and FS */ 2480bf215546Sopenharmony_ci assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]); 2481bf215546Sopenharmony_ci assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); 2482bf215546Sopenharmony_ci assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); 2483bf215546Sopenharmony_ci assert(!pipeline->gs || 2484bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]); 2485bf215546Sopenharmony_ci assert(!pipeline->gs || 2486bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); 2487bf215546Sopenharmony_ci 2488bf215546Sopenharmony_ci if (cache_hit && cache != &pipeline->device->default_pipeline_cache) 2489bf215546Sopenharmony_ci pipeline_feedback.flags |= 2490bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 2491bf215546Sopenharmony_ci 2492bf215546Sopenharmony_ci goto success; 2493bf215546Sopenharmony_ci } 2494bf215546Sopenharmony_ci } 2495bf215546Sopenharmony_ci 2496bf215546Sopenharmony_ci if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) 2497bf215546Sopenharmony_ci return VK_PIPELINE_COMPILE_REQUIRED; 2498bf215546Sopenharmony_ci 2499bf215546Sopenharmony_ci /* Otherwise we try to get the NIR shaders (either from the original SPIR-V 2500bf215546Sopenharmony_ci * shader or the pipeline cache) and compile. 2501bf215546Sopenharmony_ci */ 2502bf215546Sopenharmony_ci pipeline->shared_data = 2503bf215546Sopenharmony_ci v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true); 2504bf215546Sopenharmony_ci if (!pipeline->shared_data) 2505bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 2506bf215546Sopenharmony_ci 2507bf215546Sopenharmony_ci pipeline->vs->feedback.flags |= 2508bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 2509bf215546Sopenharmony_ci if (pipeline->gs) 2510bf215546Sopenharmony_ci pipeline->gs->feedback.flags |= 2511bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 2512bf215546Sopenharmony_ci pipeline->fs->feedback.flags |= 2513bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 2514bf215546Sopenharmony_ci 2515bf215546Sopenharmony_ci if (!pipeline->vs->nir) 2516bf215546Sopenharmony_ci pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); 2517bf215546Sopenharmony_ci if (pipeline->gs && !pipeline->gs->nir) 2518bf215546Sopenharmony_ci pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache); 2519bf215546Sopenharmony_ci if (!pipeline->fs->nir) 2520bf215546Sopenharmony_ci pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache); 2521bf215546Sopenharmony_ci 2522bf215546Sopenharmony_ci /* Linking + pipeline lowerings */ 2523bf215546Sopenharmony_ci if (pipeline->gs) { 2524bf215546Sopenharmony_ci link_shaders(pipeline->gs->nir, pipeline->fs->nir); 2525bf215546Sopenharmony_ci link_shaders(pipeline->vs->nir, pipeline->gs->nir); 2526bf215546Sopenharmony_ci } else { 2527bf215546Sopenharmony_ci link_shaders(pipeline->vs->nir, pipeline->fs->nir); 2528bf215546Sopenharmony_ci } 2529bf215546Sopenharmony_ci 2530bf215546Sopenharmony_ci pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout); 2531bf215546Sopenharmony_ci lower_fs_io(pipeline->fs->nir); 2532bf215546Sopenharmony_ci 2533bf215546Sopenharmony_ci if (pipeline->gs) { 2534bf215546Sopenharmony_ci pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout); 2535bf215546Sopenharmony_ci lower_gs_io(pipeline->gs->nir); 2536bf215546Sopenharmony_ci } 2537bf215546Sopenharmony_ci 2538bf215546Sopenharmony_ci pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout); 2539bf215546Sopenharmony_ci lower_vs_io(pipeline->vs->nir); 2540bf215546Sopenharmony_ci 2541bf215546Sopenharmony_ci /* Compiling to vir */ 2542bf215546Sopenharmony_ci VkResult vk_result; 2543bf215546Sopenharmony_ci 2544bf215546Sopenharmony_ci /* We should have got all the variants or no variants from the cache */ 2545bf215546Sopenharmony_ci assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); 2546bf215546Sopenharmony_ci vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo); 2547bf215546Sopenharmony_ci if (vk_result != VK_SUCCESS) 2548bf215546Sopenharmony_ci return vk_result; 2549bf215546Sopenharmony_ci 2550bf215546Sopenharmony_ci assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] && 2551bf215546Sopenharmony_ci !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); 2552bf215546Sopenharmony_ci 2553bf215546Sopenharmony_ci if (pipeline->gs) { 2554bf215546Sopenharmony_ci vk_result = 2555bf215546Sopenharmony_ci pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo); 2556bf215546Sopenharmony_ci if (vk_result != VK_SUCCESS) 2557bf215546Sopenharmony_ci return vk_result; 2558bf215546Sopenharmony_ci } 2559bf215546Sopenharmony_ci 2560bf215546Sopenharmony_ci assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] && 2561bf215546Sopenharmony_ci !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); 2562bf215546Sopenharmony_ci 2563bf215546Sopenharmony_ci vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo); 2564bf215546Sopenharmony_ci if (vk_result != VK_SUCCESS) 2565bf215546Sopenharmony_ci return vk_result; 2566bf215546Sopenharmony_ci 2567bf215546Sopenharmony_ci if (!upload_assembly(pipeline)) 2568bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_DEVICE_MEMORY; 2569bf215546Sopenharmony_ci 2570bf215546Sopenharmony_ci v3dv_pipeline_cache_upload_pipeline(pipeline, cache); 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci success: 2573bf215546Sopenharmony_ci 2574bf215546Sopenharmony_ci pipeline_check_buffer_device_address(pipeline); 2575bf215546Sopenharmony_ci 2576bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 2577bf215546Sopenharmony_ci write_creation_feedback(pipeline, 2578bf215546Sopenharmony_ci pCreateInfo->pNext, 2579bf215546Sopenharmony_ci &pipeline_feedback, 2580bf215546Sopenharmony_ci pCreateInfo->stageCount, 2581bf215546Sopenharmony_ci pCreateInfo->pStages); 2582bf215546Sopenharmony_ci 2583bf215546Sopenharmony_ci /* Since we have the variants in the pipeline shared data we can now free 2584bf215546Sopenharmony_ci * the pipeline stages. 2585bf215546Sopenharmony_ci */ 2586bf215546Sopenharmony_ci if (!needs_executable_info) 2587bf215546Sopenharmony_ci pipeline_free_stages(device, pipeline, pAllocator); 2588bf215546Sopenharmony_ci 2589bf215546Sopenharmony_ci pipeline_check_spill_size(pipeline); 2590bf215546Sopenharmony_ci 2591bf215546Sopenharmony_ci return compute_vpm_config(pipeline); 2592bf215546Sopenharmony_ci} 2593bf215546Sopenharmony_ci 2594bf215546Sopenharmony_cistatic VkResult 2595bf215546Sopenharmony_cicompute_vpm_config(struct v3dv_pipeline *pipeline) 2596bf215546Sopenharmony_ci{ 2597bf215546Sopenharmony_ci struct v3dv_shader_variant *vs_variant = 2598bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 2599bf215546Sopenharmony_ci struct v3dv_shader_variant *vs_bin_variant = 2600bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 2601bf215546Sopenharmony_ci struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs; 2602bf215546Sopenharmony_ci struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs; 2603bf215546Sopenharmony_ci 2604bf215546Sopenharmony_ci struct v3d_gs_prog_data *gs = NULL; 2605bf215546Sopenharmony_ci struct v3d_gs_prog_data *gs_bin = NULL; 2606bf215546Sopenharmony_ci if (pipeline->has_gs) { 2607bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_variant = 2608bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 2609bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_bin_variant = 2610bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 2611bf215546Sopenharmony_ci gs = gs_variant->prog_data.gs; 2612bf215546Sopenharmony_ci gs_bin = gs_bin_variant->prog_data.gs; 2613bf215546Sopenharmony_ci } 2614bf215546Sopenharmony_ci 2615bf215546Sopenharmony_ci if (!v3d_compute_vpm_config(&pipeline->device->devinfo, 2616bf215546Sopenharmony_ci vs_bin, vs, gs_bin, gs, 2617bf215546Sopenharmony_ci &pipeline->vpm_cfg_bin, 2618bf215546Sopenharmony_ci &pipeline->vpm_cfg)) { 2619bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_DEVICE_MEMORY; 2620bf215546Sopenharmony_ci } 2621bf215546Sopenharmony_ci 2622bf215546Sopenharmony_ci return VK_SUCCESS; 2623bf215546Sopenharmony_ci} 2624bf215546Sopenharmony_ci 2625bf215546Sopenharmony_cistatic unsigned 2626bf215546Sopenharmony_civ3dv_dynamic_state_mask(VkDynamicState state) 2627bf215546Sopenharmony_ci{ 2628bf215546Sopenharmony_ci switch(state) { 2629bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VIEWPORT: 2630bf215546Sopenharmony_ci return V3DV_DYNAMIC_VIEWPORT; 2631bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_SCISSOR: 2632bf215546Sopenharmony_ci return V3DV_DYNAMIC_SCISSOR; 2633bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: 2634bf215546Sopenharmony_ci return V3DV_DYNAMIC_STENCIL_COMPARE_MASK; 2635bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: 2636bf215546Sopenharmony_ci return V3DV_DYNAMIC_STENCIL_WRITE_MASK; 2637bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_REFERENCE: 2638bf215546Sopenharmony_ci return V3DV_DYNAMIC_STENCIL_REFERENCE; 2639bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_BLEND_CONSTANTS: 2640bf215546Sopenharmony_ci return V3DV_DYNAMIC_BLEND_CONSTANTS; 2641bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BIAS: 2642bf215546Sopenharmony_ci return V3DV_DYNAMIC_DEPTH_BIAS; 2643bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_LINE_WIDTH: 2644bf215546Sopenharmony_ci return V3DV_DYNAMIC_LINE_WIDTH; 2645bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: 2646bf215546Sopenharmony_ci return V3DV_DYNAMIC_COLOR_WRITE_ENABLE; 2647bf215546Sopenharmony_ci 2648bf215546Sopenharmony_ci /* Depth bounds testing is not available in in V3D 4.2 so here we are just 2649bf215546Sopenharmony_ci * ignoring this dynamic state. We are already asserting at pipeline creation 2650bf215546Sopenharmony_ci * time that depth bounds testing is not enabled. 2651bf215546Sopenharmony_ci */ 2652bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BOUNDS: 2653bf215546Sopenharmony_ci return 0; 2654bf215546Sopenharmony_ci 2655bf215546Sopenharmony_ci default: 2656bf215546Sopenharmony_ci unreachable("Unhandled dynamic state"); 2657bf215546Sopenharmony_ci } 2658bf215546Sopenharmony_ci} 2659bf215546Sopenharmony_ci 2660bf215546Sopenharmony_cistatic void 2661bf215546Sopenharmony_cipipeline_init_dynamic_state( 2662bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline, 2663bf215546Sopenharmony_ci const VkPipelineDynamicStateCreateInfo *pDynamicState, 2664bf215546Sopenharmony_ci const VkPipelineViewportStateCreateInfo *pViewportState, 2665bf215546Sopenharmony_ci const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState, 2666bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *pColorBlendState, 2667bf215546Sopenharmony_ci const VkPipelineRasterizationStateCreateInfo *pRasterizationState, 2668bf215546Sopenharmony_ci const VkPipelineColorWriteCreateInfoEXT *pColorWriteState) 2669bf215546Sopenharmony_ci{ 2670bf215546Sopenharmony_ci /* Initialize to default values */ 2671bf215546Sopenharmony_ci struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state; 2672bf215546Sopenharmony_ci memset(dynamic, 0, sizeof(*dynamic)); 2673bf215546Sopenharmony_ci dynamic->stencil_compare_mask.front = ~0; 2674bf215546Sopenharmony_ci dynamic->stencil_compare_mask.back = ~0; 2675bf215546Sopenharmony_ci dynamic->stencil_write_mask.front = ~0; 2676bf215546Sopenharmony_ci dynamic->stencil_write_mask.back = ~0; 2677bf215546Sopenharmony_ci dynamic->line_width = 1.0f; 2678bf215546Sopenharmony_ci dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1; 2679bf215546Sopenharmony_ci 2680bf215546Sopenharmony_ci /* Create a mask of enabled dynamic states */ 2681bf215546Sopenharmony_ci uint32_t dynamic_states = 0; 2682bf215546Sopenharmony_ci if (pDynamicState) { 2683bf215546Sopenharmony_ci uint32_t count = pDynamicState->dynamicStateCount; 2684bf215546Sopenharmony_ci for (uint32_t s = 0; s < count; s++) { 2685bf215546Sopenharmony_ci dynamic_states |= 2686bf215546Sopenharmony_ci v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]); 2687bf215546Sopenharmony_ci } 2688bf215546Sopenharmony_ci } 2689bf215546Sopenharmony_ci 2690bf215546Sopenharmony_ci /* For any pipeline states that are not dynamic, set the dynamic state 2691bf215546Sopenharmony_ci * from the static pipeline state. 2692bf215546Sopenharmony_ci */ 2693bf215546Sopenharmony_ci if (pViewportState) { 2694bf215546Sopenharmony_ci if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) { 2695bf215546Sopenharmony_ci dynamic->viewport.count = pViewportState->viewportCount; 2696bf215546Sopenharmony_ci typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports, 2697bf215546Sopenharmony_ci pViewportState->viewportCount); 2698bf215546Sopenharmony_ci 2699bf215546Sopenharmony_ci for (uint32_t i = 0; i < dynamic->viewport.count; i++) { 2700bf215546Sopenharmony_ci v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i], 2701bf215546Sopenharmony_ci dynamic->viewport.scale[i], 2702bf215546Sopenharmony_ci dynamic->viewport.translate[i]); 2703bf215546Sopenharmony_ci } 2704bf215546Sopenharmony_ci } 2705bf215546Sopenharmony_ci 2706bf215546Sopenharmony_ci if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) { 2707bf215546Sopenharmony_ci dynamic->scissor.count = pViewportState->scissorCount; 2708bf215546Sopenharmony_ci typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors, 2709bf215546Sopenharmony_ci pViewportState->scissorCount); 2710bf215546Sopenharmony_ci } 2711bf215546Sopenharmony_ci } 2712bf215546Sopenharmony_ci 2713bf215546Sopenharmony_ci if (pDepthStencilState) { 2714bf215546Sopenharmony_ci if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) { 2715bf215546Sopenharmony_ci dynamic->stencil_compare_mask.front = 2716bf215546Sopenharmony_ci pDepthStencilState->front.compareMask; 2717bf215546Sopenharmony_ci dynamic->stencil_compare_mask.back = 2718bf215546Sopenharmony_ci pDepthStencilState->back.compareMask; 2719bf215546Sopenharmony_ci } 2720bf215546Sopenharmony_ci 2721bf215546Sopenharmony_ci if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) { 2722bf215546Sopenharmony_ci dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask; 2723bf215546Sopenharmony_ci dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask; 2724bf215546Sopenharmony_ci } 2725bf215546Sopenharmony_ci 2726bf215546Sopenharmony_ci if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) { 2727bf215546Sopenharmony_ci dynamic->stencil_reference.front = pDepthStencilState->front.reference; 2728bf215546Sopenharmony_ci dynamic->stencil_reference.back = pDepthStencilState->back.reference; 2729bf215546Sopenharmony_ci } 2730bf215546Sopenharmony_ci } 2731bf215546Sopenharmony_ci 2732bf215546Sopenharmony_ci if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) { 2733bf215546Sopenharmony_ci memcpy(dynamic->blend_constants, pColorBlendState->blendConstants, 2734bf215546Sopenharmony_ci sizeof(dynamic->blend_constants)); 2735bf215546Sopenharmony_ci } 2736bf215546Sopenharmony_ci 2737bf215546Sopenharmony_ci if (pRasterizationState) { 2738bf215546Sopenharmony_ci if (pRasterizationState->depthBiasEnable && 2739bf215546Sopenharmony_ci !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) { 2740bf215546Sopenharmony_ci dynamic->depth_bias.constant_factor = 2741bf215546Sopenharmony_ci pRasterizationState->depthBiasConstantFactor; 2742bf215546Sopenharmony_ci dynamic->depth_bias.depth_bias_clamp = 2743bf215546Sopenharmony_ci pRasterizationState->depthBiasClamp; 2744bf215546Sopenharmony_ci dynamic->depth_bias.slope_factor = 2745bf215546Sopenharmony_ci pRasterizationState->depthBiasSlopeFactor; 2746bf215546Sopenharmony_ci } 2747bf215546Sopenharmony_ci if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH)) 2748bf215546Sopenharmony_ci dynamic->line_width = pRasterizationState->lineWidth; 2749bf215546Sopenharmony_ci } 2750bf215546Sopenharmony_ci 2751bf215546Sopenharmony_ci if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) { 2752bf215546Sopenharmony_ci dynamic->color_write_enable = 0; 2753bf215546Sopenharmony_ci for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++) 2754bf215546Sopenharmony_ci dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; 2755bf215546Sopenharmony_ci } 2756bf215546Sopenharmony_ci 2757bf215546Sopenharmony_ci pipeline->dynamic_state.mask = dynamic_states; 2758bf215546Sopenharmony_ci} 2759bf215546Sopenharmony_ci 2760bf215546Sopenharmony_cistatic bool 2761bf215546Sopenharmony_cistencil_op_is_no_op(const VkStencilOpState *stencil) 2762bf215546Sopenharmony_ci{ 2763bf215546Sopenharmony_ci return stencil->depthFailOp == VK_STENCIL_OP_KEEP && 2764bf215546Sopenharmony_ci stencil->compareOp == VK_COMPARE_OP_ALWAYS; 2765bf215546Sopenharmony_ci} 2766bf215546Sopenharmony_ci 2767bf215546Sopenharmony_cistatic void 2768bf215546Sopenharmony_cienable_depth_bias(struct v3dv_pipeline *pipeline, 2769bf215546Sopenharmony_ci const VkPipelineRasterizationStateCreateInfo *rs_info) 2770bf215546Sopenharmony_ci{ 2771bf215546Sopenharmony_ci pipeline->depth_bias.enabled = false; 2772bf215546Sopenharmony_ci pipeline->depth_bias.is_z16 = false; 2773bf215546Sopenharmony_ci 2774bf215546Sopenharmony_ci if (!rs_info || !rs_info->depthBiasEnable) 2775bf215546Sopenharmony_ci return; 2776bf215546Sopenharmony_ci 2777bf215546Sopenharmony_ci /* Check the depth/stencil attachment description for the subpass used with 2778bf215546Sopenharmony_ci * this pipeline. 2779bf215546Sopenharmony_ci */ 2780bf215546Sopenharmony_ci assert(pipeline->pass && pipeline->subpass); 2781bf215546Sopenharmony_ci struct v3dv_render_pass *pass = pipeline->pass; 2782bf215546Sopenharmony_ci struct v3dv_subpass *subpass = pipeline->subpass; 2783bf215546Sopenharmony_ci 2784bf215546Sopenharmony_ci if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) 2785bf215546Sopenharmony_ci return; 2786bf215546Sopenharmony_ci 2787bf215546Sopenharmony_ci assert(subpass->ds_attachment.attachment < pass->attachment_count); 2788bf215546Sopenharmony_ci struct v3dv_render_pass_attachment *att = 2789bf215546Sopenharmony_ci &pass->attachments[subpass->ds_attachment.attachment]; 2790bf215546Sopenharmony_ci 2791bf215546Sopenharmony_ci if (att->desc.format == VK_FORMAT_D16_UNORM) 2792bf215546Sopenharmony_ci pipeline->depth_bias.is_z16 = true; 2793bf215546Sopenharmony_ci 2794bf215546Sopenharmony_ci pipeline->depth_bias.enabled = true; 2795bf215546Sopenharmony_ci} 2796bf215546Sopenharmony_ci 2797bf215546Sopenharmony_cistatic void 2798bf215546Sopenharmony_cipipeline_set_ez_state(struct v3dv_pipeline *pipeline, 2799bf215546Sopenharmony_ci const VkPipelineDepthStencilStateCreateInfo *ds_info) 2800bf215546Sopenharmony_ci{ 2801bf215546Sopenharmony_ci if (!ds_info || !ds_info->depthTestEnable) { 2802bf215546Sopenharmony_ci pipeline->ez_state = V3D_EZ_DISABLED; 2803bf215546Sopenharmony_ci return; 2804bf215546Sopenharmony_ci } 2805bf215546Sopenharmony_ci 2806bf215546Sopenharmony_ci switch (ds_info->depthCompareOp) { 2807bf215546Sopenharmony_ci case VK_COMPARE_OP_LESS: 2808bf215546Sopenharmony_ci case VK_COMPARE_OP_LESS_OR_EQUAL: 2809bf215546Sopenharmony_ci pipeline->ez_state = V3D_EZ_LT_LE; 2810bf215546Sopenharmony_ci break; 2811bf215546Sopenharmony_ci case VK_COMPARE_OP_GREATER: 2812bf215546Sopenharmony_ci case VK_COMPARE_OP_GREATER_OR_EQUAL: 2813bf215546Sopenharmony_ci pipeline->ez_state = V3D_EZ_GT_GE; 2814bf215546Sopenharmony_ci break; 2815bf215546Sopenharmony_ci case VK_COMPARE_OP_NEVER: 2816bf215546Sopenharmony_ci case VK_COMPARE_OP_EQUAL: 2817bf215546Sopenharmony_ci pipeline->ez_state = V3D_EZ_UNDECIDED; 2818bf215546Sopenharmony_ci break; 2819bf215546Sopenharmony_ci default: 2820bf215546Sopenharmony_ci pipeline->ez_state = V3D_EZ_DISABLED; 2821bf215546Sopenharmony_ci pipeline->incompatible_ez_test = true; 2822bf215546Sopenharmony_ci break; 2823bf215546Sopenharmony_ci } 2824bf215546Sopenharmony_ci 2825bf215546Sopenharmony_ci /* If stencil is enabled and is not a no-op, we need to disable EZ */ 2826bf215546Sopenharmony_ci if (ds_info->stencilTestEnable && 2827bf215546Sopenharmony_ci (!stencil_op_is_no_op(&ds_info->front) || 2828bf215546Sopenharmony_ci !stencil_op_is_no_op(&ds_info->back))) { 2829bf215546Sopenharmony_ci pipeline->ez_state = V3D_EZ_DISABLED; 2830bf215546Sopenharmony_ci } 2831bf215546Sopenharmony_ci 2832bf215546Sopenharmony_ci /* If the FS writes Z, then it may update against the chosen EZ direction */ 2833bf215546Sopenharmony_ci struct v3dv_shader_variant *fs_variant = 2834bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 2835bf215546Sopenharmony_ci if (fs_variant && fs_variant->prog_data.fs->writes_z && 2836bf215546Sopenharmony_ci !fs_variant->prog_data.fs->writes_z_from_fep) { 2837bf215546Sopenharmony_ci pipeline->ez_state = V3D_EZ_DISABLED; 2838bf215546Sopenharmony_ci } 2839bf215546Sopenharmony_ci} 2840bf215546Sopenharmony_ci 2841bf215546Sopenharmony_cistatic bool 2842bf215546Sopenharmony_cipipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline) 2843bf215546Sopenharmony_ci{ 2844bf215546Sopenharmony_ci for (uint8_t i = 0; i < pipeline->va_count; i++) { 2845bf215546Sopenharmony_ci if (vk_format_is_int(pipeline->va[i].vk_format)) 2846bf215546Sopenharmony_ci return true; 2847bf215546Sopenharmony_ci } 2848bf215546Sopenharmony_ci return false; 2849bf215546Sopenharmony_ci} 2850bf215546Sopenharmony_ci 2851bf215546Sopenharmony_ci/* @pipeline can be NULL. We assume in that case that all the attributes have 2852bf215546Sopenharmony_ci * a float format (we only create an all-float BO once and we reuse it with 2853bf215546Sopenharmony_ci * all float pipelines), otherwise we look at the actual type of each 2854bf215546Sopenharmony_ci * attribute used with the specific pipeline passed in. 2855bf215546Sopenharmony_ci */ 2856bf215546Sopenharmony_cistruct v3dv_bo * 2857bf215546Sopenharmony_civ3dv_pipeline_create_default_attribute_values(struct v3dv_device *device, 2858bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline) 2859bf215546Sopenharmony_ci{ 2860bf215546Sopenharmony_ci uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; 2861bf215546Sopenharmony_ci struct v3dv_bo *bo; 2862bf215546Sopenharmony_ci 2863bf215546Sopenharmony_ci bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true); 2864bf215546Sopenharmony_ci 2865bf215546Sopenharmony_ci if (!bo) { 2866bf215546Sopenharmony_ci fprintf(stderr, "failed to allocate memory for the default " 2867bf215546Sopenharmony_ci "attribute values\n"); 2868bf215546Sopenharmony_ci return NULL; 2869bf215546Sopenharmony_ci } 2870bf215546Sopenharmony_ci 2871bf215546Sopenharmony_ci bool ok = v3dv_bo_map(device, bo, size); 2872bf215546Sopenharmony_ci if (!ok) { 2873bf215546Sopenharmony_ci fprintf(stderr, "failed to map default attribute values buffer\n"); 2874bf215546Sopenharmony_ci return false; 2875bf215546Sopenharmony_ci } 2876bf215546Sopenharmony_ci 2877bf215546Sopenharmony_ci uint32_t *attrs = bo->map; 2878bf215546Sopenharmony_ci uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0; 2879bf215546Sopenharmony_ci for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) { 2880bf215546Sopenharmony_ci attrs[i * 4 + 0] = 0; 2881bf215546Sopenharmony_ci attrs[i * 4 + 1] = 0; 2882bf215546Sopenharmony_ci attrs[i * 4 + 2] = 0; 2883bf215546Sopenharmony_ci VkFormat attr_format = 2884bf215546Sopenharmony_ci pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED; 2885bf215546Sopenharmony_ci if (i < va_count && vk_format_is_int(attr_format)) { 2886bf215546Sopenharmony_ci attrs[i * 4 + 3] = 1; 2887bf215546Sopenharmony_ci } else { 2888bf215546Sopenharmony_ci attrs[i * 4 + 3] = fui(1.0); 2889bf215546Sopenharmony_ci } 2890bf215546Sopenharmony_ci } 2891bf215546Sopenharmony_ci 2892bf215546Sopenharmony_ci v3dv_bo_unmap(device, bo); 2893bf215546Sopenharmony_ci 2894bf215546Sopenharmony_ci return bo; 2895bf215546Sopenharmony_ci} 2896bf215546Sopenharmony_ci 2897bf215546Sopenharmony_cistatic void 2898bf215546Sopenharmony_cipipeline_set_sample_mask(struct v3dv_pipeline *pipeline, 2899bf215546Sopenharmony_ci const VkPipelineMultisampleStateCreateInfo *ms_info) 2900bf215546Sopenharmony_ci{ 2901bf215546Sopenharmony_ci pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1; 2902bf215546Sopenharmony_ci 2903bf215546Sopenharmony_ci /* Ignore pSampleMask if we are not enabling multisampling. The hardware 2904bf215546Sopenharmony_ci * requires this to be 0xf or 0x0 if using a single sample. 2905bf215546Sopenharmony_ci */ 2906bf215546Sopenharmony_ci if (ms_info && ms_info->pSampleMask && 2907bf215546Sopenharmony_ci ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) { 2908bf215546Sopenharmony_ci pipeline->sample_mask &= ms_info->pSampleMask[0]; 2909bf215546Sopenharmony_ci } 2910bf215546Sopenharmony_ci} 2911bf215546Sopenharmony_ci 2912bf215546Sopenharmony_cistatic void 2913bf215546Sopenharmony_cipipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline, 2914bf215546Sopenharmony_ci const VkPipelineMultisampleStateCreateInfo *ms_info) 2915bf215546Sopenharmony_ci{ 2916bf215546Sopenharmony_ci pipeline->sample_rate_shading = 2917bf215546Sopenharmony_ci ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT && 2918bf215546Sopenharmony_ci ms_info->sampleShadingEnable; 2919bf215546Sopenharmony_ci} 2920bf215546Sopenharmony_ci 2921bf215546Sopenharmony_cistatic VkResult 2922bf215546Sopenharmony_cipipeline_init(struct v3dv_pipeline *pipeline, 2923bf215546Sopenharmony_ci struct v3dv_device *device, 2924bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache, 2925bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 2926bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 2927bf215546Sopenharmony_ci{ 2928bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 2929bf215546Sopenharmony_ci 2930bf215546Sopenharmony_ci pipeline->device = device; 2931bf215546Sopenharmony_ci 2932bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout); 2933bf215546Sopenharmony_ci pipeline->layout = layout; 2934bf215546Sopenharmony_ci 2935bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass); 2936bf215546Sopenharmony_ci assert(pCreateInfo->subpass < render_pass->subpass_count); 2937bf215546Sopenharmony_ci pipeline->pass = render_pass; 2938bf215546Sopenharmony_ci pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass]; 2939bf215546Sopenharmony_ci 2940bf215546Sopenharmony_ci const VkPipelineInputAssemblyStateCreateInfo *ia_info = 2941bf215546Sopenharmony_ci pCreateInfo->pInputAssemblyState; 2942bf215546Sopenharmony_ci pipeline->topology = vk_to_pipe_prim_type[ia_info->topology]; 2943bf215546Sopenharmony_ci 2944bf215546Sopenharmony_ci /* If rasterization is not enabled, various CreateInfo structs must be 2945bf215546Sopenharmony_ci * ignored. 2946bf215546Sopenharmony_ci */ 2947bf215546Sopenharmony_ci const bool raster_enabled = 2948bf215546Sopenharmony_ci !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 2949bf215546Sopenharmony_ci 2950bf215546Sopenharmony_ci const VkPipelineViewportStateCreateInfo *vp_info = 2951bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pViewportState : NULL; 2952bf215546Sopenharmony_ci 2953bf215546Sopenharmony_ci const VkPipelineDepthStencilStateCreateInfo *ds_info = 2954bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pDepthStencilState : NULL; 2955bf215546Sopenharmony_ci 2956bf215546Sopenharmony_ci const VkPipelineRasterizationStateCreateInfo *rs_info = 2957bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pRasterizationState : NULL; 2958bf215546Sopenharmony_ci 2959bf215546Sopenharmony_ci const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info = 2960bf215546Sopenharmony_ci rs_info ? vk_find_struct_const( 2961bf215546Sopenharmony_ci rs_info->pNext, 2962bf215546Sopenharmony_ci PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) : 2963bf215546Sopenharmony_ci NULL; 2964bf215546Sopenharmony_ci 2965bf215546Sopenharmony_ci const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info = 2966bf215546Sopenharmony_ci rs_info ? vk_find_struct_const( 2967bf215546Sopenharmony_ci rs_info->pNext, 2968bf215546Sopenharmony_ci PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) : 2969bf215546Sopenharmony_ci NULL; 2970bf215546Sopenharmony_ci 2971bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *cb_info = 2972bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pColorBlendState : NULL; 2973bf215546Sopenharmony_ci 2974bf215546Sopenharmony_ci const VkPipelineMultisampleStateCreateInfo *ms_info = 2975bf215546Sopenharmony_ci raster_enabled ? pCreateInfo->pMultisampleState : NULL; 2976bf215546Sopenharmony_ci 2977bf215546Sopenharmony_ci const VkPipelineColorWriteCreateInfoEXT *cw_info = 2978bf215546Sopenharmony_ci cb_info ? vk_find_struct_const(cb_info->pNext, 2979bf215546Sopenharmony_ci PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) : 2980bf215546Sopenharmony_ci NULL; 2981bf215546Sopenharmony_ci 2982bf215546Sopenharmony_ci pipeline_init_dynamic_state(pipeline, 2983bf215546Sopenharmony_ci pCreateInfo->pDynamicState, 2984bf215546Sopenharmony_ci vp_info, ds_info, cb_info, rs_info, cw_info); 2985bf215546Sopenharmony_ci 2986bf215546Sopenharmony_ci /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that 2987bf215546Sopenharmony_ci * feature and it shouldn't be used by any pipeline. 2988bf215546Sopenharmony_ci */ 2989bf215546Sopenharmony_ci assert(!ds_info || !ds_info->depthBoundsTestEnable); 2990bf215546Sopenharmony_ci 2991bf215546Sopenharmony_ci v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info, 2992bf215546Sopenharmony_ci rs_info, pv_info, ls_info, 2993bf215546Sopenharmony_ci ms_info); 2994bf215546Sopenharmony_ci 2995bf215546Sopenharmony_ci enable_depth_bias(pipeline, rs_info); 2996bf215546Sopenharmony_ci pipeline_set_sample_mask(pipeline, ms_info); 2997bf215546Sopenharmony_ci pipeline_set_sample_rate_shading(pipeline, ms_info); 2998bf215546Sopenharmony_ci 2999bf215546Sopenharmony_ci pipeline->primitive_restart = 3000bf215546Sopenharmony_ci pCreateInfo->pInputAssemblyState->primitiveRestartEnable; 3001bf215546Sopenharmony_ci 3002bf215546Sopenharmony_ci result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator); 3003bf215546Sopenharmony_ci 3004bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 3005bf215546Sopenharmony_ci /* Caller would already destroy the pipeline, and we didn't allocate any 3006bf215546Sopenharmony_ci * extra info. We don't need to do anything else. 3007bf215546Sopenharmony_ci */ 3008bf215546Sopenharmony_ci return result; 3009bf215546Sopenharmony_ci } 3010bf215546Sopenharmony_ci 3011bf215546Sopenharmony_ci const VkPipelineVertexInputStateCreateInfo *vi_info = 3012bf215546Sopenharmony_ci pCreateInfo->pVertexInputState; 3013bf215546Sopenharmony_ci 3014bf215546Sopenharmony_ci const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info = 3015bf215546Sopenharmony_ci vk_find_struct_const(vi_info->pNext, 3016bf215546Sopenharmony_ci PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); 3017bf215546Sopenharmony_ci 3018bf215546Sopenharmony_ci v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info); 3019bf215546Sopenharmony_ci 3020bf215546Sopenharmony_ci if (pipeline_has_integer_vertex_attrib(pipeline)) { 3021bf215546Sopenharmony_ci pipeline->default_attribute_values = 3022bf215546Sopenharmony_ci v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline); 3023bf215546Sopenharmony_ci if (!pipeline->default_attribute_values) 3024bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_DEVICE_MEMORY; 3025bf215546Sopenharmony_ci } else { 3026bf215546Sopenharmony_ci pipeline->default_attribute_values = NULL; 3027bf215546Sopenharmony_ci } 3028bf215546Sopenharmony_ci 3029bf215546Sopenharmony_ci /* This must be done after the pipeline has been compiled */ 3030bf215546Sopenharmony_ci pipeline_set_ez_state(pipeline, ds_info); 3031bf215546Sopenharmony_ci 3032bf215546Sopenharmony_ci return result; 3033bf215546Sopenharmony_ci} 3034bf215546Sopenharmony_ci 3035bf215546Sopenharmony_cistatic VkResult 3036bf215546Sopenharmony_cigraphics_pipeline_create(VkDevice _device, 3037bf215546Sopenharmony_ci VkPipelineCache _cache, 3038bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 3039bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 3040bf215546Sopenharmony_ci VkPipeline *pPipeline) 3041bf215546Sopenharmony_ci{ 3042bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 3043bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 3044bf215546Sopenharmony_ci 3045bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline; 3046bf215546Sopenharmony_ci VkResult result; 3047bf215546Sopenharmony_ci 3048bf215546Sopenharmony_ci /* Use the default pipeline cache if none is specified */ 3049bf215546Sopenharmony_ci if (cache == NULL && device->instance->default_pipeline_cache_enabled) 3050bf215546Sopenharmony_ci cache = &device->default_pipeline_cache; 3051bf215546Sopenharmony_ci 3052bf215546Sopenharmony_ci pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), 3053bf215546Sopenharmony_ci VK_OBJECT_TYPE_PIPELINE); 3054bf215546Sopenharmony_ci 3055bf215546Sopenharmony_ci if (pipeline == NULL) 3056bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 3057bf215546Sopenharmony_ci 3058bf215546Sopenharmony_ci result = pipeline_init(pipeline, device, cache, 3059bf215546Sopenharmony_ci pCreateInfo, 3060bf215546Sopenharmony_ci pAllocator); 3061bf215546Sopenharmony_ci 3062bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 3063bf215546Sopenharmony_ci v3dv_destroy_pipeline(pipeline, device, pAllocator); 3064bf215546Sopenharmony_ci if (result == VK_PIPELINE_COMPILE_REQUIRED) 3065bf215546Sopenharmony_ci *pPipeline = VK_NULL_HANDLE; 3066bf215546Sopenharmony_ci return result; 3067bf215546Sopenharmony_ci } 3068bf215546Sopenharmony_ci 3069bf215546Sopenharmony_ci *pPipeline = v3dv_pipeline_to_handle(pipeline); 3070bf215546Sopenharmony_ci 3071bf215546Sopenharmony_ci return VK_SUCCESS; 3072bf215546Sopenharmony_ci} 3073bf215546Sopenharmony_ci 3074bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 3075bf215546Sopenharmony_civ3dv_CreateGraphicsPipelines(VkDevice _device, 3076bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 3077bf215546Sopenharmony_ci uint32_t count, 3078bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfos, 3079bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 3080bf215546Sopenharmony_ci VkPipeline *pPipelines) 3081bf215546Sopenharmony_ci{ 3082bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 3083bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 3084bf215546Sopenharmony_ci 3085bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3086bf215546Sopenharmony_ci mtx_lock(&device->pdevice->mutex); 3087bf215546Sopenharmony_ci 3088bf215546Sopenharmony_ci uint32_t i = 0; 3089bf215546Sopenharmony_ci for (; i < count; i++) { 3090bf215546Sopenharmony_ci VkResult local_result; 3091bf215546Sopenharmony_ci 3092bf215546Sopenharmony_ci local_result = graphics_pipeline_create(_device, 3093bf215546Sopenharmony_ci pipelineCache, 3094bf215546Sopenharmony_ci &pCreateInfos[i], 3095bf215546Sopenharmony_ci pAllocator, 3096bf215546Sopenharmony_ci &pPipelines[i]); 3097bf215546Sopenharmony_ci 3098bf215546Sopenharmony_ci if (local_result != VK_SUCCESS) { 3099bf215546Sopenharmony_ci result = local_result; 3100bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 3101bf215546Sopenharmony_ci 3102bf215546Sopenharmony_ci if (pCreateInfos[i].flags & 3103bf215546Sopenharmony_ci VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 3104bf215546Sopenharmony_ci break; 3105bf215546Sopenharmony_ci } 3106bf215546Sopenharmony_ci } 3107bf215546Sopenharmony_ci 3108bf215546Sopenharmony_ci for (; i < count; i++) 3109bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 3110bf215546Sopenharmony_ci 3111bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3112bf215546Sopenharmony_ci mtx_unlock(&device->pdevice->mutex); 3113bf215546Sopenharmony_ci 3114bf215546Sopenharmony_ci return result; 3115bf215546Sopenharmony_ci} 3116bf215546Sopenharmony_ci 3117bf215546Sopenharmony_cistatic void 3118bf215546Sopenharmony_cishared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) 3119bf215546Sopenharmony_ci{ 3120bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(type)); 3121bf215546Sopenharmony_ci 3122bf215546Sopenharmony_ci uint32_t comp_size = glsl_type_is_boolean(type) 3123bf215546Sopenharmony_ci ? 4 : glsl_get_bit_size(type) / 8; 3124bf215546Sopenharmony_ci unsigned length = glsl_get_vector_elements(type); 3125bf215546Sopenharmony_ci *size = comp_size * length, 3126bf215546Sopenharmony_ci *align = comp_size * (length == 3 ? 4 : length); 3127bf215546Sopenharmony_ci} 3128bf215546Sopenharmony_ci 3129bf215546Sopenharmony_cistatic void 3130bf215546Sopenharmony_cilower_cs_shared(struct nir_shader *nir) 3131bf215546Sopenharmony_ci{ 3132bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, 3133bf215546Sopenharmony_ci nir_var_mem_shared, shared_type_info); 3134bf215546Sopenharmony_ci NIR_PASS(_, nir, nir_lower_explicit_io, 3135bf215546Sopenharmony_ci nir_var_mem_shared, nir_address_format_32bit_offset); 3136bf215546Sopenharmony_ci} 3137bf215546Sopenharmony_ci 3138bf215546Sopenharmony_cistatic VkResult 3139bf215546Sopenharmony_cipipeline_compile_compute(struct v3dv_pipeline *pipeline, 3140bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache, 3141bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *info, 3142bf215546Sopenharmony_ci const VkAllocationCallbacks *alloc) 3143bf215546Sopenharmony_ci{ 3144bf215546Sopenharmony_ci VkPipelineCreationFeedback pipeline_feedback = { 3145bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 3146bf215546Sopenharmony_ci }; 3147bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 3148bf215546Sopenharmony_ci 3149bf215546Sopenharmony_ci struct v3dv_device *device = pipeline->device; 3150bf215546Sopenharmony_ci struct v3dv_physical_device *physical_device = 3151bf215546Sopenharmony_ci &device->instance->physicalDevice; 3152bf215546Sopenharmony_ci 3153bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *sinfo = &info->stage; 3154bf215546Sopenharmony_ci gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); 3155bf215546Sopenharmony_ci 3156bf215546Sopenharmony_ci struct v3dv_pipeline_stage *p_stage = 3157bf215546Sopenharmony_ci vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8, 3158bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 3159bf215546Sopenharmony_ci if (!p_stage) 3160bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 3161bf215546Sopenharmony_ci 3162bf215546Sopenharmony_ci p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); 3163bf215546Sopenharmony_ci p_stage->pipeline = pipeline; 3164bf215546Sopenharmony_ci p_stage->stage = gl_shader_stage_to_broadcom(stage); 3165bf215546Sopenharmony_ci p_stage->entrypoint = sinfo->pName; 3166bf215546Sopenharmony_ci p_stage->module = vk_shader_module_from_handle(sinfo->module); 3167bf215546Sopenharmony_ci p_stage->spec_info = sinfo->pSpecializationInfo; 3168bf215546Sopenharmony_ci p_stage->feedback = (VkPipelineCreationFeedback) { 0 }; 3169bf215546Sopenharmony_ci 3170bf215546Sopenharmony_ci vk_pipeline_hash_shader_stage(&info->stage, p_stage->shader_sha1); 3171bf215546Sopenharmony_ci 3172bf215546Sopenharmony_ci p_stage->nir = NULL; 3173bf215546Sopenharmony_ci 3174bf215546Sopenharmony_ci pipeline->cs = p_stage; 3175bf215546Sopenharmony_ci pipeline->active_stages |= sinfo->stage; 3176bf215546Sopenharmony_ci 3177bf215546Sopenharmony_ci /* First we try to get the variants from the pipeline cache (unless we are 3178bf215546Sopenharmony_ci * required to capture internal representations, since in that case we need 3179bf215546Sopenharmony_ci * compile). 3180bf215546Sopenharmony_ci */ 3181bf215546Sopenharmony_ci bool needs_executable_info = 3182bf215546Sopenharmony_ci info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 3183bf215546Sopenharmony_ci if (!needs_executable_info) { 3184bf215546Sopenharmony_ci struct v3dv_pipeline_key pipeline_key; 3185bf215546Sopenharmony_ci pipeline_populate_compute_key(pipeline, &pipeline_key, info); 3186bf215546Sopenharmony_ci pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1); 3187bf215546Sopenharmony_ci 3188bf215546Sopenharmony_ci bool cache_hit = false; 3189bf215546Sopenharmony_ci pipeline->shared_data = 3190bf215546Sopenharmony_ci v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit); 3191bf215546Sopenharmony_ci 3192bf215546Sopenharmony_ci if (pipeline->shared_data != NULL) { 3193bf215546Sopenharmony_ci assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); 3194bf215546Sopenharmony_ci if (cache_hit && cache != &pipeline->device->default_pipeline_cache) 3195bf215546Sopenharmony_ci pipeline_feedback.flags |= 3196bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 3197bf215546Sopenharmony_ci 3198bf215546Sopenharmony_ci goto success; 3199bf215546Sopenharmony_ci } 3200bf215546Sopenharmony_ci } 3201bf215546Sopenharmony_ci 3202bf215546Sopenharmony_ci if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) 3203bf215546Sopenharmony_ci return VK_PIPELINE_COMPILE_REQUIRED; 3204bf215546Sopenharmony_ci 3205bf215546Sopenharmony_ci pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1, 3206bf215546Sopenharmony_ci pipeline, 3207bf215546Sopenharmony_ci false); 3208bf215546Sopenharmony_ci if (!pipeline->shared_data) 3209bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 3210bf215546Sopenharmony_ci 3211bf215546Sopenharmony_ci p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 3212bf215546Sopenharmony_ci 3213bf215546Sopenharmony_ci /* If not found on cache, compile it */ 3214bf215546Sopenharmony_ci p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); 3215bf215546Sopenharmony_ci assert(p_stage->nir); 3216bf215546Sopenharmony_ci 3217bf215546Sopenharmony_ci nir_optimize(p_stage->nir, false); 3218bf215546Sopenharmony_ci pipeline_lower_nir(pipeline, p_stage, pipeline->layout); 3219bf215546Sopenharmony_ci lower_cs_shared(p_stage->nir); 3220bf215546Sopenharmony_ci 3221bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 3222bf215546Sopenharmony_ci 3223bf215546Sopenharmony_ci struct v3d_key key; 3224bf215546Sopenharmony_ci memset(&key, 0, sizeof(key)); 3225bf215546Sopenharmony_ci pipeline_populate_v3d_key(&key, p_stage, 0, 3226bf215546Sopenharmony_ci pipeline->device->features.robustBufferAccess); 3227bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] = 3228bf215546Sopenharmony_ci pipeline_compile_shader_variant(p_stage, &key, sizeof(key), 3229bf215546Sopenharmony_ci alloc, &result); 3230bf215546Sopenharmony_ci 3231bf215546Sopenharmony_ci if (result != VK_SUCCESS) 3232bf215546Sopenharmony_ci return result; 3233bf215546Sopenharmony_ci 3234bf215546Sopenharmony_ci if (!upload_assembly(pipeline)) 3235bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_DEVICE_MEMORY; 3236bf215546Sopenharmony_ci 3237bf215546Sopenharmony_ci v3dv_pipeline_cache_upload_pipeline(pipeline, cache); 3238bf215546Sopenharmony_ci 3239bf215546Sopenharmony_cisuccess: 3240bf215546Sopenharmony_ci 3241bf215546Sopenharmony_ci pipeline_check_buffer_device_address(pipeline); 3242bf215546Sopenharmony_ci 3243bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 3244bf215546Sopenharmony_ci write_creation_feedback(pipeline, 3245bf215546Sopenharmony_ci info->pNext, 3246bf215546Sopenharmony_ci &pipeline_feedback, 3247bf215546Sopenharmony_ci 1, 3248bf215546Sopenharmony_ci &info->stage); 3249bf215546Sopenharmony_ci 3250bf215546Sopenharmony_ci /* As we got the variants in pipeline->shared_data, after compiling we 3251bf215546Sopenharmony_ci * don't need the pipeline_stages. 3252bf215546Sopenharmony_ci */ 3253bf215546Sopenharmony_ci if (!needs_executable_info) 3254bf215546Sopenharmony_ci pipeline_free_stages(device, pipeline, alloc); 3255bf215546Sopenharmony_ci 3256bf215546Sopenharmony_ci pipeline_check_spill_size(pipeline); 3257bf215546Sopenharmony_ci 3258bf215546Sopenharmony_ci return VK_SUCCESS; 3259bf215546Sopenharmony_ci} 3260bf215546Sopenharmony_ci 3261bf215546Sopenharmony_cistatic VkResult 3262bf215546Sopenharmony_cicompute_pipeline_init(struct v3dv_pipeline *pipeline, 3263bf215546Sopenharmony_ci struct v3dv_device *device, 3264bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache, 3265bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *info, 3266bf215546Sopenharmony_ci const VkAllocationCallbacks *alloc) 3267bf215546Sopenharmony_ci{ 3268bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout); 3269bf215546Sopenharmony_ci 3270bf215546Sopenharmony_ci pipeline->device = device; 3271bf215546Sopenharmony_ci pipeline->layout = layout; 3272bf215546Sopenharmony_ci 3273bf215546Sopenharmony_ci VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc); 3274bf215546Sopenharmony_ci 3275bf215546Sopenharmony_ci return result; 3276bf215546Sopenharmony_ci} 3277bf215546Sopenharmony_ci 3278bf215546Sopenharmony_cistatic VkResult 3279bf215546Sopenharmony_cicompute_pipeline_create(VkDevice _device, 3280bf215546Sopenharmony_ci VkPipelineCache _cache, 3281bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfo, 3282bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 3283bf215546Sopenharmony_ci VkPipeline *pPipeline) 3284bf215546Sopenharmony_ci{ 3285bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 3286bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 3287bf215546Sopenharmony_ci 3288bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline; 3289bf215546Sopenharmony_ci VkResult result; 3290bf215546Sopenharmony_ci 3291bf215546Sopenharmony_ci /* Use the default pipeline cache if none is specified */ 3292bf215546Sopenharmony_ci if (cache == NULL && device->instance->default_pipeline_cache_enabled) 3293bf215546Sopenharmony_ci cache = &device->default_pipeline_cache; 3294bf215546Sopenharmony_ci 3295bf215546Sopenharmony_ci pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), 3296bf215546Sopenharmony_ci VK_OBJECT_TYPE_PIPELINE); 3297bf215546Sopenharmony_ci if (pipeline == NULL) 3298bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 3299bf215546Sopenharmony_ci 3300bf215546Sopenharmony_ci result = compute_pipeline_init(pipeline, device, cache, 3301bf215546Sopenharmony_ci pCreateInfo, pAllocator); 3302bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 3303bf215546Sopenharmony_ci v3dv_destroy_pipeline(pipeline, device, pAllocator); 3304bf215546Sopenharmony_ci if (result == VK_PIPELINE_COMPILE_REQUIRED) 3305bf215546Sopenharmony_ci *pPipeline = VK_NULL_HANDLE; 3306bf215546Sopenharmony_ci return result; 3307bf215546Sopenharmony_ci } 3308bf215546Sopenharmony_ci 3309bf215546Sopenharmony_ci *pPipeline = v3dv_pipeline_to_handle(pipeline); 3310bf215546Sopenharmony_ci 3311bf215546Sopenharmony_ci return VK_SUCCESS; 3312bf215546Sopenharmony_ci} 3313bf215546Sopenharmony_ci 3314bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 3315bf215546Sopenharmony_civ3dv_CreateComputePipelines(VkDevice _device, 3316bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 3317bf215546Sopenharmony_ci uint32_t createInfoCount, 3318bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfos, 3319bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 3320bf215546Sopenharmony_ci VkPipeline *pPipelines) 3321bf215546Sopenharmony_ci{ 3322bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 3323bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 3324bf215546Sopenharmony_ci 3325bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3326bf215546Sopenharmony_ci mtx_lock(&device->pdevice->mutex); 3327bf215546Sopenharmony_ci 3328bf215546Sopenharmony_ci uint32_t i = 0; 3329bf215546Sopenharmony_ci for (; i < createInfoCount; i++) { 3330bf215546Sopenharmony_ci VkResult local_result; 3331bf215546Sopenharmony_ci local_result = compute_pipeline_create(_device, 3332bf215546Sopenharmony_ci pipelineCache, 3333bf215546Sopenharmony_ci &pCreateInfos[i], 3334bf215546Sopenharmony_ci pAllocator, 3335bf215546Sopenharmony_ci &pPipelines[i]); 3336bf215546Sopenharmony_ci 3337bf215546Sopenharmony_ci if (local_result != VK_SUCCESS) { 3338bf215546Sopenharmony_ci result = local_result; 3339bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 3340bf215546Sopenharmony_ci 3341bf215546Sopenharmony_ci if (pCreateInfos[i].flags & 3342bf215546Sopenharmony_ci VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 3343bf215546Sopenharmony_ci break; 3344bf215546Sopenharmony_ci } 3345bf215546Sopenharmony_ci } 3346bf215546Sopenharmony_ci 3347bf215546Sopenharmony_ci for (; i < createInfoCount; i++) 3348bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 3349bf215546Sopenharmony_ci 3350bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3351bf215546Sopenharmony_ci mtx_unlock(&device->pdevice->mutex); 3352bf215546Sopenharmony_ci 3353bf215546Sopenharmony_ci return result; 3354bf215546Sopenharmony_ci} 3355bf215546Sopenharmony_ci 3356bf215546Sopenharmony_cistatic nir_shader * 3357bf215546Sopenharmony_cipipeline_get_nir(struct v3dv_pipeline *pipeline, 3358bf215546Sopenharmony_ci enum broadcom_shader_stage stage) 3359bf215546Sopenharmony_ci{ 3360bf215546Sopenharmony_ci switch (stage) { 3361bf215546Sopenharmony_ci case BROADCOM_SHADER_VERTEX: 3362bf215546Sopenharmony_ci if (pipeline->vs) 3363bf215546Sopenharmony_ci return pipeline->vs->nir; 3364bf215546Sopenharmony_ci break; 3365bf215546Sopenharmony_ci case BROADCOM_SHADER_VERTEX_BIN: 3366bf215546Sopenharmony_ci if(pipeline->vs_bin) 3367bf215546Sopenharmony_ci return pipeline->vs_bin->nir; 3368bf215546Sopenharmony_ci break; 3369bf215546Sopenharmony_ci case BROADCOM_SHADER_GEOMETRY: 3370bf215546Sopenharmony_ci if(pipeline->gs) 3371bf215546Sopenharmony_ci return pipeline->gs->nir; 3372bf215546Sopenharmony_ci break; 3373bf215546Sopenharmony_ci case BROADCOM_SHADER_GEOMETRY_BIN: 3374bf215546Sopenharmony_ci if (pipeline->gs_bin) 3375bf215546Sopenharmony_ci return pipeline->gs_bin->nir; 3376bf215546Sopenharmony_ci break; 3377bf215546Sopenharmony_ci case BROADCOM_SHADER_FRAGMENT: 3378bf215546Sopenharmony_ci if (pipeline->fs) 3379bf215546Sopenharmony_ci return pipeline->fs->nir; 3380bf215546Sopenharmony_ci break; 3381bf215546Sopenharmony_ci case BROADCOM_SHADER_COMPUTE: 3382bf215546Sopenharmony_ci if(pipeline->cs) 3383bf215546Sopenharmony_ci return pipeline->cs->nir; 3384bf215546Sopenharmony_ci break; 3385bf215546Sopenharmony_ci default: 3386bf215546Sopenharmony_ci unreachable("Unsupported shader stage"); 3387bf215546Sopenharmony_ci } 3388bf215546Sopenharmony_ci 3389bf215546Sopenharmony_ci return NULL; 3390bf215546Sopenharmony_ci} 3391bf215546Sopenharmony_ci 3392bf215546Sopenharmony_cistatic struct v3d_prog_data * 3393bf215546Sopenharmony_cipipeline_get_prog_data(struct v3dv_pipeline *pipeline, 3394bf215546Sopenharmony_ci enum broadcom_shader_stage stage) 3395bf215546Sopenharmony_ci{ 3396bf215546Sopenharmony_ci if (pipeline->shared_data->variants[stage]) 3397bf215546Sopenharmony_ci return pipeline->shared_data->variants[stage]->prog_data.base; 3398bf215546Sopenharmony_ci return NULL; 3399bf215546Sopenharmony_ci} 3400bf215546Sopenharmony_ci 3401bf215546Sopenharmony_cistatic uint64_t * 3402bf215546Sopenharmony_cipipeline_get_qpu(struct v3dv_pipeline *pipeline, 3403bf215546Sopenharmony_ci enum broadcom_shader_stage stage, 3404bf215546Sopenharmony_ci uint32_t *qpu_size) 3405bf215546Sopenharmony_ci{ 3406bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = 3407bf215546Sopenharmony_ci pipeline->shared_data->variants[stage]; 3408bf215546Sopenharmony_ci if (!variant) { 3409bf215546Sopenharmony_ci *qpu_size = 0; 3410bf215546Sopenharmony_ci return NULL; 3411bf215546Sopenharmony_ci } 3412bf215546Sopenharmony_ci 3413bf215546Sopenharmony_ci /* We expect the QPU BO to have been mapped before calling here */ 3414bf215546Sopenharmony_ci struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo; 3415bf215546Sopenharmony_ci assert(qpu_bo && qpu_bo->map_size >= variant->assembly_offset + 3416bf215546Sopenharmony_ci variant->qpu_insts_size); 3417bf215546Sopenharmony_ci 3418bf215546Sopenharmony_ci *qpu_size = variant->qpu_insts_size; 3419bf215546Sopenharmony_ci uint64_t *qpu = (uint64_t *) 3420bf215546Sopenharmony_ci (((uint8_t *) qpu_bo->map) + variant->assembly_offset); 3421bf215546Sopenharmony_ci return qpu; 3422bf215546Sopenharmony_ci} 3423bf215546Sopenharmony_ci 3424bf215546Sopenharmony_ci/* FIXME: we use the same macro in various drivers, maybe move it to 3425bf215546Sopenharmony_ci * the comon vk_util.h? 3426bf215546Sopenharmony_ci */ 3427bf215546Sopenharmony_ci#define WRITE_STR(field, ...) ({ \ 3428bf215546Sopenharmony_ci memset(field, 0, sizeof(field)); \ 3429bf215546Sopenharmony_ci UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \ 3430bf215546Sopenharmony_ci assert(_i > 0 && _i < sizeof(field)); \ 3431bf215546Sopenharmony_ci}) 3432bf215546Sopenharmony_ci 3433bf215546Sopenharmony_cistatic bool 3434bf215546Sopenharmony_ciwrite_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir, 3435bf215546Sopenharmony_ci const char *data) 3436bf215546Sopenharmony_ci{ 3437bf215546Sopenharmony_ci ir->isText = VK_TRUE; 3438bf215546Sopenharmony_ci 3439bf215546Sopenharmony_ci size_t data_len = strlen(data) + 1; 3440bf215546Sopenharmony_ci 3441bf215546Sopenharmony_ci if (ir->pData == NULL) { 3442bf215546Sopenharmony_ci ir->dataSize = data_len; 3443bf215546Sopenharmony_ci return true; 3444bf215546Sopenharmony_ci } 3445bf215546Sopenharmony_ci 3446bf215546Sopenharmony_ci strncpy(ir->pData, data, ir->dataSize); 3447bf215546Sopenharmony_ci if (ir->dataSize < data_len) 3448bf215546Sopenharmony_ci return false; 3449bf215546Sopenharmony_ci 3450bf215546Sopenharmony_ci ir->dataSize = data_len; 3451bf215546Sopenharmony_ci return true; 3452bf215546Sopenharmony_ci} 3453bf215546Sopenharmony_ci 3454bf215546Sopenharmony_cistatic void 3455bf215546Sopenharmony_ciappend(char **str, size_t *offset, const char *fmt, ...) 3456bf215546Sopenharmony_ci{ 3457bf215546Sopenharmony_ci va_list args; 3458bf215546Sopenharmony_ci va_start(args, fmt); 3459bf215546Sopenharmony_ci ralloc_vasprintf_rewrite_tail(str, offset, fmt, args); 3460bf215546Sopenharmony_ci va_end(args); 3461bf215546Sopenharmony_ci} 3462bf215546Sopenharmony_ci 3463bf215546Sopenharmony_cistatic void 3464bf215546Sopenharmony_cipipeline_collect_executable_data(struct v3dv_pipeline *pipeline) 3465bf215546Sopenharmony_ci{ 3466bf215546Sopenharmony_ci if (pipeline->executables.mem_ctx) 3467bf215546Sopenharmony_ci return; 3468bf215546Sopenharmony_ci 3469bf215546Sopenharmony_ci pipeline->executables.mem_ctx = ralloc_context(NULL); 3470bf215546Sopenharmony_ci util_dynarray_init(&pipeline->executables.data, 3471bf215546Sopenharmony_ci pipeline->executables.mem_ctx); 3472bf215546Sopenharmony_ci 3473bf215546Sopenharmony_ci /* Don't crash for failed/bogus pipelines */ 3474bf215546Sopenharmony_ci if (!pipeline->shared_data || !pipeline->shared_data->assembly_bo) 3475bf215546Sopenharmony_ci return; 3476bf215546Sopenharmony_ci 3477bf215546Sopenharmony_ci /* Map the assembly BO so we can read the pipeline's QPU code */ 3478bf215546Sopenharmony_ci struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo; 3479bf215546Sopenharmony_ci 3480bf215546Sopenharmony_ci if (!v3dv_bo_map(pipeline->device, qpu_bo, qpu_bo->size)) { 3481bf215546Sopenharmony_ci fprintf(stderr, "failed to map QPU buffer\n"); 3482bf215546Sopenharmony_ci return; 3483bf215546Sopenharmony_ci } 3484bf215546Sopenharmony_ci 3485bf215546Sopenharmony_ci for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) { 3486bf215546Sopenharmony_ci VkShaderStageFlags vk_stage = 3487bf215546Sopenharmony_ci mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s)); 3488bf215546Sopenharmony_ci if (!(vk_stage & pipeline->active_stages)) 3489bf215546Sopenharmony_ci continue; 3490bf215546Sopenharmony_ci 3491bf215546Sopenharmony_ci nir_shader *nir = pipeline_get_nir(pipeline, s); 3492bf215546Sopenharmony_ci char *nir_str = nir ? 3493bf215546Sopenharmony_ci nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL; 3494bf215546Sopenharmony_ci 3495bf215546Sopenharmony_ci char *qpu_str = NULL; 3496bf215546Sopenharmony_ci uint32_t qpu_size; 3497bf215546Sopenharmony_ci uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size); 3498bf215546Sopenharmony_ci if (qpu) { 3499bf215546Sopenharmony_ci uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t); 3500bf215546Sopenharmony_ci qpu_str = rzalloc_size(pipeline->executables.mem_ctx, 3501bf215546Sopenharmony_ci qpu_inst_count * 96); 3502bf215546Sopenharmony_ci size_t offset = 0; 3503bf215546Sopenharmony_ci for (int i = 0; i < qpu_inst_count; i++) { 3504bf215546Sopenharmony_ci const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]); 3505bf215546Sopenharmony_ci append(&qpu_str, &offset, "%s\n", str); 3506bf215546Sopenharmony_ci ralloc_free((void *)str); 3507bf215546Sopenharmony_ci } 3508bf215546Sopenharmony_ci } 3509bf215546Sopenharmony_ci 3510bf215546Sopenharmony_ci struct v3dv_pipeline_executable_data data = { 3511bf215546Sopenharmony_ci .stage = s, 3512bf215546Sopenharmony_ci .nir_str = nir_str, 3513bf215546Sopenharmony_ci .qpu_str = qpu_str, 3514bf215546Sopenharmony_ci }; 3515bf215546Sopenharmony_ci util_dynarray_append(&pipeline->executables.data, 3516bf215546Sopenharmony_ci struct v3dv_pipeline_executable_data, data); 3517bf215546Sopenharmony_ci } 3518bf215546Sopenharmony_ci 3519bf215546Sopenharmony_ci v3dv_bo_unmap(pipeline->device, qpu_bo); 3520bf215546Sopenharmony_ci} 3521bf215546Sopenharmony_ci 3522bf215546Sopenharmony_cistatic const struct v3dv_pipeline_executable_data * 3523bf215546Sopenharmony_cipipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index) 3524bf215546Sopenharmony_ci{ 3525bf215546Sopenharmony_ci assert(index < util_dynarray_num_elements(&pipeline->executables.data, 3526bf215546Sopenharmony_ci struct v3dv_pipeline_executable_data)); 3527bf215546Sopenharmony_ci return util_dynarray_element(&pipeline->executables.data, 3528bf215546Sopenharmony_ci struct v3dv_pipeline_executable_data, 3529bf215546Sopenharmony_ci index); 3530bf215546Sopenharmony_ci} 3531bf215546Sopenharmony_ci 3532bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 3533bf215546Sopenharmony_civ3dv_GetPipelineExecutableInternalRepresentationsKHR( 3534bf215546Sopenharmony_ci VkDevice device, 3535bf215546Sopenharmony_ci const VkPipelineExecutableInfoKHR *pExecutableInfo, 3536bf215546Sopenharmony_ci uint32_t *pInternalRepresentationCount, 3537bf215546Sopenharmony_ci VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations) 3538bf215546Sopenharmony_ci{ 3539bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline); 3540bf215546Sopenharmony_ci 3541bf215546Sopenharmony_ci pipeline_collect_executable_data(pipeline); 3542bf215546Sopenharmony_ci 3543bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out, 3544bf215546Sopenharmony_ci pInternalRepresentations, pInternalRepresentationCount); 3545bf215546Sopenharmony_ci 3546bf215546Sopenharmony_ci bool incomplete = false; 3547bf215546Sopenharmony_ci const struct v3dv_pipeline_executable_data *exe = 3548bf215546Sopenharmony_ci pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3549bf215546Sopenharmony_ci 3550bf215546Sopenharmony_ci if (exe->nir_str) { 3551bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, 3552bf215546Sopenharmony_ci &out, ir) { 3553bf215546Sopenharmony_ci WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage)); 3554bf215546Sopenharmony_ci WRITE_STR(ir->description, "Final NIR form"); 3555bf215546Sopenharmony_ci if (!write_ir_text(ir, exe->nir_str)) 3556bf215546Sopenharmony_ci incomplete = true; 3557bf215546Sopenharmony_ci } 3558bf215546Sopenharmony_ci } 3559bf215546Sopenharmony_ci 3560bf215546Sopenharmony_ci if (exe->qpu_str) { 3561bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, 3562bf215546Sopenharmony_ci &out, ir) { 3563bf215546Sopenharmony_ci WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage)); 3564bf215546Sopenharmony_ci WRITE_STR(ir->description, "Final QPU assembly"); 3565bf215546Sopenharmony_ci if (!write_ir_text(ir, exe->qpu_str)) 3566bf215546Sopenharmony_ci incomplete = true; 3567bf215546Sopenharmony_ci } 3568bf215546Sopenharmony_ci } 3569bf215546Sopenharmony_ci 3570bf215546Sopenharmony_ci return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out); 3571bf215546Sopenharmony_ci} 3572bf215546Sopenharmony_ci 3573bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 3574bf215546Sopenharmony_civ3dv_GetPipelineExecutablePropertiesKHR( 3575bf215546Sopenharmony_ci VkDevice device, 3576bf215546Sopenharmony_ci const VkPipelineInfoKHR *pPipelineInfo, 3577bf215546Sopenharmony_ci uint32_t *pExecutableCount, 3578bf215546Sopenharmony_ci VkPipelineExecutablePropertiesKHR *pProperties) 3579bf215546Sopenharmony_ci{ 3580bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline); 3581bf215546Sopenharmony_ci 3582bf215546Sopenharmony_ci pipeline_collect_executable_data(pipeline); 3583bf215546Sopenharmony_ci 3584bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, 3585bf215546Sopenharmony_ci pProperties, pExecutableCount); 3586bf215546Sopenharmony_ci 3587bf215546Sopenharmony_ci util_dynarray_foreach(&pipeline->executables.data, 3588bf215546Sopenharmony_ci struct v3dv_pipeline_executable_data, exe) { 3589bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) { 3590bf215546Sopenharmony_ci gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage); 3591bf215546Sopenharmony_ci props->stages = mesa_to_vk_shader_stage(mesa_stage); 3592bf215546Sopenharmony_ci 3593bf215546Sopenharmony_ci WRITE_STR(props->name, "%s (%s)", 3594bf215546Sopenharmony_ci _mesa_shader_stage_to_abbrev(mesa_stage), 3595bf215546Sopenharmony_ci broadcom_shader_stage_is_binning(exe->stage) ? 3596bf215546Sopenharmony_ci "Binning" : "Render"); 3597bf215546Sopenharmony_ci 3598bf215546Sopenharmony_ci WRITE_STR(props->description, "%s", 3599bf215546Sopenharmony_ci _mesa_shader_stage_to_string(mesa_stage)); 3600bf215546Sopenharmony_ci 3601bf215546Sopenharmony_ci props->subgroupSize = V3D_CHANNELS; 3602bf215546Sopenharmony_ci } 3603bf215546Sopenharmony_ci } 3604bf215546Sopenharmony_ci 3605bf215546Sopenharmony_ci return vk_outarray_status(&out); 3606bf215546Sopenharmony_ci} 3607bf215546Sopenharmony_ci 3608bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 3609bf215546Sopenharmony_civ3dv_GetPipelineExecutableStatisticsKHR( 3610bf215546Sopenharmony_ci VkDevice device, 3611bf215546Sopenharmony_ci const VkPipelineExecutableInfoKHR *pExecutableInfo, 3612bf215546Sopenharmony_ci uint32_t *pStatisticCount, 3613bf215546Sopenharmony_ci VkPipelineExecutableStatisticKHR *pStatistics) 3614bf215546Sopenharmony_ci{ 3615bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline); 3616bf215546Sopenharmony_ci 3617bf215546Sopenharmony_ci pipeline_collect_executable_data(pipeline); 3618bf215546Sopenharmony_ci 3619bf215546Sopenharmony_ci const struct v3dv_pipeline_executable_data *exe = 3620bf215546Sopenharmony_ci pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3621bf215546Sopenharmony_ci 3622bf215546Sopenharmony_ci struct v3d_prog_data *prog_data = 3623bf215546Sopenharmony_ci pipeline_get_prog_data(pipeline, exe->stage); 3624bf215546Sopenharmony_ci 3625bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = 3626bf215546Sopenharmony_ci pipeline->shared_data->variants[exe->stage]; 3627bf215546Sopenharmony_ci uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t); 3628bf215546Sopenharmony_ci 3629bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, 3630bf215546Sopenharmony_ci pStatistics, pStatisticCount); 3631bf215546Sopenharmony_ci 3632bf215546Sopenharmony_ci if (qpu_inst_count > 0) { 3633bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3634bf215546Sopenharmony_ci WRITE_STR(stat->name, "Compile Strategy"); 3635bf215546Sopenharmony_ci WRITE_STR(stat->description, "Chosen compile strategy index"); 3636bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3637bf215546Sopenharmony_ci stat->value.u64 = prog_data->compile_strategy_idx; 3638bf215546Sopenharmony_ci } 3639bf215546Sopenharmony_ci 3640bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3641bf215546Sopenharmony_ci WRITE_STR(stat->name, "Instruction Count"); 3642bf215546Sopenharmony_ci WRITE_STR(stat->description, "Number of QPU instructions"); 3643bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3644bf215546Sopenharmony_ci stat->value.u64 = qpu_inst_count; 3645bf215546Sopenharmony_ci } 3646bf215546Sopenharmony_ci 3647bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3648bf215546Sopenharmony_ci WRITE_STR(stat->name, "Thread Count"); 3649bf215546Sopenharmony_ci WRITE_STR(stat->description, "Number of QPU threads dispatched"); 3650bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3651bf215546Sopenharmony_ci stat->value.u64 = prog_data->threads; 3652bf215546Sopenharmony_ci } 3653bf215546Sopenharmony_ci 3654bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3655bf215546Sopenharmony_ci WRITE_STR(stat->name, "Spill Size"); 3656bf215546Sopenharmony_ci WRITE_STR(stat->description, "Size of the spill buffer in bytes"); 3657bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3658bf215546Sopenharmony_ci stat->value.u64 = prog_data->spill_size; 3659bf215546Sopenharmony_ci } 3660bf215546Sopenharmony_ci 3661bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3662bf215546Sopenharmony_ci WRITE_STR(stat->name, "TMU Spills"); 3663bf215546Sopenharmony_ci WRITE_STR(stat->description, "Number of times a register was spilled " 3664bf215546Sopenharmony_ci "to memory"); 3665bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3666bf215546Sopenharmony_ci stat->value.u64 = prog_data->spill_size; 3667bf215546Sopenharmony_ci } 3668bf215546Sopenharmony_ci 3669bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3670bf215546Sopenharmony_ci WRITE_STR(stat->name, "TMU Fills"); 3671bf215546Sopenharmony_ci WRITE_STR(stat->description, "Number of times a register was filled " 3672bf215546Sopenharmony_ci "from memory"); 3673bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3674bf215546Sopenharmony_ci stat->value.u64 = prog_data->spill_size; 3675bf215546Sopenharmony_ci } 3676bf215546Sopenharmony_ci 3677bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 3678bf215546Sopenharmony_ci WRITE_STR(stat->name, "QPU Read Stalls"); 3679bf215546Sopenharmony_ci WRITE_STR(stat->description, "Number of cycles the QPU stalls for a " 3680bf215546Sopenharmony_ci "register read dependency"); 3681bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3682bf215546Sopenharmony_ci stat->value.u64 = prog_data->qpu_read_stalls; 3683bf215546Sopenharmony_ci } 3684bf215546Sopenharmony_ci } 3685bf215546Sopenharmony_ci 3686bf215546Sopenharmony_ci return vk_outarray_status(&out); 3687bf215546Sopenharmony_ci} 3688