1/* 2 * Copyright © 2019 Raspberry Pi Ltd 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "vk_util.h" 25 26#include "v3dv_debug.h" 27#include "v3dv_private.h" 28 29#include "common/v3d_debug.h" 30#include "qpu/qpu_disasm.h" 31 32#include "compiler/nir/nir_builder.h" 33#include "nir/nir_serialize.h" 34 35#include "util/u_atomic.h" 36#include "util/u_prim.h" 37#include "util/os_time.h" 38 39#include "vk_pipeline.h" 40#include "vulkan/util/vk_format.h" 41 42static VkResult 43compute_vpm_config(struct v3dv_pipeline *pipeline); 44 45void 46v3dv_print_v3d_key(struct v3d_key *key, 47 uint32_t v3d_key_size) 48{ 49 struct mesa_sha1 ctx; 50 unsigned char sha1[20]; 51 char sha1buf[41]; 52 53 _mesa_sha1_init(&ctx); 54 55 _mesa_sha1_update(&ctx, key, v3d_key_size); 56 57 _mesa_sha1_final(&ctx, sha1); 58 _mesa_sha1_format(sha1buf, sha1); 59 60 fprintf(stderr, "key %p: %s\n", key, sha1buf); 61} 62 63static void 64pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage) 65{ 66 VkPipelineShaderStageCreateInfo info = { 67 .module = vk_shader_module_handle_from_nir(p_stage->nir), 68 .pName = p_stage->entrypoint, 69 .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage), 70 }; 71 72 vk_pipeline_hash_shader_stage(&info, p_stage->shader_sha1); 73} 74 75void 76v3dv_shader_variant_destroy(struct v3dv_device *device, 77 struct v3dv_shader_variant *variant) 78{ 79 /* The assembly BO is shared by all variants in the pipeline, so it can't 80 * be freed here and should be freed with the pipeline 81 */ 82 if (variant->qpu_insts) 83 free(variant->qpu_insts); 84 ralloc_free(variant->prog_data.base); 85 vk_free(&device->vk.alloc, variant); 86} 87 88static void 89destroy_pipeline_stage(struct v3dv_device *device, 90 struct v3dv_pipeline_stage *p_stage, 91 const VkAllocationCallbacks *pAllocator) 92{ 93 if (!p_stage) 94 return; 95 96 ralloc_free(p_stage->nir); 97 vk_free2(&device->vk.alloc, pAllocator, p_stage); 98} 99 100static void 101pipeline_free_stages(struct v3dv_device *device, 102 struct v3dv_pipeline *pipeline, 103 const VkAllocationCallbacks *pAllocator) 104{ 105 assert(pipeline); 106 107 /* FIXME: we can't just use a loop over mesa stage due the bin, would be 108 * good to find an alternative. 
109 */ 110 destroy_pipeline_stage(device, pipeline->vs, pAllocator); 111 destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); 112 destroy_pipeline_stage(device, pipeline->gs, pAllocator); 113 destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator); 114 destroy_pipeline_stage(device, pipeline->fs, pAllocator); 115 destroy_pipeline_stage(device, pipeline->cs, pAllocator); 116 117 pipeline->vs = NULL; 118 pipeline->vs_bin = NULL; 119 pipeline->gs = NULL; 120 pipeline->gs_bin = NULL; 121 pipeline->fs = NULL; 122 pipeline->cs = NULL; 123} 124 125static void 126v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, 127 struct v3dv_device *device, 128 const VkAllocationCallbacks *pAllocator) 129{ 130 if (!pipeline) 131 return; 132 133 pipeline_free_stages(device, pipeline, pAllocator); 134 135 if (pipeline->shared_data) { 136 v3dv_pipeline_shared_data_unref(device, pipeline->shared_data); 137 pipeline->shared_data = NULL; 138 } 139 140 if (pipeline->spill.bo) { 141 assert(pipeline->spill.size_per_thread > 0); 142 v3dv_bo_free(device, pipeline->spill.bo); 143 } 144 145 if (pipeline->default_attribute_values) { 146 v3dv_bo_free(device, pipeline->default_attribute_values); 147 pipeline->default_attribute_values = NULL; 148 } 149 150 if (pipeline->executables.mem_ctx) 151 ralloc_free(pipeline->executables.mem_ctx); 152 153 vk_object_free(&device->vk, pAllocator, pipeline); 154} 155 156VKAPI_ATTR void VKAPI_CALL 157v3dv_DestroyPipeline(VkDevice _device, 158 VkPipeline _pipeline, 159 const VkAllocationCallbacks *pAllocator) 160{ 161 V3DV_FROM_HANDLE(v3dv_device, device, _device); 162 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); 163 164 if (!pipeline) 165 return; 166 167 v3dv_destroy_pipeline(pipeline, device, pAllocator); 168} 169 170static const struct spirv_to_nir_options default_spirv_options = { 171 .caps = { 172 .device_group = true, 173 .float_controls = true, 174 .multiview = true, 175 .storage_8bit = true, 176 .storage_16bit = true, 177 .subgroup_basic = true, 178 .variable_pointers = true, 179 .vk_memory_model = true, 180 .vk_memory_model_device_scope = true, 181 .physical_storage_buffer_address = true, 182 }, 183 .ubo_addr_format = nir_address_format_32bit_index_offset, 184 .ssbo_addr_format = nir_address_format_32bit_index_offset, 185 .phys_ssbo_addr_format = nir_address_format_2x32bit_global, 186 .push_const_addr_format = nir_address_format_logical, 187 .shared_addr_format = nir_address_format_32bit_offset, 188}; 189 190const nir_shader_compiler_options v3dv_nir_options = { 191 .lower_uadd_sat = true, 192 .lower_usub_sat = true, 193 .lower_iadd_sat = true, 194 .lower_all_io_to_temps = true, 195 .lower_extract_byte = true, 196 .lower_extract_word = true, 197 .lower_insert_byte = true, 198 .lower_insert_word = true, 199 .lower_bitfield_insert_to_shifts = true, 200 .lower_bitfield_extract_to_shifts = true, 201 .lower_bitfield_reverse = true, 202 .lower_bit_count = true, 203 .lower_cs_local_id_to_index = true, 204 .lower_ffract = true, 205 .lower_fmod = true, 206 .lower_pack_unorm_2x16 = true, 207 .lower_pack_snorm_2x16 = true, 208 .lower_unpack_unorm_2x16 = true, 209 .lower_unpack_snorm_2x16 = true, 210 .lower_pack_unorm_4x8 = true, 211 .lower_pack_snorm_4x8 = true, 212 .lower_unpack_unorm_4x8 = true, 213 .lower_unpack_snorm_4x8 = true, 214 .lower_pack_half_2x16 = true, 215 .lower_unpack_half_2x16 = true, 216 .lower_pack_32_2x16 = true, 217 .lower_pack_32_2x16_split = true, 218 .lower_unpack_32_2x16_split = true, 219 .lower_mul_2x32_64 = true, 220 .lower_fdiv = true, 221 
.lower_find_lsb = true, 222 .lower_ffma16 = true, 223 .lower_ffma32 = true, 224 .lower_ffma64 = true, 225 .lower_flrp32 = true, 226 .lower_fpow = true, 227 .lower_fsat = true, 228 .lower_fsqrt = true, 229 .lower_ifind_msb = true, 230 .lower_isign = true, 231 .lower_ldexp = true, 232 .lower_mul_high = true, 233 .lower_wpos_pntc = true, 234 .lower_rotate = true, 235 .lower_to_scalar = true, 236 .lower_device_index_to_zero = true, 237 .has_fsub = true, 238 .has_isub = true, 239 .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic 240 * needs to be supported */ 241 .lower_interpolate_at = true, 242 .max_unroll_iterations = 16, 243 .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp), 244 .divergence_analysis_options = 245 nir_divergence_multiple_workgroup_per_compute_subgroup 246}; 247 248const nir_shader_compiler_options * 249v3dv_pipeline_get_nir_options(void) 250{ 251 return &v3dv_nir_options; 252} 253 254#define OPT(pass, ...) ({ \ 255 bool this_progress = false; \ 256 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 257 if (this_progress) \ 258 progress = true; \ 259 this_progress; \ 260}) 261 262static void 263nir_optimize(nir_shader *nir, bool allow_copies) 264{ 265 bool progress; 266 267 do { 268 progress = false; 269 OPT(nir_split_array_vars, nir_var_function_temp); 270 OPT(nir_shrink_vec_array_vars, nir_var_function_temp); 271 OPT(nir_opt_deref); 272 OPT(nir_lower_vars_to_ssa); 273 if (allow_copies) { 274 /* Only run this pass in the first call to nir_optimize. Later calls 275 * assume that we've lowered away any copy_deref instructions and we 276 * don't want to introduce any more. 277 */ 278 OPT(nir_opt_find_array_copies); 279 } 280 281 OPT(nir_remove_dead_variables, 282 (nir_variable_mode)(nir_var_function_temp | 283 nir_var_shader_temp | 284 nir_var_mem_shared), 285 NULL); 286 287 OPT(nir_opt_copy_prop_vars); 288 OPT(nir_opt_dead_write_vars); 289 OPT(nir_opt_combine_stores, nir_var_all); 290 291 OPT(nir_lower_alu_to_scalar, NULL, NULL); 292 293 OPT(nir_copy_prop); 294 OPT(nir_lower_phis_to_scalar, false); 295 296 OPT(nir_copy_prop); 297 OPT(nir_opt_dce); 298 OPT(nir_opt_cse); 299 OPT(nir_opt_combine_stores, nir_var_all); 300 301 /* Passing 0 to the peephole select pass causes it to convert 302 * if-statements that contain only move instructions in the branches 303 * regardless of the count. 304 * 305 * Passing 1 to the peephole select pass causes it to convert 306 * if-statements that contain at most a single ALU instruction (total) 307 * in both branches. 308 */ 309 OPT(nir_opt_peephole_select, 0, false, false); 310 OPT(nir_opt_peephole_select, 8, false, true); 311 312 OPT(nir_opt_intrinsics); 313 OPT(nir_opt_idiv_const, 32); 314 OPT(nir_opt_algebraic); 315 OPT(nir_lower_alu); 316 OPT(nir_opt_constant_folding); 317 318 OPT(nir_opt_dead_cf); 319 if (nir_opt_trivial_continues(nir)) { 320 progress = true; 321 OPT(nir_copy_prop); 322 OPT(nir_opt_dce); 323 } 324 OPT(nir_opt_conditional_discard); 325 326 OPT(nir_opt_remove_phis); 327 OPT(nir_opt_gcm, false); 328 OPT(nir_opt_if, nir_opt_if_optimize_phi_true_false); 329 OPT(nir_opt_undef); 330 OPT(nir_lower_pack); 331 332 /* There are two optimizations that we don't do here, and we rely on the 333 * backend: 334 * 335 * nir_lower_flrp only needs to be called once, as nothing should 336 * rematerialize any flrps. As we are already calling it on the backend 337 * compiler, we don't call it again. 
338 * 339 * nir_opt_loop_unroll: as the backend includes custom strategies in 340 * order to get the lowest spill/fills possible, and some of them 341 * include disable loop unrolling. 342 * 343 * FIXME: ideally we would like to just remove this method and 344 * v3d_optimize_nir. But: 345 * 346 * * Using it leads to some regressions on Vulkan CTS tests, due to 347 * some lowering use there 348 * * We would need to move to the backend some additional 349 * lowerings/optimizations that are used on the Vulkan 350 * frontend. That would require to check that we are not getting any 351 * regression or performance drop on OpenGL 352 * 353 * For now we would keep this Vulkan fronted nir_optimize 354 */ 355 356 } while (progress); 357} 358 359static void 360preprocess_nir(nir_shader *nir) 361{ 362 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { 363 .frag_coord = true, 364 .point_coord = true, 365 }; 366 NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); 367 368 /* Vulkan uses the separate-shader linking model */ 369 nir->info.separate_shader = true; 370 371 /* Make sure we lower variable initializers on output variables so that 372 * nir_remove_dead_variables below sees the corresponding stores 373 */ 374 NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out); 375 376 if (nir->info.stage == MESA_SHADER_FRAGMENT) 377 NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out); 378 if (nir->info.stage == MESA_SHADER_FRAGMENT) { 379 NIR_PASS(_, nir, nir_lower_input_attachments, 380 &(nir_input_attachment_options) { 381 .use_fragcoord_sysval = false, 382 }); 383 } 384 385 NIR_PASS_V(nir, nir_lower_io_to_temporaries, 386 nir_shader_get_entrypoint(nir), true, false); 387 388 NIR_PASS(_, nir, nir_lower_system_values); 389 390 NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL); 391 392 NIR_PASS(_, nir, nir_normalize_cubemap_coords); 393 394 NIR_PASS(_, nir, nir_lower_global_vars_to_local); 395 396 NIR_PASS(_, nir, nir_split_var_copies); 397 NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp); 398 399 nir_optimize(nir, true); 400 401 NIR_PASS(_, nir, nir_lower_explicit_io, 402 nir_var_mem_push_const, 403 nir_address_format_32bit_offset); 404 405 NIR_PASS(_, nir, nir_lower_explicit_io, 406 nir_var_mem_ubo | nir_var_mem_ssbo, 407 nir_address_format_32bit_index_offset); 408 409 NIR_PASS(_, nir, nir_lower_explicit_io, 410 nir_var_mem_global, 411 nir_address_format_2x32bit_global); 412 413 NIR_PASS(_, nir, nir_lower_load_const_to_scalar); 414 415 /* Lower a bunch of stuff */ 416 NIR_PASS(_, nir, nir_lower_var_copies); 417 418 NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX); 419 420 NIR_PASS(_, nir, nir_lower_indirect_derefs, 421 nir_var_function_temp, 2); 422 423 NIR_PASS(_, nir, nir_lower_array_deref_of_vec, 424 nir_var_mem_ubo | nir_var_mem_ssbo, 425 nir_lower_direct_array_deref_of_vec_load); 426 427 NIR_PASS(_, nir, nir_lower_frexp); 428 429 /* Get rid of split copies */ 430 nir_optimize(nir, false); 431} 432 433static nir_shader * 434shader_module_compile_to_nir(struct v3dv_device *device, 435 struct v3dv_pipeline_stage *stage) 436{ 437 nir_shader *nir; 438 const nir_shader_compiler_options *nir_options = &v3dv_nir_options; 439 440 441 if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) && stage->module->nir == NULL) 442 v3dv_print_spirv(stage->module->data, stage->module->size, stderr); 443 444 /* vk_shader_module_to_nir also handles internal shaders, when module->nir 445 * != NULL. 
It also calls nir_validate_shader on both cases, so we don't 446 * call it again here. 447 */ 448 VkResult result = vk_shader_module_to_nir(&device->vk, stage->module, 449 broadcom_shader_stage_to_gl(stage->stage), 450 stage->entrypoint, 451 stage->spec_info, 452 &default_spirv_options, 453 nir_options, 454 NULL, &nir); 455 if (result != VK_SUCCESS) 456 return NULL; 457 assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage)); 458 459 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERDB) && stage->module->nir == NULL) { 460 char sha1buf[41]; 461 _mesa_sha1_format(sha1buf, stage->pipeline->sha1); 462 nir->info.name = ralloc_strdup(nir, sha1buf); 463 } 464 465 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 466 v3d_debug_flag_for_shader_stage( 467 broadcom_shader_stage_to_gl(stage->stage))))) { 468 fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n", 469 broadcom_shader_stage_name(stage->stage), 470 stage->program_id); 471 nir_print_shader(nir, stderr); 472 fprintf(stderr, "\n"); 473 } 474 475 preprocess_nir(nir); 476 477 return nir; 478} 479 480static int 481type_size_vec4(const struct glsl_type *type, bool bindless) 482{ 483 return glsl_count_attribute_slots(type, false); 484} 485 486/* FIXME: the number of parameters for this method is somewhat big. Perhaps 487 * rethink. 488 */ 489static unsigned 490descriptor_map_add(struct v3dv_descriptor_map *map, 491 int set, 492 int binding, 493 int array_index, 494 int array_size, 495 int start_index, 496 uint8_t return_size) 497{ 498 assert(array_index < array_size); 499 assert(return_size == 16 || return_size == 32); 500 501 unsigned index = start_index; 502 for (; index < map->num_desc; index++) { 503 if (map->used[index] && 504 set == map->set[index] && 505 binding == map->binding[index] && 506 array_index == map->array_index[index]) { 507 assert(array_size == map->array_size[index]); 508 if (return_size != map->return_size[index]) { 509 /* It the return_size is different it means that the same sampler 510 * was used for operations with different precision 511 * requirement. In this case we need to ensure that we use the 512 * larger one. 
513 */ 514 map->return_size[index] = 32; 515 } 516 return index; 517 } else if (!map->used[index]) { 518 break; 519 } 520 } 521 522 assert(index < DESCRIPTOR_MAP_SIZE); 523 assert(!map->used[index]); 524 525 map->used[index] = true; 526 map->set[index] = set; 527 map->binding[index] = binding; 528 map->array_index[index] = array_index; 529 map->array_size[index] = array_size; 530 map->return_size[index] = return_size; 531 map->num_desc = MAX2(map->num_desc, index + 1); 532 533 return index; 534} 535 536struct lower_pipeline_layout_state { 537 struct v3dv_pipeline *pipeline; 538 const struct v3dv_pipeline_layout *layout; 539 bool needs_default_sampler_state; 540}; 541 542 543static void 544lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, 545 struct lower_pipeline_layout_state *state) 546{ 547 assert(instr->intrinsic == nir_intrinsic_load_push_constant); 548 instr->intrinsic = nir_intrinsic_load_uniform; 549} 550 551static struct v3dv_descriptor_map* 552pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline, 553 VkDescriptorType desc_type, 554 gl_shader_stage gl_stage, 555 bool is_sampler) 556{ 557 enum broadcom_shader_stage broadcom_stage = 558 gl_shader_stage_to_broadcom(gl_stage); 559 560 assert(pipeline->shared_data && 561 pipeline->shared_data->maps[broadcom_stage]); 562 563 switch(desc_type) { 564 case VK_DESCRIPTOR_TYPE_SAMPLER: 565 return &pipeline->shared_data->maps[broadcom_stage]->sampler_map; 566 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 567 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 568 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 569 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 570 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 571 return &pipeline->shared_data->maps[broadcom_stage]->texture_map; 572 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 573 return is_sampler ? 
574 &pipeline->shared_data->maps[broadcom_stage]->sampler_map : 575 &pipeline->shared_data->maps[broadcom_stage]->texture_map; 576 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 577 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 578 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: 579 return &pipeline->shared_data->maps[broadcom_stage]->ubo_map; 580 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 581 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 582 return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map; 583 default: 584 unreachable("Descriptor type unknown or not having a descriptor map"); 585 } 586} 587 588/* Gathers info from the intrinsic (set and binding) and then lowers it so it 589 * could be used by the v3d_compiler */ 590static void 591lower_vulkan_resource_index(nir_builder *b, 592 nir_intrinsic_instr *instr, 593 struct lower_pipeline_layout_state *state) 594{ 595 assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index); 596 597 nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); 598 599 unsigned set = nir_intrinsic_desc_set(instr); 600 unsigned binding = nir_intrinsic_binding(instr); 601 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; 602 struct v3dv_descriptor_set_binding_layout *binding_layout = 603 &set_layout->binding[binding]; 604 unsigned index = 0; 605 606 switch (binding_layout->type) { 607 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 608 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 609 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 610 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 611 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: { 612 struct v3dv_descriptor_map *descriptor_map = 613 pipeline_get_descriptor_map(state->pipeline, binding_layout->type, 614 b->shader->info.stage, false); 615 616 if (!const_val) 617 unreachable("non-constant vulkan_resource_index array index"); 618 619 /* At compile-time we will need to know if we are processing a UBO load 620 * for an inline or a regular UBO so we can handle inline loads like 621 * push constants. At the level of NIR level however, the inline 622 * information is gone, so we rely on the index to make this distinction. 623 * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for 624 * inline buffers. This means that at the descriptor map level 625 * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1, 626 * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS. 627 */ 628 uint32_t start_index = 0; 629 if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || 630 binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { 631 start_index = MAX_INLINE_UNIFORM_BUFFERS; 632 } 633 634 index = descriptor_map_add(descriptor_map, set, binding, 635 const_val->u32, 636 binding_layout->array_size, 637 start_index, 638 32 /* return_size: doesn't really apply for this case */); 639 640 /* We always reserve index 0 for push constants */ 641 if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || 642 binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || 643 binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) { 644 index++; 645 } 646 647 break; 648 } 649 650 default: 651 unreachable("unsupported descriptor type for vulkan_resource_index"); 652 break; 653 } 654 655 /* Since we use the deref pass, both vulkan_resource_index and 656 * vulkan_load_descriptor return a vec2 providing an index and 657 * offset. Our backend compiler only cares about the index part. 
658 */ 659 nir_ssa_def_rewrite_uses(&instr->dest.ssa, 660 nir_imm_ivec2(b, index, 0)); 661 nir_instr_remove(&instr->instr); 662} 663 664/* Returns return_size, so it could be used for the case of not having a 665 * sampler object 666 */ 667static uint8_t 668lower_tex_src_to_offset(nir_builder *b, 669 nir_tex_instr *instr, 670 unsigned src_idx, 671 struct lower_pipeline_layout_state *state) 672{ 673 nir_ssa_def *index = NULL; 674 unsigned base_index = 0; 675 unsigned array_elements = 1; 676 nir_tex_src *src = &instr->src[src_idx]; 677 bool is_sampler = src->src_type == nir_tex_src_sampler_deref; 678 679 /* We compute first the offsets */ 680 nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr); 681 while (deref->deref_type != nir_deref_type_var) { 682 assert(deref->parent.is_ssa); 683 nir_deref_instr *parent = 684 nir_instr_as_deref(deref->parent.ssa->parent_instr); 685 686 assert(deref->deref_type == nir_deref_type_array); 687 688 if (nir_src_is_const(deref->arr.index) && index == NULL) { 689 /* We're still building a direct index */ 690 base_index += nir_src_as_uint(deref->arr.index) * array_elements; 691 } else { 692 if (index == NULL) { 693 /* We used to be direct but not anymore */ 694 index = nir_imm_int(b, base_index); 695 base_index = 0; 696 } 697 698 index = nir_iadd(b, index, 699 nir_imul(b, nir_imm_int(b, array_elements), 700 nir_ssa_for_src(b, deref->arr.index, 1))); 701 } 702 703 array_elements *= glsl_get_length(parent->type); 704 705 deref = parent; 706 } 707 708 if (index) 709 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); 710 711 /* We have the offsets, we apply them, rewriting the source or removing 712 * instr if needed 713 */ 714 if (index) { 715 nir_instr_rewrite_src(&instr->instr, &src->src, 716 nir_src_for_ssa(index)); 717 718 src->src_type = is_sampler ? 719 nir_tex_src_sampler_offset : 720 nir_tex_src_texture_offset; 721 } else { 722 nir_tex_instr_remove_src(instr, src_idx); 723 } 724 725 uint32_t set = deref->var->data.descriptor_set; 726 uint32_t binding = deref->var->data.binding; 727 /* FIXME: this is a really simplified check for the precision to be used 728 * for the sampling. Right now we are ony checking for the variables used 729 * on the operation itself, but there are other cases that we could use to 730 * infer the precision requirement. 731 */ 732 bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM || 733 deref->var->data.precision == GLSL_PRECISION_LOW; 734 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; 735 struct v3dv_descriptor_set_binding_layout *binding_layout = 736 &set_layout->binding[binding]; 737 738 /* For input attachments, the shader includes the attachment_idx. As we are 739 * treating them as a texture, we only want the base_index 740 */ 741 uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ? 742 deref->var->data.index + base_index : 743 base_index; 744 745 uint8_t return_size; 746 if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT)) 747 return_size = 16; 748 else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT)) 749 return_size = 32; 750 else 751 return_size = relaxed_precision || instr->is_shadow ? 
16 : 32; 752 753 struct v3dv_descriptor_map *map = 754 pipeline_get_descriptor_map(state->pipeline, binding_layout->type, 755 b->shader->info.stage, is_sampler); 756 int desc_index = 757 descriptor_map_add(map, 758 deref->var->data.descriptor_set, 759 deref->var->data.binding, 760 array_index, 761 binding_layout->array_size, 762 0, 763 return_size); 764 765 if (is_sampler) 766 instr->sampler_index = desc_index; 767 else 768 instr->texture_index = desc_index; 769 770 return return_size; 771} 772 773static bool 774lower_sampler(nir_builder *b, 775 nir_tex_instr *instr, 776 struct lower_pipeline_layout_state *state) 777{ 778 uint8_t return_size = 0; 779 780 int texture_idx = 781 nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); 782 783 if (texture_idx >= 0) 784 return_size = lower_tex_src_to_offset(b, instr, texture_idx, state); 785 786 int sampler_idx = 787 nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); 788 789 if (sampler_idx >= 0) 790 lower_tex_src_to_offset(b, instr, sampler_idx, state); 791 792 if (texture_idx < 0 && sampler_idx < 0) 793 return false; 794 795 /* If we don't have a sampler, we assign it the idx we reserve for this 796 * case, and we ensure that it is using the correct return size. 797 */ 798 if (sampler_idx < 0) { 799 state->needs_default_sampler_state = true; 800 instr->sampler_index = return_size == 16 ? 801 V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX; 802 } 803 804 return true; 805} 806 807/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */ 808static void 809lower_image_deref(nir_builder *b, 810 nir_intrinsic_instr *instr, 811 struct lower_pipeline_layout_state *state) 812{ 813 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); 814 nir_ssa_def *index = NULL; 815 unsigned array_elements = 1; 816 unsigned base_index = 0; 817 818 while (deref->deref_type != nir_deref_type_var) { 819 assert(deref->parent.is_ssa); 820 nir_deref_instr *parent = 821 nir_instr_as_deref(deref->parent.ssa->parent_instr); 822 823 assert(deref->deref_type == nir_deref_type_array); 824 825 if (nir_src_is_const(deref->arr.index) && index == NULL) { 826 /* We're still building a direct index */ 827 base_index += nir_src_as_uint(deref->arr.index) * array_elements; 828 } else { 829 if (index == NULL) { 830 /* We used to be direct but not anymore */ 831 index = nir_imm_int(b, base_index); 832 base_index = 0; 833 } 834 835 index = nir_iadd(b, index, 836 nir_imul(b, nir_imm_int(b, array_elements), 837 nir_ssa_for_src(b, deref->arr.index, 1))); 838 } 839 840 array_elements *= glsl_get_length(parent->type); 841 842 deref = parent; 843 } 844 845 if (index) 846 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); 847 848 uint32_t set = deref->var->data.descriptor_set; 849 uint32_t binding = deref->var->data.binding; 850 struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; 851 struct v3dv_descriptor_set_binding_layout *binding_layout = 852 &set_layout->binding[binding]; 853 854 uint32_t array_index = deref->var->data.index + base_index; 855 856 assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || 857 binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); 858 859 struct v3dv_descriptor_map *map = 860 pipeline_get_descriptor_map(state->pipeline, binding_layout->type, 861 b->shader->info.stage, false); 862 863 int desc_index = 864 descriptor_map_add(map, 865 deref->var->data.descriptor_set, 866 deref->var->data.binding, 867 array_index, 868 binding_layout->array_size, 869 0, 870 32 /* 
return_size: doesn't apply for textures */); 871 872 /* Note: we don't need to do anything here in relation to the precision and 873 * the output size because for images we can infer that info from the image 874 * intrinsic, that includes the image format (see 875 * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler. 876 */ 877 878 index = nir_imm_int(b, desc_index); 879 880 nir_rewrite_image_intrinsic(instr, index, false); 881} 882 883static bool 884lower_intrinsic(nir_builder *b, 885 nir_intrinsic_instr *instr, 886 struct lower_pipeline_layout_state *state) 887{ 888 switch (instr->intrinsic) { 889 case nir_intrinsic_load_push_constant: 890 lower_load_push_constant(b, instr, state); 891 return true; 892 893 case nir_intrinsic_vulkan_resource_index: 894 lower_vulkan_resource_index(b, instr, state); 895 return true; 896 897 case nir_intrinsic_load_vulkan_descriptor: { 898 /* Loading the descriptor happens as part of load/store instructions, 899 * so for us this is a no-op. 900 */ 901 nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa); 902 nir_instr_remove(&instr->instr); 903 return true; 904 } 905 906 case nir_intrinsic_image_deref_load: 907 case nir_intrinsic_image_deref_store: 908 case nir_intrinsic_image_deref_atomic_add: 909 case nir_intrinsic_image_deref_atomic_imin: 910 case nir_intrinsic_image_deref_atomic_umin: 911 case nir_intrinsic_image_deref_atomic_imax: 912 case nir_intrinsic_image_deref_atomic_umax: 913 case nir_intrinsic_image_deref_atomic_and: 914 case nir_intrinsic_image_deref_atomic_or: 915 case nir_intrinsic_image_deref_atomic_xor: 916 case nir_intrinsic_image_deref_atomic_exchange: 917 case nir_intrinsic_image_deref_atomic_comp_swap: 918 case nir_intrinsic_image_deref_size: 919 case nir_intrinsic_image_deref_samples: 920 lower_image_deref(b, instr, state); 921 return true; 922 923 default: 924 return false; 925 } 926} 927 928static bool 929lower_pipeline_layout_cb(nir_builder *b, 930 nir_instr *instr, 931 void *_state) 932{ 933 bool progress = false; 934 struct lower_pipeline_layout_state *state = _state; 935 936 b->cursor = nir_before_instr(instr); 937 switch (instr->type) { 938 case nir_instr_type_tex: 939 progress |= lower_sampler(b, nir_instr_as_tex(instr), state); 940 break; 941 case nir_instr_type_intrinsic: 942 progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state); 943 break; 944 default: 945 break; 946 } 947 948 return progress; 949} 950 951static bool 952lower_pipeline_layout_info(nir_shader *shader, 953 struct v3dv_pipeline *pipeline, 954 const struct v3dv_pipeline_layout *layout, 955 bool *needs_default_sampler_state) 956{ 957 bool progress = false; 958 959 struct lower_pipeline_layout_state state = { 960 .pipeline = pipeline, 961 .layout = layout, 962 .needs_default_sampler_state = false, 963 }; 964 965 progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb, 966 nir_metadata_block_index | 967 nir_metadata_dominance, 968 &state); 969 970 *needs_default_sampler_state = state.needs_default_sampler_state; 971 972 return progress; 973} 974 975 976static void 977lower_fs_io(nir_shader *nir) 978{ 979 /* Our backend doesn't handle array fragment shader outputs */ 980 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 981 NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL); 982 983 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 984 MESA_SHADER_FRAGMENT); 985 986 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 987 MESA_SHADER_FRAGMENT); 
988 989 NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 990 type_size_vec4, 0); 991} 992 993static void 994lower_gs_io(struct nir_shader *nir) 995{ 996 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 997 998 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 999 MESA_SHADER_GEOMETRY); 1000 1001 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 1002 MESA_SHADER_GEOMETRY); 1003} 1004 1005static void 1006lower_vs_io(struct nir_shader *nir) 1007{ 1008 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 1009 1010 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, 1011 MESA_SHADER_VERTEX); 1012 1013 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, 1014 MESA_SHADER_VERTEX); 1015 1016 /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it 1017 * overlaps with v3d_nir_lower_io. Need further research though. 1018 */ 1019} 1020 1021static void 1022shader_debug_output(const char *message, void *data) 1023{ 1024 /* FIXME: We probably don't want to debug anything extra here, and in fact 1025 * the compiler is not using this callback too much, only as an alternative 1026 * way to debug out the shaderdb stats, that you can already get using 1027 * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d 1028 * compiler to remove that callback. 1029 */ 1030} 1031 1032static void 1033pipeline_populate_v3d_key(struct v3d_key *key, 1034 const struct v3dv_pipeline_stage *p_stage, 1035 uint32_t ucp_enables, 1036 bool robust_buffer_access) 1037{ 1038 assert(p_stage->pipeline->shared_data && 1039 p_stage->pipeline->shared_data->maps[p_stage->stage]); 1040 1041 /* The following values are default values used at pipeline create. We use 1042 * there 32 bit as default return size. 1043 */ 1044 struct v3dv_descriptor_map *sampler_map = 1045 &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map; 1046 struct v3dv_descriptor_map *texture_map = 1047 &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map; 1048 1049 key->num_tex_used = texture_map->num_desc; 1050 assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS); 1051 for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) { 1052 key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X; 1053 key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y; 1054 key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z; 1055 key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W; 1056 } 1057 1058 key->num_samplers_used = sampler_map->num_desc; 1059 assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS); 1060 for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc; 1061 sampler_idx++) { 1062 key->sampler[sampler_idx].return_size = 1063 sampler_map->return_size[sampler_idx]; 1064 1065 key->sampler[sampler_idx].return_channels = 1066 key->sampler[sampler_idx].return_size == 32 ? 
4 : 2; 1067 } 1068 1069 switch (p_stage->stage) { 1070 case BROADCOM_SHADER_VERTEX: 1071 case BROADCOM_SHADER_VERTEX_BIN: 1072 key->is_last_geometry_stage = p_stage->pipeline->gs == NULL; 1073 break; 1074 case BROADCOM_SHADER_GEOMETRY: 1075 case BROADCOM_SHADER_GEOMETRY_BIN: 1076 /* FIXME: while we don't implement tessellation shaders */ 1077 key->is_last_geometry_stage = true; 1078 break; 1079 case BROADCOM_SHADER_FRAGMENT: 1080 case BROADCOM_SHADER_COMPUTE: 1081 key->is_last_geometry_stage = false; 1082 break; 1083 default: 1084 unreachable("unsupported shader stage"); 1085 } 1086 1087 /* Vulkan doesn't have fixed function state for user clip planes. Instead, 1088 * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler 1089 * takes care of adding a single compact array variable at 1090 * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering. 1091 * 1092 * The only lowering we are interested is specific to the fragment shader, 1093 * where we want to emit discards to honor writes to gl_ClipDistance[] in 1094 * previous stages. This is done via nir_lower_clip_fs() so we only set up 1095 * the ucp enable mask for that stage. 1096 */ 1097 key->ucp_enables = ucp_enables; 1098 1099 key->robust_buffer_access = robust_buffer_access; 1100 1101 key->environment = V3D_ENVIRONMENT_VULKAN; 1102} 1103 1104/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the 1105 * same. For not using prim_mode that is the one already used on v3d 1106 */ 1107static const enum pipe_prim_type vk_to_pipe_prim_type[] = { 1108 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS, 1109 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES, 1110 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP, 1111 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES, 1112 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP, 1113 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN, 1114 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY, 1115 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY, 1116 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY, 1117 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY, 1118}; 1119 1120static const enum pipe_logicop vk_to_pipe_logicop[] = { 1121 [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR, 1122 [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND, 1123 [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE, 1124 [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY, 1125 [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED, 1126 [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP, 1127 [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR, 1128 [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR, 1129 [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR, 1130 [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV, 1131 [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT, 1132 [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE, 1133 [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED, 1134 [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED, 1135 [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND, 1136 [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET, 1137}; 1138 1139static void 1140pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, 1141 const VkGraphicsPipelineCreateInfo *pCreateInfo, 1142 const struct v3dv_pipeline_stage *p_stage, 1143 bool has_geometry_shader, 1144 uint32_t ucp_enables) 1145{ 1146 assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT); 1147 1148 memset(key, 0, sizeof(*key)); 1149 1150 const 
bool rba = p_stage->pipeline->device->features.robustBufferAccess; 1151 pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba); 1152 1153 const VkPipelineInputAssemblyStateCreateInfo *ia_info = 1154 pCreateInfo->pInputAssemblyState; 1155 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; 1156 1157 key->is_points = (topology == PIPE_PRIM_POINTS); 1158 key->is_lines = (topology >= PIPE_PRIM_LINES && 1159 topology <= PIPE_PRIM_LINE_STRIP); 1160 key->has_gs = has_geometry_shader; 1161 1162 const VkPipelineColorBlendStateCreateInfo *cb_info = 1163 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? 1164 pCreateInfo->pColorBlendState : NULL; 1165 1166 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? 1167 vk_to_pipe_logicop[cb_info->logicOp] : 1168 PIPE_LOGICOP_COPY; 1169 1170 const bool raster_enabled = 1171 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 1172 1173 /* Multisample rasterization state must be ignored if rasterization 1174 * is disabled. 1175 */ 1176 const VkPipelineMultisampleStateCreateInfo *ms_info = 1177 raster_enabled ? pCreateInfo->pMultisampleState : NULL; 1178 if (ms_info) { 1179 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || 1180 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); 1181 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; 1182 1183 if (key->msaa) { 1184 key->sample_coverage = 1185 p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1; 1186 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; 1187 key->sample_alpha_to_one = ms_info->alphaToOneEnable; 1188 } 1189 } 1190 1191 /* This is intended for V3D versions before 4.1, otherwise we just use the 1192 * tile buffer load/store swap R/B bit. 1193 */ 1194 key->swap_color_rb = 0; 1195 1196 const struct v3dv_render_pass *pass = 1197 v3dv_render_pass_from_handle(pCreateInfo->renderPass); 1198 const struct v3dv_subpass *subpass = p_stage->pipeline->subpass; 1199 for (uint32_t i = 0; i < subpass->color_count; i++) { 1200 const uint32_t att_idx = subpass->color_attachments[i].attachment; 1201 if (att_idx == VK_ATTACHMENT_UNUSED) 1202 continue; 1203 1204 key->cbufs |= 1 << i; 1205 1206 VkFormat fb_format = pass->attachments[att_idx].desc.format; 1207 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); 1208 1209 /* If logic operations are enabled then we might emit color reads and we 1210 * need to know the color buffer format and swizzle for that 1211 */ 1212 if (key->logicop_func != PIPE_LOGICOP_COPY) { 1213 key->color_fmt[i].format = fb_pipe_format; 1214 memcpy(key->color_fmt[i].swizzle, 1215 v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format), 1216 sizeof(key->color_fmt[i].swizzle)); 1217 } 1218 1219 const struct util_format_description *desc = 1220 vk_format_description(fb_format); 1221 1222 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 1223 desc->channel[0].size == 32) { 1224 key->f32_color_rb |= 1 << i; 1225 } 1226 1227 if (p_stage->nir->info.fs.untyped_color_outputs) { 1228 if (util_format_is_pure_uint(fb_pipe_format)) 1229 key->uint_color_rb |= 1 << i; 1230 else if (util_format_is_pure_sint(fb_pipe_format)) 1231 key->int_color_rb |= 1 << i; 1232 } 1233 1234 if (key->is_points) { 1235 /* This mask represents state for GL_ARB_point_sprite which is not 1236 * relevant to Vulkan. 1237 */ 1238 key->point_sprite_mask = 0; 1239 1240 /* Vulkan mandates upper left. 
*/ 1241 key->point_coord_upper_left = true; 1242 } 1243 } 1244} 1245 1246static void 1247setup_stage_outputs_from_next_stage_inputs( 1248 uint8_t next_stage_num_inputs, 1249 struct v3d_varying_slot *next_stage_input_slots, 1250 uint8_t *num_used_outputs, 1251 struct v3d_varying_slot *used_output_slots, 1252 uint32_t size_of_used_output_slots) 1253{ 1254 *num_used_outputs = next_stage_num_inputs; 1255 memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots); 1256} 1257 1258static void 1259pipeline_populate_v3d_gs_key(struct v3d_gs_key *key, 1260 const VkGraphicsPipelineCreateInfo *pCreateInfo, 1261 const struct v3dv_pipeline_stage *p_stage) 1262{ 1263 assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY || 1264 p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN); 1265 1266 memset(key, 0, sizeof(*key)); 1267 1268 const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 1269 pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); 1270 1271 struct v3dv_pipeline *pipeline = p_stage->pipeline; 1272 1273 key->per_vertex_point_size = 1274 p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ); 1275 1276 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); 1277 1278 assert(key->base.is_last_geometry_stage); 1279 if (key->is_coord) { 1280 /* Output varyings in the last binning shader are only used for transform 1281 * feedback. Set to 0 as VK_EXT_transform_feedback is not supported. 1282 */ 1283 key->num_used_outputs = 0; 1284 } else { 1285 struct v3dv_shader_variant *fs_variant = 1286 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 1287 1288 STATIC_ASSERT(sizeof(key->used_outputs) == 1289 sizeof(fs_variant->prog_data.fs->input_slots)); 1290 1291 setup_stage_outputs_from_next_stage_inputs( 1292 fs_variant->prog_data.fs->num_inputs, 1293 fs_variant->prog_data.fs->input_slots, 1294 &key->num_used_outputs, 1295 key->used_outputs, 1296 sizeof(key->used_outputs)); 1297 } 1298} 1299 1300static void 1301pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, 1302 const VkGraphicsPipelineCreateInfo *pCreateInfo, 1303 const struct v3dv_pipeline_stage *p_stage) 1304{ 1305 assert(p_stage->stage == BROADCOM_SHADER_VERTEX || 1306 p_stage->stage == BROADCOM_SHADER_VERTEX_BIN); 1307 1308 memset(key, 0, sizeof(*key)); 1309 1310 const bool rba = p_stage->pipeline->device->features.robustBufferAccess; 1311 pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); 1312 1313 struct v3dv_pipeline *pipeline = p_stage->pipeline; 1314 1315 /* Vulkan specifies a point size per vertex, so true for if the prim are 1316 * points, like on ES2) 1317 */ 1318 const VkPipelineInputAssemblyStateCreateInfo *ia_info = 1319 pCreateInfo->pInputAssemblyState; 1320 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; 1321 1322 /* FIXME: PRIM_POINTS is not enough, in gallium the full check is 1323 * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */ 1324 key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS); 1325 1326 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); 1327 1328 if (key->is_coord) { /* Binning VS*/ 1329 if (key->base.is_last_geometry_stage) { 1330 /* Output varyings in the last binning shader are only used for 1331 * transform feedback. Set to 0 as VK_EXT_transform_feedback is not 1332 * supported. 
1333 */ 1334 key->num_used_outputs = 0; 1335 } else { 1336 /* Linking against GS binning program */ 1337 assert(pipeline->gs); 1338 struct v3dv_shader_variant *gs_bin_variant = 1339 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 1340 1341 STATIC_ASSERT(sizeof(key->used_outputs) == 1342 sizeof(gs_bin_variant->prog_data.gs->input_slots)); 1343 1344 setup_stage_outputs_from_next_stage_inputs( 1345 gs_bin_variant->prog_data.gs->num_inputs, 1346 gs_bin_variant->prog_data.gs->input_slots, 1347 &key->num_used_outputs, 1348 key->used_outputs, 1349 sizeof(key->used_outputs)); 1350 } 1351 } else { /* Render VS */ 1352 if (pipeline->gs) { 1353 /* Linking against GS render program */ 1354 struct v3dv_shader_variant *gs_variant = 1355 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 1356 1357 STATIC_ASSERT(sizeof(key->used_outputs) == 1358 sizeof(gs_variant->prog_data.gs->input_slots)); 1359 1360 setup_stage_outputs_from_next_stage_inputs( 1361 gs_variant->prog_data.gs->num_inputs, 1362 gs_variant->prog_data.gs->input_slots, 1363 &key->num_used_outputs, 1364 key->used_outputs, 1365 sizeof(key->used_outputs)); 1366 } else { 1367 /* Linking against FS program */ 1368 struct v3dv_shader_variant *fs_variant = 1369 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 1370 1371 STATIC_ASSERT(sizeof(key->used_outputs) == 1372 sizeof(fs_variant->prog_data.fs->input_slots)); 1373 1374 setup_stage_outputs_from_next_stage_inputs( 1375 fs_variant->prog_data.fs->num_inputs, 1376 fs_variant->prog_data.fs->input_slots, 1377 &key->num_used_outputs, 1378 key->used_outputs, 1379 sizeof(key->used_outputs)); 1380 } 1381 } 1382 1383 const VkPipelineVertexInputStateCreateInfo *vi_info = 1384 pCreateInfo->pVertexInputState; 1385 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 1386 const VkVertexInputAttributeDescription *desc = 1387 &vi_info->pVertexAttributeDescriptions[i]; 1388 assert(desc->location < MAX_VERTEX_ATTRIBS); 1389 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) 1390 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); 1391 } 1392} 1393 1394/** 1395 * Creates the initial form of the pipeline stage for a binning shader by 1396 * cloning the render shader and flagging it as a coordinate shader. 1397 * 1398 * Returns NULL if it was not able to allocate the object, so it should be 1399 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error. 1400 */ 1401static struct v3dv_pipeline_stage * 1402pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src, 1403 const VkAllocationCallbacks *pAllocator) 1404{ 1405 struct v3dv_device *device = src->pipeline->device; 1406 1407 struct v3dv_pipeline_stage *p_stage = 1408 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 1409 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1410 1411 if (p_stage == NULL) 1412 return NULL; 1413 1414 assert(src->stage == BROADCOM_SHADER_VERTEX || 1415 src->stage == BROADCOM_SHADER_GEOMETRY); 1416 1417 enum broadcom_shader_stage bin_stage = 1418 src->stage == BROADCOM_SHADER_VERTEX ? 1419 BROADCOM_SHADER_VERTEX_BIN : 1420 BROADCOM_SHADER_GEOMETRY_BIN; 1421 1422 p_stage->pipeline = src->pipeline; 1423 p_stage->stage = bin_stage; 1424 p_stage->entrypoint = src->entrypoint; 1425 p_stage->module = src->module; 1426 /* For binning shaders we will clone the NIR code from the corresponding 1427 * render shader later, when we call pipeline_compile_xxx_shader. 
This way 1428 * we only have to run the relevant NIR lowerings once for render shaders 1429 */ 1430 p_stage->nir = NULL; 1431 p_stage->spec_info = src->spec_info; 1432 p_stage->feedback = (VkPipelineCreationFeedback) { 0 }; 1433 memcpy(p_stage->shader_sha1, src->shader_sha1, 20); 1434 1435 return p_stage; 1436} 1437 1438/** 1439 * Returns false if it was not able to allocate or map the assembly bo memory. 1440 */ 1441static bool 1442upload_assembly(struct v3dv_pipeline *pipeline) 1443{ 1444 uint32_t total_size = 0; 1445 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 1446 struct v3dv_shader_variant *variant = 1447 pipeline->shared_data->variants[stage]; 1448 1449 if (variant != NULL) 1450 total_size += variant->qpu_insts_size; 1451 } 1452 1453 struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size, 1454 "pipeline shader assembly", true); 1455 if (!bo) { 1456 fprintf(stderr, "failed to allocate memory for shader\n"); 1457 return false; 1458 } 1459 1460 bool ok = v3dv_bo_map(pipeline->device, bo, total_size); 1461 if (!ok) { 1462 fprintf(stderr, "failed to map source shader buffer\n"); 1463 return false; 1464 } 1465 1466 uint32_t offset = 0; 1467 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 1468 struct v3dv_shader_variant *variant = 1469 pipeline->shared_data->variants[stage]; 1470 1471 if (variant != NULL) { 1472 variant->assembly_offset = offset; 1473 1474 memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size); 1475 offset += variant->qpu_insts_size; 1476 1477 /* We dont need qpu_insts anymore. */ 1478 free(variant->qpu_insts); 1479 variant->qpu_insts = NULL; 1480 } 1481 } 1482 assert(total_size == offset); 1483 1484 pipeline->shared_data->assembly_bo = bo; 1485 1486 return true; 1487} 1488 1489static void 1490pipeline_hash_graphics(const struct v3dv_pipeline *pipeline, 1491 struct v3dv_pipeline_key *key, 1492 unsigned char *sha1_out) 1493{ 1494 struct mesa_sha1 ctx; 1495 _mesa_sha1_init(&ctx); 1496 1497 if (pipeline->layout) { 1498 _mesa_sha1_update(&ctx, &pipeline->layout->sha1, 1499 sizeof(pipeline->layout->sha1)); 1500 } 1501 1502 /* We need to include all shader stages in the sha1 key as linking may modify 1503 * the shader code in any stage. An alternative would be to use the 1504 * serialized NIR, but that seems like an overkill. 
1505 */ 1506 _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1, 1507 sizeof(pipeline->vs->shader_sha1)); 1508 1509 if (pipeline->gs) { 1510 _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1, 1511 sizeof(pipeline->gs->shader_sha1)); 1512 } 1513 1514 _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1, 1515 sizeof(pipeline->fs->shader_sha1)); 1516 1517 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key)); 1518 1519 _mesa_sha1_final(&ctx, sha1_out); 1520} 1521 1522static void 1523pipeline_hash_compute(const struct v3dv_pipeline *pipeline, 1524 struct v3dv_pipeline_key *key, 1525 unsigned char *sha1_out) 1526{ 1527 struct mesa_sha1 ctx; 1528 _mesa_sha1_init(&ctx); 1529 1530 if (pipeline->layout) { 1531 _mesa_sha1_update(&ctx, &pipeline->layout->sha1, 1532 sizeof(pipeline->layout->sha1)); 1533 } 1534 1535 _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1, 1536 sizeof(pipeline->cs->shader_sha1)); 1537 1538 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key)); 1539 1540 _mesa_sha1_final(&ctx, sha1_out); 1541} 1542 1543/* Checks that the pipeline has enough spill size to use for any of their 1544 * variants 1545 */ 1546static void 1547pipeline_check_spill_size(struct v3dv_pipeline *pipeline) 1548{ 1549 uint32_t max_spill_size = 0; 1550 1551 for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 1552 struct v3dv_shader_variant *variant = 1553 pipeline->shared_data->variants[stage]; 1554 1555 if (variant != NULL) { 1556 max_spill_size = MAX2(variant->prog_data.base->spill_size, 1557 max_spill_size); 1558 } 1559 } 1560 1561 if (max_spill_size > 0) { 1562 struct v3dv_device *device = pipeline->device; 1563 1564 /* The TIDX register we use for choosing the area to access 1565 * for scratch space is: (core << 6) | (qpu << 2) | thread. 1566 * Even at minimum threadcount in a particular shader, that 1567 * means we still multiply by qpus by 4. 1568 */ 1569 const uint32_t total_spill_size = 1570 4 * device->devinfo.qpu_count * max_spill_size; 1571 if (pipeline->spill.bo) { 1572 assert(pipeline->spill.size_per_thread > 0); 1573 v3dv_bo_free(device, pipeline->spill.bo); 1574 } 1575 pipeline->spill.bo = 1576 v3dv_bo_alloc(device, total_spill_size, "spill", true); 1577 pipeline->spill.size_per_thread = max_spill_size; 1578 } 1579} 1580 1581/** 1582 * Creates a new shader_variant_create. Note that for prog_data is not const, 1583 * so it is assumed that the caller will prove a pointer that the 1584 * shader_variant will own. 1585 * 1586 * Creation doesn't include allocate a BO to store the content of qpu_insts, 1587 * as we will try to share the same bo for several shader variants. Also note 1588 * that qpu_ints being NULL is valid, for example if we are creating the 1589 * shader_variants from the cache, so we can just upload the assembly of all 1590 * the shader stages at once. 
1591 */ 1592struct v3dv_shader_variant * 1593v3dv_shader_variant_create(struct v3dv_device *device, 1594 enum broadcom_shader_stage stage, 1595 struct v3d_prog_data *prog_data, 1596 uint32_t prog_data_size, 1597 uint32_t assembly_offset, 1598 uint64_t *qpu_insts, 1599 uint32_t qpu_insts_size, 1600 VkResult *out_vk_result) 1601{ 1602 struct v3dv_shader_variant *variant = 1603 vk_zalloc(&device->vk.alloc, sizeof(*variant), 8, 1604 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1605 1606 if (variant == NULL) { 1607 *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY; 1608 return NULL; 1609 } 1610 1611 variant->stage = stage; 1612 variant->prog_data_size = prog_data_size; 1613 variant->prog_data.base = prog_data; 1614 1615 variant->assembly_offset = assembly_offset; 1616 variant->qpu_insts_size = qpu_insts_size; 1617 variant->qpu_insts = qpu_insts; 1618 1619 *out_vk_result = VK_SUCCESS; 1620 1621 return variant; 1622} 1623 1624/* For a given key, it returns the compiled version of the shader. Returns a 1625 * new reference to the shader_variant to the caller, or NULL. 1626 * 1627 * If the method returns NULL it means that something wrong happened: 1628 * * Not enough memory: this is one of the possible outcomes defined by 1629 * vkCreateXXXPipelines. out_vk_result will return the proper oom error. 1630 * * Compilation error: hypothetically this shouldn't happen, as the spec 1631 * states that vkShaderModule needs to be created with a valid SPIR-V, so 1632 * any compilation failure is a driver bug. In the practice, something as 1633 * common as failing to register allocate can lead to a compilation 1634 * failure. In that case the only option (for any driver) is 1635 * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler 1636 * error. 1637 */ 1638static struct v3dv_shader_variant * 1639pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage, 1640 struct v3d_key *key, 1641 size_t key_size, 1642 const VkAllocationCallbacks *pAllocator, 1643 VkResult *out_vk_result) 1644{ 1645 int64_t stage_start = os_time_get_nano(); 1646 1647 struct v3dv_pipeline *pipeline = p_stage->pipeline; 1648 struct v3dv_physical_device *physical_device = 1649 &pipeline->device->instance->physicalDevice; 1650 const struct v3d_compiler *compiler = physical_device->compiler; 1651 1652 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 1653 v3d_debug_flag_for_shader_stage 1654 (broadcom_shader_stage_to_gl(p_stage->stage))))) { 1655 fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n", 1656 broadcom_shader_stage_name(p_stage->stage), 1657 p_stage->program_id); 1658 nir_print_shader(p_stage->nir, stderr); 1659 fprintf(stderr, "\n"); 1660 } 1661 1662 uint64_t *qpu_insts; 1663 uint32_t qpu_insts_size; 1664 struct v3d_prog_data *prog_data; 1665 uint32_t prog_data_size = 1666 v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage)); 1667 1668 qpu_insts = v3d_compile(compiler, 1669 key, &prog_data, 1670 p_stage->nir, 1671 shader_debug_output, NULL, 1672 p_stage->program_id, 0, 1673 &qpu_insts_size); 1674 1675 struct v3dv_shader_variant *variant = NULL; 1676 1677 if (!qpu_insts) { 1678 fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n", 1679 gl_shader_stage_name(p_stage->stage), 1680 p_stage->program_id); 1681 *out_vk_result = VK_ERROR_UNKNOWN; 1682 } else { 1683 variant = 1684 v3dv_shader_variant_create(pipeline->device, p_stage->stage, 1685 prog_data, prog_data_size, 1686 0, /* assembly_offset, no final value yet */ 1687 qpu_insts, qpu_insts_size, 1688 out_vk_result); 1689 } 1690 /* At this point we don't need 
anymore the nir shader, but we are freeing 1691 * all the temporary p_stage structs used during the pipeline creation when 1692 * we finish it, so let's not worry about freeing the nir here. 1693 */ 1694 1695 p_stage->feedback.duration += os_time_get_nano() - stage_start; 1696 1697 return variant; 1698} 1699 1700static void 1701link_shaders(nir_shader *producer, nir_shader *consumer) 1702{ 1703 assert(producer); 1704 assert(consumer); 1705 1706 if (producer->options->lower_to_scalar) { 1707 NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out); 1708 NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); 1709 } 1710 1711 nir_lower_io_arrays_to_elements(producer, consumer); 1712 1713 nir_optimize(producer, false); 1714 nir_optimize(consumer, false); 1715 1716 if (nir_link_opt_varyings(producer, consumer)) 1717 nir_optimize(consumer, false); 1718 1719 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); 1720 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); 1721 1722 if (nir_remove_unused_varyings(producer, consumer)) { 1723 NIR_PASS(_, producer, nir_lower_global_vars_to_local); 1724 NIR_PASS(_, consumer, nir_lower_global_vars_to_local); 1725 1726 nir_optimize(producer, false); 1727 nir_optimize(consumer, false); 1728 1729 /* Optimizations can cause varyings to become unused. 1730 * nir_compact_varyings() depends on all dead varyings being removed so 1731 * we need to call nir_remove_dead_variables() again here. 1732 */ 1733 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); 1734 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); 1735 } 1736} 1737 1738static void 1739pipeline_lower_nir(struct v3dv_pipeline *pipeline, 1740 struct v3dv_pipeline_stage *p_stage, 1741 struct v3dv_pipeline_layout *layout) 1742{ 1743 int64_t stage_start = os_time_get_nano(); 1744 1745 assert(pipeline->shared_data && 1746 pipeline->shared_data->maps[p_stage->stage]); 1747 1748 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); 1749 1750 /* We add this because we need a valid sampler for nir_lower_tex to do 1751 * unpacking of the texture operation result, even for the case where there 1752 * is no sampler state. 1753 * 1754 * We add two of those, one for the case we need a 16bit return_size, and 1755 * another for the case we need a 32bit return size. 1756 */ 1757 struct v3dv_descriptor_maps *maps = 1758 pipeline->shared_data->maps[p_stage->stage]; 1759 1760 UNUSED unsigned index; 1761 index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16); 1762 assert(index == V3DV_NO_SAMPLER_16BIT_IDX); 1763 1764 index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32); 1765 assert(index == V3DV_NO_SAMPLER_32BIT_IDX); 1766 1767 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ 1768 bool needs_default_sampler_state = false; 1769 NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout, 1770 &needs_default_sampler_state); 1771 1772 /* If in the end we didn't need to use the default sampler states and the 1773 * shader doesn't need any other samplers, get rid of them so we can 1774 * recognize that this program doesn't use any samplers at all. 
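 * If the map holds exactly two descriptors at this point, they can only be
 * the two default entries we injected above (the 16-bit and the 32-bit
 * return size cases), so it is safe to reset num_desc to zero below.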
1775 */ 1776 if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2) 1777 maps->sampler_map.num_desc = 0; 1778 1779 p_stage->feedback.duration += os_time_get_nano() - stage_start; 1780} 1781 1782/** 1783 * The SPIR-V compiler will insert a sized compact array for 1784 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[], 1785 * where the size of the array determines the number of active clip planes. 1786 */ 1787static uint32_t 1788get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage) 1789{ 1790 assert(p_stage->stage == BROADCOM_SHADER_VERTEX); 1791 const nir_shader *shader = p_stage->nir; 1792 assert(shader); 1793 1794 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { 1795 if (var->data.location == VARYING_SLOT_CLIP_DIST0) { 1796 assert(var->data.compact); 1797 return (1 << glsl_get_length(var->type)) - 1; 1798 } 1799 } 1800 return 0; 1801} 1802 1803static nir_shader * 1804pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, 1805 struct v3dv_pipeline *pipeline, 1806 struct v3dv_pipeline_cache *cache) 1807{ 1808 int64_t stage_start = os_time_get_nano(); 1809 1810 nir_shader *nir = NULL; 1811 1812 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache, 1813 &v3dv_nir_options, 1814 p_stage->shader_sha1); 1815 1816 if (nir) { 1817 assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage)); 1818 1819 /* A NIR cach hit doesn't avoid the large majority of pipeline stage 1820 * creation so the cache hit is not recorded in the pipeline feedback 1821 * flags 1822 */ 1823 1824 p_stage->feedback.duration += os_time_get_nano() - stage_start; 1825 1826 return nir; 1827 } 1828 1829 nir = shader_module_compile_to_nir(pipeline->device, p_stage); 1830 1831 if (nir) { 1832 struct v3dv_pipeline_cache *default_cache = 1833 &pipeline->device->default_pipeline_cache; 1834 1835 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, 1836 p_stage->shader_sha1); 1837 1838 /* Ensure that the variant is on the default cache, as cmd_buffer could 1839 * need to change the current variant 1840 */ 1841 if (default_cache != cache) { 1842 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir, 1843 p_stage->shader_sha1); 1844 } 1845 1846 p_stage->feedback.duration += os_time_get_nano() - stage_start; 1847 1848 return nir; 1849 } 1850 1851 /* FIXME: this shouldn't happen, raise error? 
*/ 1852 return NULL; 1853} 1854 1855static VkResult 1856pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, 1857 const VkAllocationCallbacks *pAllocator, 1858 const VkGraphicsPipelineCreateInfo *pCreateInfo) 1859{ 1860 assert(pipeline->vs_bin != NULL); 1861 if (pipeline->vs_bin->nir == NULL) { 1862 assert(pipeline->vs->nir); 1863 pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir); 1864 } 1865 1866 VkResult vk_result; 1867 struct v3d_vs_key key; 1868 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs); 1869 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] = 1870 pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key), 1871 pAllocator, &vk_result); 1872 if (vk_result != VK_SUCCESS) 1873 return vk_result; 1874 1875 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin); 1876 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] = 1877 pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key), 1878 pAllocator, &vk_result); 1879 1880 return vk_result; 1881} 1882 1883static VkResult 1884pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline, 1885 const VkAllocationCallbacks *pAllocator, 1886 const VkGraphicsPipelineCreateInfo *pCreateInfo) 1887{ 1888 assert(pipeline->gs); 1889 1890 assert(pipeline->gs_bin != NULL); 1891 if (pipeline->gs_bin->nir == NULL) { 1892 assert(pipeline->gs->nir); 1893 pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir); 1894 } 1895 1896 VkResult vk_result; 1897 struct v3d_gs_key key; 1898 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs); 1899 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] = 1900 pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key), 1901 pAllocator, &vk_result); 1902 if (vk_result != VK_SUCCESS) 1903 return vk_result; 1904 1905 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin); 1906 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] = 1907 pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key), 1908 pAllocator, &vk_result); 1909 1910 return vk_result; 1911} 1912 1913static VkResult 1914pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline, 1915 const VkAllocationCallbacks *pAllocator, 1916 const VkGraphicsPipelineCreateInfo *pCreateInfo) 1917{ 1918 struct v3dv_pipeline_stage *p_stage = pipeline->vs; 1919 1920 p_stage = pipeline->fs; 1921 1922 struct v3d_fs_key key; 1923 1924 pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage, 1925 pipeline->gs != NULL, 1926 get_ucp_enable_mask(pipeline->vs)); 1927 1928 VkResult vk_result; 1929 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] = 1930 pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key), 1931 pAllocator, &vk_result); 1932 1933 return vk_result; 1934} 1935 1936static void 1937pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, 1938 struct v3dv_pipeline_key *key, 1939 const VkGraphicsPipelineCreateInfo *pCreateInfo) 1940{ 1941 memset(key, 0, sizeof(*key)); 1942 key->robust_buffer_access = 1943 pipeline->device->features.robustBufferAccess; 1944 1945 const bool raster_enabled = 1946 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 1947 1948 const VkPipelineInputAssemblyStateCreateInfo *ia_info = 1949 pCreateInfo->pInputAssemblyState; 1950 key->topology = vk_to_pipe_prim_type[ia_info->topology]; 1951 1952 const VkPipelineColorBlendStateCreateInfo *cb_info = 1953 raster_enabled ? 
pCreateInfo->pColorBlendState : NULL; 1954 1955 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? 1956 vk_to_pipe_logicop[cb_info->logicOp] : 1957 PIPE_LOGICOP_COPY; 1958 1959 /* Multisample rasterization state must be ignored if rasterization 1960 * is disabled. 1961 */ 1962 const VkPipelineMultisampleStateCreateInfo *ms_info = 1963 raster_enabled ? pCreateInfo->pMultisampleState : NULL; 1964 if (ms_info) { 1965 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || 1966 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); 1967 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; 1968 1969 if (key->msaa) { 1970 key->sample_coverage = 1971 pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1; 1972 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; 1973 key->sample_alpha_to_one = ms_info->alphaToOneEnable; 1974 } 1975 } 1976 1977 const struct v3dv_render_pass *pass = 1978 v3dv_render_pass_from_handle(pCreateInfo->renderPass); 1979 const struct v3dv_subpass *subpass = pipeline->subpass; 1980 for (uint32_t i = 0; i < subpass->color_count; i++) { 1981 const uint32_t att_idx = subpass->color_attachments[i].attachment; 1982 if (att_idx == VK_ATTACHMENT_UNUSED) 1983 continue; 1984 1985 key->cbufs |= 1 << i; 1986 1987 VkFormat fb_format = pass->attachments[att_idx].desc.format; 1988 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); 1989 1990 /* If logic operations are enabled then we might emit color reads and we 1991 * need to know the color buffer format and swizzle for that 1992 */ 1993 if (key->logicop_func != PIPE_LOGICOP_COPY) { 1994 key->color_fmt[i].format = fb_pipe_format; 1995 memcpy(key->color_fmt[i].swizzle, 1996 v3dv_get_format_swizzle(pipeline->device, fb_format), 1997 sizeof(key->color_fmt[i].swizzle)); 1998 } 1999 2000 const struct util_format_description *desc = 2001 vk_format_description(fb_format); 2002 2003 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 2004 desc->channel[0].size == 32) { 2005 key->f32_color_rb |= 1 << i; 2006 } 2007 } 2008 2009 const VkPipelineVertexInputStateCreateInfo *vi_info = 2010 pCreateInfo->pVertexInputState; 2011 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 2012 const VkVertexInputAttributeDescription *desc = 2013 &vi_info->pVertexAttributeDescriptions[i]; 2014 assert(desc->location < MAX_VERTEX_ATTRIBS); 2015 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM) 2016 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); 2017 } 2018 2019 assert(pipeline->subpass); 2020 key->has_multiview = pipeline->subpass->view_mask != 0; 2021} 2022 2023static void 2024pipeline_populate_compute_key(struct v3dv_pipeline *pipeline, 2025 struct v3dv_pipeline_key *key, 2026 const VkComputePipelineCreateInfo *pCreateInfo) 2027{ 2028 /* We use the same pipeline key for graphics and compute, but we don't need 2029 * to add a field to flag compute keys because this key is not used alone 2030 * to search in the cache, we also use the SPIR-V or the serialized NIR for 2031 * example, which already flags compute shaders. 2032 */ 2033 memset(key, 0, sizeof(*key)); 2034 key->robust_buffer_access = 2035 pipeline->device->features.robustBufferAccess; 2036} 2037 2038static struct v3dv_pipeline_shared_data * 2039v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20], 2040 struct v3dv_pipeline *pipeline, 2041 bool is_graphics_pipeline) 2042{ 2043 /* We create new_entry using the device alloc. 
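 * That is, we allocate from pipeline->device->vk.alloc rather than from the
 * allocator provided at pipeline or cache creation time.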
Right now shared_data is ref 2044 * and unref by both the pipeline and the pipeline cache, so we can't 2045 * ensure that the cache or pipeline alloc will be available on the last 2046 * unref. 2047 */ 2048 struct v3dv_pipeline_shared_data *new_entry = 2049 vk_zalloc2(&pipeline->device->vk.alloc, NULL, 2050 sizeof(struct v3dv_pipeline_shared_data), 8, 2051 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2052 2053 if (new_entry == NULL) 2054 return NULL; 2055 2056 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 2057 /* We don't need specific descriptor maps for binning stages we use the 2058 * map for the render stage. 2059 */ 2060 if (broadcom_shader_stage_is_binning(stage)) 2061 continue; 2062 2063 if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) || 2064 (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) { 2065 continue; 2066 } 2067 2068 if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) { 2069 /* We always inject a custom GS if we have multiview */ 2070 if (!pipeline->subpass->view_mask) 2071 continue; 2072 } 2073 2074 struct v3dv_descriptor_maps *new_maps = 2075 vk_zalloc2(&pipeline->device->vk.alloc, NULL, 2076 sizeof(struct v3dv_descriptor_maps), 8, 2077 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2078 2079 if (new_maps == NULL) 2080 goto fail; 2081 2082 new_entry->maps[stage] = new_maps; 2083 } 2084 2085 new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] = 2086 new_entry->maps[BROADCOM_SHADER_VERTEX]; 2087 2088 new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] = 2089 new_entry->maps[BROADCOM_SHADER_GEOMETRY]; 2090 2091 new_entry->ref_cnt = 1; 2092 memcpy(new_entry->sha1_key, sha1_key, 20); 2093 2094 return new_entry; 2095 2096fail: 2097 if (new_entry != NULL) { 2098 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 2099 if (new_entry->maps[stage] != NULL) 2100 vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]); 2101 } 2102 } 2103 2104 vk_free(&pipeline->device->vk.alloc, new_entry); 2105 2106 return NULL; 2107} 2108 2109static void 2110write_creation_feedback(struct v3dv_pipeline *pipeline, 2111 const void *next, 2112 const VkPipelineCreationFeedback *pipeline_feedback, 2113 uint32_t stage_count, 2114 const VkPipelineShaderStageCreateInfo *stages) 2115{ 2116 const VkPipelineCreationFeedbackCreateInfo *create_feedback = 2117 vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 2118 2119 if (create_feedback) { 2120 typed_memcpy(create_feedback->pPipelineCreationFeedback, 2121 pipeline_feedback, 2122 1); 2123 2124 assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount); 2125 2126 for (uint32_t i = 0; i < stage_count; i++) { 2127 gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage); 2128 switch (s) { 2129 case MESA_SHADER_VERTEX: 2130 create_feedback->pPipelineStageCreationFeedbacks[i] = 2131 pipeline->vs->feedback; 2132 2133 create_feedback->pPipelineStageCreationFeedbacks[i].duration += 2134 pipeline->vs_bin->feedback.duration; 2135 break; 2136 2137 case MESA_SHADER_GEOMETRY: 2138 create_feedback->pPipelineStageCreationFeedbacks[i] = 2139 pipeline->gs->feedback; 2140 2141 create_feedback->pPipelineStageCreationFeedbacks[i].duration += 2142 pipeline->gs_bin->feedback.duration; 2143 break; 2144 2145 case MESA_SHADER_FRAGMENT: 2146 create_feedback->pPipelineStageCreationFeedbacks[i] = 2147 pipeline->fs->feedback; 2148 break; 2149 2150 case MESA_SHADER_COMPUTE: 2151 create_feedback->pPipelineStageCreationFeedbacks[i] = 2152 pipeline->cs->feedback; 2153 break; 2154 2155 default: 2156 unreachable("not 
supported shader stage"); 2157 } 2158 } 2159 } 2160} 2161 2162static enum shader_prim 2163multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline) 2164{ 2165 switch (pipeline->topology) { 2166 case PIPE_PRIM_POINTS: 2167 return SHADER_PRIM_POINTS; 2168 case PIPE_PRIM_LINES: 2169 case PIPE_PRIM_LINE_STRIP: 2170 return SHADER_PRIM_LINES; 2171 case PIPE_PRIM_TRIANGLES: 2172 case PIPE_PRIM_TRIANGLE_STRIP: 2173 case PIPE_PRIM_TRIANGLE_FAN: 2174 return SHADER_PRIM_TRIANGLES; 2175 default: 2176 /* Since we don't allow GS with multiview, we can only see non-adjacency 2177 * primitives. 2178 */ 2179 unreachable("Unexpected pipeline primitive type"); 2180 } 2181} 2182 2183static enum shader_prim 2184multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline) 2185{ 2186 switch (pipeline->topology) { 2187 case PIPE_PRIM_POINTS: 2188 return SHADER_PRIM_POINTS; 2189 case PIPE_PRIM_LINES: 2190 case PIPE_PRIM_LINE_STRIP: 2191 return SHADER_PRIM_LINE_STRIP; 2192 case PIPE_PRIM_TRIANGLES: 2193 case PIPE_PRIM_TRIANGLE_STRIP: 2194 case PIPE_PRIM_TRIANGLE_FAN: 2195 return SHADER_PRIM_TRIANGLE_STRIP; 2196 default: 2197 /* Since we don't allow GS with multiview, we can only see non-adjacency 2198 * primitives. 2199 */ 2200 unreachable("Unexpected pipeline primitive type"); 2201 } 2202} 2203 2204static bool 2205pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline, 2206 struct v3dv_pipeline_cache *cache, 2207 const VkAllocationCallbacks *pAllocator) 2208{ 2209 /* Create the passthrough GS from the VS output interface */ 2210 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); 2211 nir_shader *vs_nir = pipeline->vs->nir; 2212 2213 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); 2214 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, 2215 "multiview broadcast gs"); 2216 nir_shader *nir = b.shader; 2217 nir->info.inputs_read = vs_nir->info.outputs_written; 2218 nir->info.outputs_written = vs_nir->info.outputs_written | 2219 (1ull << VARYING_SLOT_LAYER); 2220 2221 uint32_t vertex_count = u_vertices_per_prim(pipeline->topology); 2222 nir->info.gs.input_primitive = 2223 multiview_gs_input_primitive_from_pipeline(pipeline); 2224 nir->info.gs.output_primitive = 2225 multiview_gs_output_primitive_from_pipeline(pipeline); 2226 nir->info.gs.vertices_in = vertex_count; 2227 nir->info.gs.vertices_out = nir->info.gs.vertices_in; 2228 nir->info.gs.invocations = 1; 2229 nir->info.gs.active_stream_mask = 0x1; 2230 2231 /* Make a list of GS input/output variables from the VS outputs */ 2232 nir_variable *in_vars[100]; 2233 nir_variable *out_vars[100]; 2234 uint32_t var_count = 0; 2235 nir_foreach_shader_out_variable(out_vs_var, vs_nir) { 2236 char name[8]; 2237 snprintf(name, ARRAY_SIZE(name), "in_%d", var_count); 2238 2239 in_vars[var_count] = 2240 nir_variable_create(nir, nir_var_shader_in, 2241 glsl_array_type(out_vs_var->type, vertex_count, 0), 2242 name); 2243 in_vars[var_count]->data.location = out_vs_var->data.location; 2244 in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac; 2245 in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; 2246 2247 snprintf(name, ARRAY_SIZE(name), "out_%d", var_count); 2248 out_vars[var_count] = 2249 nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name); 2250 out_vars[var_count]->data.location = out_vs_var->data.location; 2251 out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; 2252 2253 var_count++; 2254 
} 2255 2256 /* Add the gl_Layer output variable */ 2257 nir_variable *out_layer = 2258 nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), 2259 "out_Layer"); 2260 out_layer->data.location = VARYING_SLOT_LAYER; 2261 2262 /* Get the view index value that we will write to gl_Layer */ 2263 nir_ssa_def *layer = 2264 nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32); 2265 2266 /* Emit all output vertices */ 2267 for (uint32_t vi = 0; vi < vertex_count; vi++) { 2268 /* Emit all output varyings */ 2269 for (uint32_t i = 0; i < var_count; i++) { 2270 nir_deref_instr *in_value = 2271 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi); 2272 nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value); 2273 } 2274 2275 /* Emit gl_Layer write */ 2276 nir_store_var(&b, out_layer, layer, 0x1); 2277 2278 nir_emit_vertex(&b, 0); 2279 } 2280 nir_end_primitive(&b, 0); 2281 2282 /* Make sure we run our pre-process NIR passes so we produce NIR compatible 2283 * with what we expect from SPIR-V modules. 2284 */ 2285 preprocess_nir(nir); 2286 2287 /* Attach the geometry shader to the pipeline */ 2288 struct v3dv_device *device = pipeline->device; 2289 struct v3dv_physical_device *physical_device = 2290 &device->instance->physicalDevice; 2291 2292 struct v3dv_pipeline_stage *p_stage = 2293 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 2294 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2295 2296 if (p_stage == NULL) { 2297 ralloc_free(nir); 2298 return false; 2299 } 2300 2301 p_stage->pipeline = pipeline; 2302 p_stage->stage = BROADCOM_SHADER_GEOMETRY; 2303 p_stage->entrypoint = "main"; 2304 p_stage->module = 0; 2305 p_stage->nir = nir; 2306 pipeline_compute_sha1_from_nir(p_stage); 2307 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); 2308 2309 pipeline->has_gs = true; 2310 pipeline->gs = p_stage; 2311 pipeline->active_stages |= MESA_SHADER_GEOMETRY; 2312 2313 pipeline->gs_bin = 2314 pipeline_stage_create_binning(pipeline->gs, pAllocator); 2315 if (pipeline->gs_bin == NULL) 2316 return false; 2317 2318 return true; 2319} 2320 2321static void 2322pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline) 2323{ 2324 for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) { 2325 struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i]; 2326 if (variant && variant->prog_data.base->has_global_address) { 2327 pipeline->uses_buffer_device_address = true; 2328 return; 2329 } 2330 } 2331 2332 pipeline->uses_buffer_device_address = false; 2333} 2334 2335/* 2336 * It compiles a pipeline. Note that it also allocate internal object, but if 2337 * some allocations success, but other fails, the method is not freeing the 2338 * successful ones. 2339 * 2340 * This is done to simplify the code, as what we do in this case is just call 2341 * the pipeline destroy method, and this would handle freeing the internal 2342 * objects allocated. We just need to be careful setting to NULL the objects 2343 * not allocated. 
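 *
 * As a rough, illustrative sketch, the expected caller-side pattern is what
 * graphics_pipeline_create() ends up doing further below:
 *
 *    result = pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator);
 *    if (result != VK_SUCCESS)
 *       v3dv_destroy_pipeline(pipeline, device, pAllocator);
 *
 * so any objects that were successfully allocated before the failure are
 * released by the destroy path.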
2344 */ 2345static VkResult 2346pipeline_compile_graphics(struct v3dv_pipeline *pipeline, 2347 struct v3dv_pipeline_cache *cache, 2348 const VkGraphicsPipelineCreateInfo *pCreateInfo, 2349 const VkAllocationCallbacks *pAllocator) 2350{ 2351 VkPipelineCreationFeedback pipeline_feedback = { 2352 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 2353 }; 2354 int64_t pipeline_start = os_time_get_nano(); 2355 2356 struct v3dv_device *device = pipeline->device; 2357 struct v3dv_physical_device *physical_device = 2358 &device->instance->physicalDevice; 2359 2360 /* First pass to get some common info from the shader, and create the 2361 * individual pipeline_stage objects 2362 */ 2363 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { 2364 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; 2365 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); 2366 2367 struct v3dv_pipeline_stage *p_stage = 2368 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 2369 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2370 2371 if (p_stage == NULL) 2372 return VK_ERROR_OUT_OF_HOST_MEMORY; 2373 2374 /* Note that we are assigning program_id slightly differently that 2375 * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin 2376 * would have a different program_id, while v3d would have the same for 2377 * both. For the case of v3dv, it is more natural to have an id this way, 2378 * as right now we are using it for debugging, not for shader-db. 2379 */ 2380 p_stage->program_id = 2381 p_atomic_inc_return(&physical_device->next_program_id); 2382 2383 p_stage->pipeline = pipeline; 2384 p_stage->stage = gl_shader_stage_to_broadcom(stage); 2385 p_stage->entrypoint = sinfo->pName; 2386 p_stage->module = vk_shader_module_from_handle(sinfo->module); 2387 p_stage->spec_info = sinfo->pSpecializationInfo; 2388 2389 vk_pipeline_hash_shader_stage(&pCreateInfo->pStages[i], p_stage->shader_sha1); 2390 2391 pipeline->active_stages |= sinfo->stage; 2392 2393 /* We will try to get directly the compiled shader variant, so let's not 2394 * worry about getting the nir shader for now. 
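 * If the whole-pipeline cache lookup below misses, pipeline_stage_get_nir()
 * is used later on to obtain the NIR, either from the NIR cache or by
 * compiling the SPIR-V module.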
2395 */ 2396 p_stage->nir = NULL; 2397 2398 switch(stage) { 2399 case MESA_SHADER_VERTEX: 2400 pipeline->vs = p_stage; 2401 pipeline->vs_bin = 2402 pipeline_stage_create_binning(pipeline->vs, pAllocator); 2403 if (pipeline->vs_bin == NULL) 2404 return VK_ERROR_OUT_OF_HOST_MEMORY; 2405 break; 2406 2407 case MESA_SHADER_GEOMETRY: 2408 pipeline->has_gs = true; 2409 pipeline->gs = p_stage; 2410 pipeline->gs_bin = 2411 pipeline_stage_create_binning(pipeline->gs, pAllocator); 2412 if (pipeline->gs_bin == NULL) 2413 return VK_ERROR_OUT_OF_HOST_MEMORY; 2414 break; 2415 2416 case MESA_SHADER_FRAGMENT: 2417 pipeline->fs = p_stage; 2418 break; 2419 2420 default: 2421 unreachable("not supported shader stage"); 2422 } 2423 } 2424 2425 /* Add a no-op fragment shader if needed */ 2426 if (!pipeline->fs) { 2427 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 2428 &v3dv_nir_options, 2429 "noop_fs"); 2430 2431 struct v3dv_pipeline_stage *p_stage = 2432 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, 2433 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2434 2435 if (p_stage == NULL) 2436 return VK_ERROR_OUT_OF_HOST_MEMORY; 2437 2438 p_stage->pipeline = pipeline; 2439 p_stage->stage = BROADCOM_SHADER_FRAGMENT; 2440 p_stage->entrypoint = "main"; 2441 p_stage->module = 0; 2442 p_stage->nir = b.shader; 2443 pipeline_compute_sha1_from_nir(p_stage); 2444 p_stage->program_id = 2445 p_atomic_inc_return(&physical_device->next_program_id); 2446 2447 pipeline->fs = p_stage; 2448 pipeline->active_stages |= MESA_SHADER_FRAGMENT; 2449 } 2450 2451 /* If multiview is enabled, we inject a custom passthrough geometry shader 2452 * to broadcast draw calls to the appropriate views. 2453 */ 2454 assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs)); 2455 if (pipeline->subpass->view_mask) { 2456 if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator)) 2457 return VK_ERROR_OUT_OF_HOST_MEMORY; 2458 } 2459 2460 /* First we try to get the variants from the pipeline cache (unless we are 2461 * required to capture internal representations, since in that case we need 2462 * compile). 
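 * A cache hit returns a v3dv_pipeline_shared_data that already holds all the
 * compiled variants for this pipeline, in which case we skip compilation and
 * jump straight to the success label.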
2463 */ 2464 bool needs_executable_info = 2465 pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 2466 if (!needs_executable_info) { 2467 struct v3dv_pipeline_key pipeline_key; 2468 pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo); 2469 pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1); 2470 2471 bool cache_hit = false; 2472 2473 pipeline->shared_data = 2474 v3dv_pipeline_cache_search_for_pipeline(cache, 2475 pipeline->sha1, 2476 &cache_hit); 2477 2478 if (pipeline->shared_data != NULL) { 2479 /* A correct pipeline must have at least a VS and FS */ 2480 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]); 2481 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); 2482 assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); 2483 assert(!pipeline->gs || 2484 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]); 2485 assert(!pipeline->gs || 2486 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); 2487 2488 if (cache_hit && cache != &pipeline->device->default_pipeline_cache) 2489 pipeline_feedback.flags |= 2490 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 2491 2492 goto success; 2493 } 2494 } 2495 2496 if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) 2497 return VK_PIPELINE_COMPILE_REQUIRED; 2498 2499 /* Otherwise we try to get the NIR shaders (either from the original SPIR-V 2500 * shader or the pipeline cache) and compile. 2501 */ 2502 pipeline->shared_data = 2503 v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true); 2504 if (!pipeline->shared_data) 2505 return VK_ERROR_OUT_OF_HOST_MEMORY; 2506 2507 pipeline->vs->feedback.flags |= 2508 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 2509 if (pipeline->gs) 2510 pipeline->gs->feedback.flags |= 2511 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 2512 pipeline->fs->feedback.flags |= 2513 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 2514 2515 if (!pipeline->vs->nir) 2516 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); 2517 if (pipeline->gs && !pipeline->gs->nir) 2518 pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache); 2519 if (!pipeline->fs->nir) 2520 pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache); 2521 2522 /* Linking + pipeline lowerings */ 2523 if (pipeline->gs) { 2524 link_shaders(pipeline->gs->nir, pipeline->fs->nir); 2525 link_shaders(pipeline->vs->nir, pipeline->gs->nir); 2526 } else { 2527 link_shaders(pipeline->vs->nir, pipeline->fs->nir); 2528 } 2529 2530 pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout); 2531 lower_fs_io(pipeline->fs->nir); 2532 2533 if (pipeline->gs) { 2534 pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout); 2535 lower_gs_io(pipeline->gs->nir); 2536 } 2537 2538 pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout); 2539 lower_vs_io(pipeline->vs->nir); 2540 2541 /* Compiling to vir */ 2542 VkResult vk_result; 2543 2544 /* We should have got all the variants or no variants from the cache */ 2545 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); 2546 vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo); 2547 if (vk_result != VK_SUCCESS) 2548 return vk_result; 2549 2550 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] && 2551 !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); 2552 2553 if (pipeline->gs) { 2554 vk_result = 2555 pipeline_compile_geometry_shader(pipeline, 
pAllocator, pCreateInfo); 2556 if (vk_result != VK_SUCCESS) 2557 return vk_result; 2558 } 2559 2560 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] && 2561 !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); 2562 2563 vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo); 2564 if (vk_result != VK_SUCCESS) 2565 return vk_result; 2566 2567 if (!upload_assembly(pipeline)) 2568 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 2569 2570 v3dv_pipeline_cache_upload_pipeline(pipeline, cache); 2571 2572 success: 2573 2574 pipeline_check_buffer_device_address(pipeline); 2575 2576 pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 2577 write_creation_feedback(pipeline, 2578 pCreateInfo->pNext, 2579 &pipeline_feedback, 2580 pCreateInfo->stageCount, 2581 pCreateInfo->pStages); 2582 2583 /* Since we have the variants in the pipeline shared data we can now free 2584 * the pipeline stages. 2585 */ 2586 if (!needs_executable_info) 2587 pipeline_free_stages(device, pipeline, pAllocator); 2588 2589 pipeline_check_spill_size(pipeline); 2590 2591 return compute_vpm_config(pipeline); 2592} 2593 2594static VkResult 2595compute_vpm_config(struct v3dv_pipeline *pipeline) 2596{ 2597 struct v3dv_shader_variant *vs_variant = 2598 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 2599 struct v3dv_shader_variant *vs_bin_variant = 2600 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 2601 struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs; 2602 struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs; 2603 2604 struct v3d_gs_prog_data *gs = NULL; 2605 struct v3d_gs_prog_data *gs_bin = NULL; 2606 if (pipeline->has_gs) { 2607 struct v3dv_shader_variant *gs_variant = 2608 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 2609 struct v3dv_shader_variant *gs_bin_variant = 2610 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 2611 gs = gs_variant->prog_data.gs; 2612 gs_bin = gs_bin_variant->prog_data.gs; 2613 } 2614 2615 if (!v3d_compute_vpm_config(&pipeline->device->devinfo, 2616 vs_bin, vs, gs_bin, gs, 2617 &pipeline->vpm_cfg_bin, 2618 &pipeline->vpm_cfg)) { 2619 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 2620 } 2621 2622 return VK_SUCCESS; 2623} 2624 2625static unsigned 2626v3dv_dynamic_state_mask(VkDynamicState state) 2627{ 2628 switch(state) { 2629 case VK_DYNAMIC_STATE_VIEWPORT: 2630 return V3DV_DYNAMIC_VIEWPORT; 2631 case VK_DYNAMIC_STATE_SCISSOR: 2632 return V3DV_DYNAMIC_SCISSOR; 2633 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: 2634 return V3DV_DYNAMIC_STENCIL_COMPARE_MASK; 2635 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: 2636 return V3DV_DYNAMIC_STENCIL_WRITE_MASK; 2637 case VK_DYNAMIC_STATE_STENCIL_REFERENCE: 2638 return V3DV_DYNAMIC_STENCIL_REFERENCE; 2639 case VK_DYNAMIC_STATE_BLEND_CONSTANTS: 2640 return V3DV_DYNAMIC_BLEND_CONSTANTS; 2641 case VK_DYNAMIC_STATE_DEPTH_BIAS: 2642 return V3DV_DYNAMIC_DEPTH_BIAS; 2643 case VK_DYNAMIC_STATE_LINE_WIDTH: 2644 return V3DV_DYNAMIC_LINE_WIDTH; 2645 case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: 2646 return V3DV_DYNAMIC_COLOR_WRITE_ENABLE; 2647 2648 /* Depth bounds testing is not available in in V3D 4.2 so here we are just 2649 * ignoring this dynamic state. We are already asserting at pipeline creation 2650 * time that depth bounds testing is not enabled. 
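 * Returning 0 just means this state does not contribute any bit to the
 * dynamic state mask.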
2651 */ 2652 case VK_DYNAMIC_STATE_DEPTH_BOUNDS: 2653 return 0; 2654 2655 default: 2656 unreachable("Unhandled dynamic state"); 2657 } 2658} 2659 2660static void 2661pipeline_init_dynamic_state( 2662 struct v3dv_pipeline *pipeline, 2663 const VkPipelineDynamicStateCreateInfo *pDynamicState, 2664 const VkPipelineViewportStateCreateInfo *pViewportState, 2665 const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState, 2666 const VkPipelineColorBlendStateCreateInfo *pColorBlendState, 2667 const VkPipelineRasterizationStateCreateInfo *pRasterizationState, 2668 const VkPipelineColorWriteCreateInfoEXT *pColorWriteState) 2669{ 2670 /* Initialize to default values */ 2671 struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state; 2672 memset(dynamic, 0, sizeof(*dynamic)); 2673 dynamic->stencil_compare_mask.front = ~0; 2674 dynamic->stencil_compare_mask.back = ~0; 2675 dynamic->stencil_write_mask.front = ~0; 2676 dynamic->stencil_write_mask.back = ~0; 2677 dynamic->line_width = 1.0f; 2678 dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1; 2679 2680 /* Create a mask of enabled dynamic states */ 2681 uint32_t dynamic_states = 0; 2682 if (pDynamicState) { 2683 uint32_t count = pDynamicState->dynamicStateCount; 2684 for (uint32_t s = 0; s < count; s++) { 2685 dynamic_states |= 2686 v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]); 2687 } 2688 } 2689 2690 /* For any pipeline states that are not dynamic, set the dynamic state 2691 * from the static pipeline state. 2692 */ 2693 if (pViewportState) { 2694 if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) { 2695 dynamic->viewport.count = pViewportState->viewportCount; 2696 typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports, 2697 pViewportState->viewportCount); 2698 2699 for (uint32_t i = 0; i < dynamic->viewport.count; i++) { 2700 v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i], 2701 dynamic->viewport.scale[i], 2702 dynamic->viewport.translate[i]); 2703 } 2704 } 2705 2706 if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) { 2707 dynamic->scissor.count = pViewportState->scissorCount; 2708 typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors, 2709 pViewportState->scissorCount); 2710 } 2711 } 2712 2713 if (pDepthStencilState) { 2714 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) { 2715 dynamic->stencil_compare_mask.front = 2716 pDepthStencilState->front.compareMask; 2717 dynamic->stencil_compare_mask.back = 2718 pDepthStencilState->back.compareMask; 2719 } 2720 2721 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) { 2722 dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask; 2723 dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask; 2724 } 2725 2726 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) { 2727 dynamic->stencil_reference.front = pDepthStencilState->front.reference; 2728 dynamic->stencil_reference.back = pDepthStencilState->back.reference; 2729 } 2730 } 2731 2732 if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) { 2733 memcpy(dynamic->blend_constants, pColorBlendState->blendConstants, 2734 sizeof(dynamic->blend_constants)); 2735 } 2736 2737 if (pRasterizationState) { 2738 if (pRasterizationState->depthBiasEnable && 2739 !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) { 2740 dynamic->depth_bias.constant_factor = 2741 pRasterizationState->depthBiasConstantFactor; 2742 dynamic->depth_bias.depth_bias_clamp = 2743 pRasterizationState->depthBiasClamp; 2744 dynamic->depth_bias.slope_factor = 2745 
pRasterizationState->depthBiasSlopeFactor; 2746 } 2747 if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH)) 2748 dynamic->line_width = pRasterizationState->lineWidth; 2749 } 2750 2751 if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) { 2752 dynamic->color_write_enable = 0; 2753 for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++) 2754 dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; 2755 } 2756 2757 pipeline->dynamic_state.mask = dynamic_states; 2758} 2759 2760static bool 2761stencil_op_is_no_op(const VkStencilOpState *stencil) 2762{ 2763 return stencil->depthFailOp == VK_STENCIL_OP_KEEP && 2764 stencil->compareOp == VK_COMPARE_OP_ALWAYS; 2765} 2766 2767static void 2768enable_depth_bias(struct v3dv_pipeline *pipeline, 2769 const VkPipelineRasterizationStateCreateInfo *rs_info) 2770{ 2771 pipeline->depth_bias.enabled = false; 2772 pipeline->depth_bias.is_z16 = false; 2773 2774 if (!rs_info || !rs_info->depthBiasEnable) 2775 return; 2776 2777 /* Check the depth/stencil attachment description for the subpass used with 2778 * this pipeline. 2779 */ 2780 assert(pipeline->pass && pipeline->subpass); 2781 struct v3dv_render_pass *pass = pipeline->pass; 2782 struct v3dv_subpass *subpass = pipeline->subpass; 2783 2784 if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) 2785 return; 2786 2787 assert(subpass->ds_attachment.attachment < pass->attachment_count); 2788 struct v3dv_render_pass_attachment *att = 2789 &pass->attachments[subpass->ds_attachment.attachment]; 2790 2791 if (att->desc.format == VK_FORMAT_D16_UNORM) 2792 pipeline->depth_bias.is_z16 = true; 2793 2794 pipeline->depth_bias.enabled = true; 2795} 2796 2797static void 2798pipeline_set_ez_state(struct v3dv_pipeline *pipeline, 2799 const VkPipelineDepthStencilStateCreateInfo *ds_info) 2800{ 2801 if (!ds_info || !ds_info->depthTestEnable) { 2802 pipeline->ez_state = V3D_EZ_DISABLED; 2803 return; 2804 } 2805 2806 switch (ds_info->depthCompareOp) { 2807 case VK_COMPARE_OP_LESS: 2808 case VK_COMPARE_OP_LESS_OR_EQUAL: 2809 pipeline->ez_state = V3D_EZ_LT_LE; 2810 break; 2811 case VK_COMPARE_OP_GREATER: 2812 case VK_COMPARE_OP_GREATER_OR_EQUAL: 2813 pipeline->ez_state = V3D_EZ_GT_GE; 2814 break; 2815 case VK_COMPARE_OP_NEVER: 2816 case VK_COMPARE_OP_EQUAL: 2817 pipeline->ez_state = V3D_EZ_UNDECIDED; 2818 break; 2819 default: 2820 pipeline->ez_state = V3D_EZ_DISABLED; 2821 pipeline->incompatible_ez_test = true; 2822 break; 2823 } 2824 2825 /* If stencil is enabled and is not a no-op, we need to disable EZ */ 2826 if (ds_info->stencilTestEnable && 2827 (!stencil_op_is_no_op(&ds_info->front) || 2828 !stencil_op_is_no_op(&ds_info->back))) { 2829 pipeline->ez_state = V3D_EZ_DISABLED; 2830 } 2831 2832 /* If the FS writes Z, then it may update against the chosen EZ direction */ 2833 struct v3dv_shader_variant *fs_variant = 2834 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 2835 if (fs_variant && fs_variant->prog_data.fs->writes_z && 2836 !fs_variant->prog_data.fs->writes_z_from_fep) { 2837 pipeline->ez_state = V3D_EZ_DISABLED; 2838 } 2839} 2840 2841static bool 2842pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline) 2843{ 2844 for (uint8_t i = 0; i < pipeline->va_count; i++) { 2845 if (vk_format_is_int(pipeline->va[i].vk_format)) 2846 return true; 2847 } 2848 return false; 2849} 2850 2851/* @pipeline can be NULL. 
We assume in that case that all the attributes have 2852 * a float format (we only create an all-float BO once and we reuse it with 2853 * all float pipelines), otherwise we look at the actual type of each 2854 * attribute used with the specific pipeline passed in. 2855 */ 2856struct v3dv_bo * 2857v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device, 2858 struct v3dv_pipeline *pipeline) 2859{ 2860 uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; 2861 struct v3dv_bo *bo; 2862 2863 bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true); 2864 2865 if (!bo) { 2866 fprintf(stderr, "failed to allocate memory for the default " 2867 "attribute values\n"); 2868 return NULL; 2869 } 2870 2871 bool ok = v3dv_bo_map(device, bo, size); 2872 if (!ok) { 2873 fprintf(stderr, "failed to map default attribute values buffer\n"); 2874 return false; 2875 } 2876 2877 uint32_t *attrs = bo->map; 2878 uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0; 2879 for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) { 2880 attrs[i * 4 + 0] = 0; 2881 attrs[i * 4 + 1] = 0; 2882 attrs[i * 4 + 2] = 0; 2883 VkFormat attr_format = 2884 pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED; 2885 if (i < va_count && vk_format_is_int(attr_format)) { 2886 attrs[i * 4 + 3] = 1; 2887 } else { 2888 attrs[i * 4 + 3] = fui(1.0); 2889 } 2890 } 2891 2892 v3dv_bo_unmap(device, bo); 2893 2894 return bo; 2895} 2896 2897static void 2898pipeline_set_sample_mask(struct v3dv_pipeline *pipeline, 2899 const VkPipelineMultisampleStateCreateInfo *ms_info) 2900{ 2901 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1; 2902 2903 /* Ignore pSampleMask if we are not enabling multisampling. The hardware 2904 * requires this to be 0xf or 0x0 if using a single sample. 2905 */ 2906 if (ms_info && ms_info->pSampleMask && 2907 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) { 2908 pipeline->sample_mask &= ms_info->pSampleMask[0]; 2909 } 2910} 2911 2912static void 2913pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline, 2914 const VkPipelineMultisampleStateCreateInfo *ms_info) 2915{ 2916 pipeline->sample_rate_shading = 2917 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT && 2918 ms_info->sampleShadingEnable; 2919} 2920 2921static VkResult 2922pipeline_init(struct v3dv_pipeline *pipeline, 2923 struct v3dv_device *device, 2924 struct v3dv_pipeline_cache *cache, 2925 const VkGraphicsPipelineCreateInfo *pCreateInfo, 2926 const VkAllocationCallbacks *pAllocator) 2927{ 2928 VkResult result = VK_SUCCESS; 2929 2930 pipeline->device = device; 2931 2932 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout); 2933 pipeline->layout = layout; 2934 2935 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass); 2936 assert(pCreateInfo->subpass < render_pass->subpass_count); 2937 pipeline->pass = render_pass; 2938 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass]; 2939 2940 const VkPipelineInputAssemblyStateCreateInfo *ia_info = 2941 pCreateInfo->pInputAssemblyState; 2942 pipeline->topology = vk_to_pipe_prim_type[ia_info->topology]; 2943 2944 /* If rasterization is not enabled, various CreateInfo structs must be 2945 * ignored. 2946 */ 2947 const bool raster_enabled = 2948 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 2949 2950 const VkPipelineViewportStateCreateInfo *vp_info = 2951 raster_enabled ? pCreateInfo->pViewportState : NULL; 2952 2953 const VkPipelineDepthStencilStateCreateInfo *ds_info = 2954 raster_enabled ? 
pCreateInfo->pDepthStencilState : NULL; 2955 2956 const VkPipelineRasterizationStateCreateInfo *rs_info = 2957 raster_enabled ? pCreateInfo->pRasterizationState : NULL; 2958 2959 const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info = 2960 rs_info ? vk_find_struct_const( 2961 rs_info->pNext, 2962 PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) : 2963 NULL; 2964 2965 const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info = 2966 rs_info ? vk_find_struct_const( 2967 rs_info->pNext, 2968 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) : 2969 NULL; 2970 2971 const VkPipelineColorBlendStateCreateInfo *cb_info = 2972 raster_enabled ? pCreateInfo->pColorBlendState : NULL; 2973 2974 const VkPipelineMultisampleStateCreateInfo *ms_info = 2975 raster_enabled ? pCreateInfo->pMultisampleState : NULL; 2976 2977 const VkPipelineColorWriteCreateInfoEXT *cw_info = 2978 cb_info ? vk_find_struct_const(cb_info->pNext, 2979 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) : 2980 NULL; 2981 2982 pipeline_init_dynamic_state(pipeline, 2983 pCreateInfo->pDynamicState, 2984 vp_info, ds_info, cb_info, rs_info, cw_info); 2985 2986 /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that 2987 * feature and it shouldn't be used by any pipeline. 2988 */ 2989 assert(!ds_info || !ds_info->depthBoundsTestEnable); 2990 2991 v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info, 2992 rs_info, pv_info, ls_info, 2993 ms_info); 2994 2995 enable_depth_bias(pipeline, rs_info); 2996 pipeline_set_sample_mask(pipeline, ms_info); 2997 pipeline_set_sample_rate_shading(pipeline, ms_info); 2998 2999 pipeline->primitive_restart = 3000 pCreateInfo->pInputAssemblyState->primitiveRestartEnable; 3001 3002 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator); 3003 3004 if (result != VK_SUCCESS) { 3005 /* Caller would already destroy the pipeline, and we didn't allocate any 3006 * extra info. We don't need to do anything else. 
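 * (graphics_pipeline_create() calls v3dv_destroy_pipeline() for any result
 * other than VK_SUCCESS, and that releases whatever was allocated so far.)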
3007 */ 3008 return result; 3009 } 3010 3011 const VkPipelineVertexInputStateCreateInfo *vi_info = 3012 pCreateInfo->pVertexInputState; 3013 3014 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info = 3015 vk_find_struct_const(vi_info->pNext, 3016 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); 3017 3018 v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info); 3019 3020 if (pipeline_has_integer_vertex_attrib(pipeline)) { 3021 pipeline->default_attribute_values = 3022 v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline); 3023 if (!pipeline->default_attribute_values) 3024 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 3025 } else { 3026 pipeline->default_attribute_values = NULL; 3027 } 3028 3029 /* This must be done after the pipeline has been compiled */ 3030 pipeline_set_ez_state(pipeline, ds_info); 3031 3032 return result; 3033} 3034 3035static VkResult 3036graphics_pipeline_create(VkDevice _device, 3037 VkPipelineCache _cache, 3038 const VkGraphicsPipelineCreateInfo *pCreateInfo, 3039 const VkAllocationCallbacks *pAllocator, 3040 VkPipeline *pPipeline) 3041{ 3042 V3DV_FROM_HANDLE(v3dv_device, device, _device); 3043 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 3044 3045 struct v3dv_pipeline *pipeline; 3046 VkResult result; 3047 3048 /* Use the default pipeline cache if none is specified */ 3049 if (cache == NULL && device->instance->default_pipeline_cache_enabled) 3050 cache = &device->default_pipeline_cache; 3051 3052 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), 3053 VK_OBJECT_TYPE_PIPELINE); 3054 3055 if (pipeline == NULL) 3056 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 3057 3058 result = pipeline_init(pipeline, device, cache, 3059 pCreateInfo, 3060 pAllocator); 3061 3062 if (result != VK_SUCCESS) { 3063 v3dv_destroy_pipeline(pipeline, device, pAllocator); 3064 if (result == VK_PIPELINE_COMPILE_REQUIRED) 3065 *pPipeline = VK_NULL_HANDLE; 3066 return result; 3067 } 3068 3069 *pPipeline = v3dv_pipeline_to_handle(pipeline); 3070 3071 return VK_SUCCESS; 3072} 3073 3074VKAPI_ATTR VkResult VKAPI_CALL 3075v3dv_CreateGraphicsPipelines(VkDevice _device, 3076 VkPipelineCache pipelineCache, 3077 uint32_t count, 3078 const VkGraphicsPipelineCreateInfo *pCreateInfos, 3079 const VkAllocationCallbacks *pAllocator, 3080 VkPipeline *pPipelines) 3081{ 3082 V3DV_FROM_HANDLE(v3dv_device, device, _device); 3083 VkResult result = VK_SUCCESS; 3084 3085 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3086 mtx_lock(&device->pdevice->mutex); 3087 3088 uint32_t i = 0; 3089 for (; i < count; i++) { 3090 VkResult local_result; 3091 3092 local_result = graphics_pipeline_create(_device, 3093 pipelineCache, 3094 &pCreateInfos[i], 3095 pAllocator, 3096 &pPipelines[i]); 3097 3098 if (local_result != VK_SUCCESS) { 3099 result = local_result; 3100 pPipelines[i] = VK_NULL_HANDLE; 3101 3102 if (pCreateInfos[i].flags & 3103 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 3104 break; 3105 } 3106 } 3107 3108 for (; i < count; i++) 3109 pPipelines[i] = VK_NULL_HANDLE; 3110 3111 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3112 mtx_unlock(&device->pdevice->mutex); 3113 3114 return result; 3115} 3116 3117static void 3118shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) 3119{ 3120 assert(glsl_type_is_vector_or_scalar(type)); 3121 3122 uint32_t comp_size = glsl_type_is_boolean(type) 3123 ? 
4 : glsl_get_bit_size(type) / 8; 3124 unsigned length = glsl_get_vector_elements(type); 3125 *size = comp_size * length, 3126 *align = comp_size * (length == 3 ? 4 : length); 3127} 3128 3129static void 3130lower_cs_shared(struct nir_shader *nir) 3131{ 3132 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, 3133 nir_var_mem_shared, shared_type_info); 3134 NIR_PASS(_, nir, nir_lower_explicit_io, 3135 nir_var_mem_shared, nir_address_format_32bit_offset); 3136} 3137 3138static VkResult 3139pipeline_compile_compute(struct v3dv_pipeline *pipeline, 3140 struct v3dv_pipeline_cache *cache, 3141 const VkComputePipelineCreateInfo *info, 3142 const VkAllocationCallbacks *alloc) 3143{ 3144 VkPipelineCreationFeedback pipeline_feedback = { 3145 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 3146 }; 3147 int64_t pipeline_start = os_time_get_nano(); 3148 3149 struct v3dv_device *device = pipeline->device; 3150 struct v3dv_physical_device *physical_device = 3151 &device->instance->physicalDevice; 3152 3153 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage; 3154 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); 3155 3156 struct v3dv_pipeline_stage *p_stage = 3157 vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8, 3158 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 3159 if (!p_stage) 3160 return VK_ERROR_OUT_OF_HOST_MEMORY; 3161 3162 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); 3163 p_stage->pipeline = pipeline; 3164 p_stage->stage = gl_shader_stage_to_broadcom(stage); 3165 p_stage->entrypoint = sinfo->pName; 3166 p_stage->module = vk_shader_module_from_handle(sinfo->module); 3167 p_stage->spec_info = sinfo->pSpecializationInfo; 3168 p_stage->feedback = (VkPipelineCreationFeedback) { 0 }; 3169 3170 vk_pipeline_hash_shader_stage(&info->stage, p_stage->shader_sha1); 3171 3172 p_stage->nir = NULL; 3173 3174 pipeline->cs = p_stage; 3175 pipeline->active_stages |= sinfo->stage; 3176 3177 /* First we try to get the variants from the pipeline cache (unless we are 3178 * required to capture internal representations, since in that case we need 3179 * compile). 
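 * For compute there is a single variant (BROADCOM_SHADER_COMPUTE), so a cache
 * hit already provides everything we need and we jump to the success label.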
3180 */ 3181 bool needs_executable_info = 3182 info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 3183 if (!needs_executable_info) { 3184 struct v3dv_pipeline_key pipeline_key; 3185 pipeline_populate_compute_key(pipeline, &pipeline_key, info); 3186 pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1); 3187 3188 bool cache_hit = false; 3189 pipeline->shared_data = 3190 v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit); 3191 3192 if (pipeline->shared_data != NULL) { 3193 assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); 3194 if (cache_hit && cache != &pipeline->device->default_pipeline_cache) 3195 pipeline_feedback.flags |= 3196 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 3197 3198 goto success; 3199 } 3200 } 3201 3202 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) 3203 return VK_PIPELINE_COMPILE_REQUIRED; 3204 3205 pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1, 3206 pipeline, 3207 false); 3208 if (!pipeline->shared_data) 3209 return VK_ERROR_OUT_OF_HOST_MEMORY; 3210 3211 p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 3212 3213 /* If not found on cache, compile it */ 3214 p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); 3215 assert(p_stage->nir); 3216 3217 nir_optimize(p_stage->nir, false); 3218 pipeline_lower_nir(pipeline, p_stage, pipeline->layout); 3219 lower_cs_shared(p_stage->nir); 3220 3221 VkResult result = VK_SUCCESS; 3222 3223 struct v3d_key key; 3224 memset(&key, 0, sizeof(key)); 3225 pipeline_populate_v3d_key(&key, p_stage, 0, 3226 pipeline->device->features.robustBufferAccess); 3227 pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] = 3228 pipeline_compile_shader_variant(p_stage, &key, sizeof(key), 3229 alloc, &result); 3230 3231 if (result != VK_SUCCESS) 3232 return result; 3233 3234 if (!upload_assembly(pipeline)) 3235 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 3236 3237 v3dv_pipeline_cache_upload_pipeline(pipeline, cache); 3238 3239success: 3240 3241 pipeline_check_buffer_device_address(pipeline); 3242 3243 pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 3244 write_creation_feedback(pipeline, 3245 info->pNext, 3246 &pipeline_feedback, 3247 1, 3248 &info->stage); 3249 3250 /* As we got the variants in pipeline->shared_data, after compiling we 3251 * don't need the pipeline_stages. 
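 * The exception is when executable info capture was requested: in that case
 * we keep the stages (and their NIR) around so that
 * pipeline_collect_executable_data() can dump them later.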
3252 */ 3253 if (!needs_executable_info) 3254 pipeline_free_stages(device, pipeline, alloc); 3255 3256 pipeline_check_spill_size(pipeline); 3257 3258 return VK_SUCCESS; 3259} 3260 3261static VkResult 3262compute_pipeline_init(struct v3dv_pipeline *pipeline, 3263 struct v3dv_device *device, 3264 struct v3dv_pipeline_cache *cache, 3265 const VkComputePipelineCreateInfo *info, 3266 const VkAllocationCallbacks *alloc) 3267{ 3268 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout); 3269 3270 pipeline->device = device; 3271 pipeline->layout = layout; 3272 3273 VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc); 3274 3275 return result; 3276} 3277 3278static VkResult 3279compute_pipeline_create(VkDevice _device, 3280 VkPipelineCache _cache, 3281 const VkComputePipelineCreateInfo *pCreateInfo, 3282 const VkAllocationCallbacks *pAllocator, 3283 VkPipeline *pPipeline) 3284{ 3285 V3DV_FROM_HANDLE(v3dv_device, device, _device); 3286 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 3287 3288 struct v3dv_pipeline *pipeline; 3289 VkResult result; 3290 3291 /* Use the default pipeline cache if none is specified */ 3292 if (cache == NULL && device->instance->default_pipeline_cache_enabled) 3293 cache = &device->default_pipeline_cache; 3294 3295 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), 3296 VK_OBJECT_TYPE_PIPELINE); 3297 if (pipeline == NULL) 3298 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 3299 3300 result = compute_pipeline_init(pipeline, device, cache, 3301 pCreateInfo, pAllocator); 3302 if (result != VK_SUCCESS) { 3303 v3dv_destroy_pipeline(pipeline, device, pAllocator); 3304 if (result == VK_PIPELINE_COMPILE_REQUIRED) 3305 *pPipeline = VK_NULL_HANDLE; 3306 return result; 3307 } 3308 3309 *pPipeline = v3dv_pipeline_to_handle(pipeline); 3310 3311 return VK_SUCCESS; 3312} 3313 3314VKAPI_ATTR VkResult VKAPI_CALL 3315v3dv_CreateComputePipelines(VkDevice _device, 3316 VkPipelineCache pipelineCache, 3317 uint32_t createInfoCount, 3318 const VkComputePipelineCreateInfo *pCreateInfos, 3319 const VkAllocationCallbacks *pAllocator, 3320 VkPipeline *pPipelines) 3321{ 3322 V3DV_FROM_HANDLE(v3dv_device, device, _device); 3323 VkResult result = VK_SUCCESS; 3324 3325 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3326 mtx_lock(&device->pdevice->mutex); 3327 3328 uint32_t i = 0; 3329 for (; i < createInfoCount; i++) { 3330 VkResult local_result; 3331 local_result = compute_pipeline_create(_device, 3332 pipelineCache, 3333 &pCreateInfos[i], 3334 pAllocator, 3335 &pPipelines[i]); 3336 3337 if (local_result != VK_SUCCESS) { 3338 result = local_result; 3339 pPipelines[i] = VK_NULL_HANDLE; 3340 3341 if (pCreateInfos[i].flags & 3342 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 3343 break; 3344 } 3345 } 3346 3347 for (; i < createInfoCount; i++) 3348 pPipelines[i] = VK_NULL_HANDLE; 3349 3350 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) 3351 mtx_unlock(&device->pdevice->mutex); 3352 3353 return result; 3354} 3355 3356static nir_shader * 3357pipeline_get_nir(struct v3dv_pipeline *pipeline, 3358 enum broadcom_shader_stage stage) 3359{ 3360 switch (stage) { 3361 case BROADCOM_SHADER_VERTEX: 3362 if (pipeline->vs) 3363 return pipeline->vs->nir; 3364 break; 3365 case BROADCOM_SHADER_VERTEX_BIN: 3366 if(pipeline->vs_bin) 3367 return pipeline->vs_bin->nir; 3368 break; 3369 case BROADCOM_SHADER_GEOMETRY: 3370 if(pipeline->gs) 3371 return pipeline->gs->nir; 3372 break; 3373 case BROADCOM_SHADER_GEOMETRY_BIN: 3374 if (pipeline->gs_bin) 3375 return 
         return pipeline->gs_bin->nir;
      break;
   case BROADCOM_SHADER_FRAGMENT:
      if (pipeline->fs)
         return pipeline->fs->nir;
      break;
   case BROADCOM_SHADER_COMPUTE:
      if (pipeline->cs)
         return pipeline->cs->nir;
      break;
   default:
      unreachable("Unsupported shader stage");
   }

   return NULL;
}

static struct v3d_prog_data *
pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
                       enum broadcom_shader_stage stage)
{
   if (pipeline->shared_data->variants[stage])
      return pipeline->shared_data->variants[stage]->prog_data.base;
   return NULL;
}

static uint64_t *
pipeline_get_qpu(struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 uint32_t *qpu_size)
{
   struct v3dv_shader_variant *variant =
      pipeline->shared_data->variants[stage];
   if (!variant) {
      *qpu_size = 0;
      return NULL;
   }

   /* We expect the QPU BO to have been mapped before calling here */
   struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;
   assert(qpu_bo && qpu_bo->map_size >= variant->assembly_offset +
                                        variant->qpu_insts_size);

   *qpu_size = variant->qpu_insts_size;
   uint64_t *qpu = (uint64_t *)
      (((uint8_t *) qpu_bo->map) + variant->assembly_offset);
   return qpu;
}

/* FIXME: we use the same macro in various drivers, maybe move it to
 * the common vk_util.h?
 */
#define WRITE_STR(field, ...) ({                                 \
   memset(field, 0, sizeof(field));                              \
   UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__);  \
   assert(_i > 0 && _i < sizeof(field));                         \
})

static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}

static void
append(char **str, size_t *offset, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
   va_end(args);
}

static void
pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
{
   if (pipeline->executables.mem_ctx)
      return;

   pipeline->executables.mem_ctx = ralloc_context(NULL);
   util_dynarray_init(&pipeline->executables.data,
                      pipeline->executables.mem_ctx);

   /* Don't crash for failed/bogus pipelines */
   if (!pipeline->shared_data || !pipeline->shared_data->assembly_bo)
      return;

   /* Map the assembly BO so we can read the pipeline's QPU code */
   struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;

   if (!v3dv_bo_map(pipeline->device, qpu_bo, qpu_bo->size)) {
      fprintf(stderr, "failed to map QPU buffer\n");
      return;
   }

   for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
      VkShaderStageFlags vk_stage =
         mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
      if (!(vk_stage & pipeline->active_stages))
         continue;

      nir_shader *nir = pipeline_get_nir(pipeline, s);
      char *nir_str = nir ?
         nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;

      char *qpu_str = NULL;
      uint32_t qpu_size;
      uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
      if (qpu) {
         uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
         qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
                                qpu_inst_count * 96);
         size_t offset = 0;
         for (int i = 0; i < qpu_inst_count; i++) {
            const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
            append(&qpu_str, &offset, "%s\n", str);
            ralloc_free((void *)str);
         }
      }

      struct v3dv_pipeline_executable_data data = {
         .stage = s,
         .nir_str = nir_str,
         .qpu_str = qpu_str,
      };
      util_dynarray_append(&pipeline->executables.data,
                           struct v3dv_pipeline_executable_data, data);
   }

   v3dv_bo_unmap(pipeline->device, qpu_bo);
}

static const struct v3dv_pipeline_executable_data *
pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
{
   assert(index < util_dynarray_num_elements(&pipeline->executables.data,
                                             struct v3dv_pipeline_executable_data));
   return util_dynarray_element(&pipeline->executables.data,
                                struct v3dv_pipeline_executable_data,
                                index);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutableInternalRepresentationsKHR(
   VkDevice device,
   const VkPipelineExecutableInfoKHR *pExecutableInfo,
   uint32_t *pInternalRepresentationCount,
   VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
                          pInternalRepresentations, pInternalRepresentationCount);

   bool incomplete = false;
   const struct v3dv_pipeline_executable_data *exe =
      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   if (exe->nir_str) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
                               &out, ir) {
         WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
         WRITE_STR(ir->description, "Final NIR form");
         if (!write_ir_text(ir, exe->nir_str))
            incomplete = true;
      }
   }

   if (exe->qpu_str) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
                               &out, ir) {
         WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
         WRITE_STR(ir->description, "Final QPU assembly");
         if (!write_ir_text(ir, exe->qpu_str))
            incomplete = true;
      }
   }

   return incomplete ?
      VK_INCOMPLETE : vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutablePropertiesKHR(
   VkDevice device,
   const VkPipelineInfoKHR *pPipelineInfo,
   uint32_t *pExecutableCount,
   VkPipelineExecutablePropertiesKHR *pProperties)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
                          pProperties, pExecutableCount);

   util_dynarray_foreach(&pipeline->executables.data,
                         struct v3dv_pipeline_executable_data, exe) {
      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
         gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
         props->stages = mesa_to_vk_shader_stage(mesa_stage);

         WRITE_STR(props->name, "%s (%s)",
                   _mesa_shader_stage_to_abbrev(mesa_stage),
                   broadcom_shader_stage_is_binning(exe->stage) ?
                      "Binning" : "Render");

         WRITE_STR(props->description, "%s",
                   _mesa_shader_stage_to_string(mesa_stage));

         props->subgroupSize = V3D_CHANNELS;
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutableStatisticsKHR(
   VkDevice device,
   const VkPipelineExecutableInfoKHR *pExecutableInfo,
   uint32_t *pStatisticCount,
   VkPipelineExecutableStatisticKHR *pStatistics)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   const struct v3dv_pipeline_executable_data *exe =
      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   struct v3d_prog_data *prog_data =
      pipeline_get_prog_data(pipeline, exe->stage);

   struct v3dv_shader_variant *variant =
      pipeline->shared_data->variants[exe->stage];
   uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          pStatistics, pStatisticCount);

   if (qpu_inst_count > 0) {
      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Compile Strategy");
         WRITE_STR(stat->description, "Chosen compile strategy index");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->compile_strategy_idx;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Instruction Count");
         WRITE_STR(stat->description, "Number of QPU instructions");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = qpu_inst_count;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Thread Count");
         WRITE_STR(stat->description, "Number of QPU threads dispatched");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->threads;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Spill Size");
         WRITE_STR(stat->description, "Size of the spill buffer in bytes");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->spill_size;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "TMU Spills");
         WRITE_STR(stat->description, "Number of times a register was spilled "
                                      "to memory");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         /* Report the spill count, not the spill buffer size, to match the
          * description above.
          */
         stat->value.u64 = prog_data->tmu_spills;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "TMU Fills");
         WRITE_STR(stat->description, "Number of times a register was filled "
                                      "from memory");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->tmu_fills;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "QPU Read Stalls");
         WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
                                      "register read dependency");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->qpu_read_stalls;
      }
   }

   return vk_outarray_status(&out);
}