/*
 * Copyright © 2019 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "lvp_private.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"
#include "glsl_types.h"
#include "util/os_time.h"
#include "spirv/nir_spirv.h"
#include "nir/nir_builder.h"
#include "lvp_lower_vulkan_resource.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "tgsi/tgsi_from_mesa.h"
#include "nir/nir_xfb_info.h"

#define SPIR_V_MAGIC_NUMBER 0x07230203

#define LVP_PIPELINE_DUP(dst, src, type, count) do {       \
      type *temp = ralloc_array(mem_ctx, type, count);     \
      if (!temp) return VK_ERROR_OUT_OF_HOST_MEMORY;       \
      memcpy(temp, (src), sizeof(type) * count);           \
      dst = temp;                                          \
   } while(0)

void
lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline)
{
   if (pipeline->shader_cso[PIPE_SHADER_VERTEX])
      device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);
   if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT])
      device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
   if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY])
      device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL])
      device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL])
      device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);
   if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
      device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
      ralloc_free(pipeline->pipeline_nir[i]);

   if (pipeline->layout)
      vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);

   ralloc_free(pipeline->mem_ctx);
   vk_free(&device->vk.alloc, pipeline->state_data);
   vk_object_base_finish(&pipeline->base);
   vk_free(&device->vk.alloc, pipeline);
}

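/* vkDestroyPipeline: destruction is deferred rather than immediate.  The
 * pipeline is queued on device->queue.pipeline_destroys under the queue's
 * pipeline lock; the actual teardown (lvp_pipeline_destroy above) is
 * presumably run later from the queue side, once no submitted work can
 * still reference the pipeline's shader CSOs.
 */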
VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   simple_mtx_lock(&device->queue.pipeline_lock);
   util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
   simple_mtx_unlock(&device->queue.pipeline_lock);
}

static inline unsigned
st_shader_stage_to_ptarget(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return PIPE_SHADER_VERTEX;
   case MESA_SHADER_FRAGMENT:
      return PIPE_SHADER_FRAGMENT;
   case MESA_SHADER_GEOMETRY:
      return PIPE_SHADER_GEOMETRY;
   case MESA_SHADER_TESS_CTRL:
      return PIPE_SHADER_TESS_CTRL;
   case MESA_SHADER_TESS_EVAL:
      return PIPE_SHADER_TESS_EVAL;
   case MESA_SHADER_COMPUTE:
      return PIPE_SHADER_COMPUTE;
   default:
      break;
   }

   assert(!"should not be reached");
   return PIPE_SHADER_VERTEX;
}

static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length,
   *align = comp_size;
}

static void
set_image_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                 nir_intrinsic_instr *instr,
                 bool reads, bool writes)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
      if (pipeline->layout->vk.set_layouts[s])
         value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].image_count;
   }
   value += binding->stage[nir->info.stage].image_index;
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;

   if (reads)
      pipeline->access[nir->info.stage].images_read |= mask;
   if (writes)
      pipeline->access[nir->info.stage].images_written |= mask;
}

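/* Mark an SSBO as written by this stage.  The buffer's flat slot index is
 * the sum of the shader-buffer counts of all lower-numbered descriptor sets
 * plus the binding's per-stage shader_buffer_index (the same flattening used
 * for the image accesses above).  For deref-based intrinsics with no direct
 * variable, the binding is chased through the deref chain instead.
 */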
static void
set_buffer_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                  nir_intrinsic_instr *instr)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   if (!var) {
      nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
      if (deref->modes != nir_var_mem_ssbo)
         return;
      nir_binding b = nir_chase_binding(instr->src[0]);
      var = nir_get_binding_variable(nir, b);
      if (!var)
         return;
   }
   if (var->data.mode != nir_var_mem_ssbo)
      return;
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
      if (pipeline->layout->vk.set_layouts[s])
         value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].shader_buffer_count;
   }
   value += binding->stage[nir->info.stage].shader_buffer_index;
   /* Structs have been lowered already, so get_aoa_size is sufficient. */
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
   pipeline->access[nir->info.stage].buffers_written |= mask;
}

static void
scan_intrinsic(struct lvp_pipeline *pipeline, nir_shader *nir, nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      set_image_access(pipeline, nir, instr, true, false);
      break;
   case nir_intrinsic_image_deref_store:
      set_image_access(pipeline, nir, instr, false, true);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
      set_image_access(pipeline, nir, instr, true, true);
      break;
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_store_deref:
      set_buffer_access(pipeline, nir, instr);
      break;
   default: break;
   }
}

static void
scan_pipeline_info(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   nir_foreach_function(function, nir) {
      if (function->impl)
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_intrinsic)
                  scan_intrinsic(pipeline, nir, nir_instr_as_intrinsic(instr));
            }
         }
   }
}

static bool
remove_scoped_barriers_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_scoped_barrier)
      return false;
   if (data) {
      if (nir_intrinsic_memory_scope(intr) == NIR_SCOPE_WORKGROUP ||
          nir_intrinsic_memory_scope(intr) == NIR_SCOPE_DEVICE)
         return false;
   }
   nir_instr_remove(instr);
   return true;
}

static bool
remove_scoped_barriers(nir_shader *nir, bool is_compute)
{
   return nir_shader_instructions_pass(nir, remove_scoped_barriers_impl, nir_metadata_dominance, (void*)is_compute);
}

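/* Rewrite demote/terminate (and their _if forms) to plain discard,
 * presumably because the gallium backend only consumes the discard
 * intrinsics; is_helper_invocation is handled separately via
 * nir_lower_is_helper_invocation before this pass runs.
 */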
static bool
lower_demote_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_terminate) {
      intr->intrinsic = nir_intrinsic_discard;
      return true;
   }
   if (intr->intrinsic == nir_intrinsic_demote_if ||
       intr->intrinsic == nir_intrinsic_terminate_if) {
      intr->intrinsic = nir_intrinsic_discard_if;
      return true;
   }
   return false;
}

static bool
lower_demote(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_demote_impl, nir_metadata_dominance, NULL);
}

static bool
find_tex(const nir_instr *instr, const void *data_cb)
{
   if (instr->type == nir_instr_type_tex)
      return true;
   return false;
}

static nir_ssa_def *
fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
{
   nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
   unsigned offset = 0;

   int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
   if (idx == -1)
      return NULL;

   if (!nir_src_is_const(tex_instr->src[idx].src))
      return NULL;
   offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);

   nir_tex_instr_remove_src(tex_instr, idx);
   tex_instr->texture_index += offset;
   return NIR_LOWER_INSTR_PROGRESS;
}

static bool
lvp_nir_fixup_indirect_tex(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
}

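/* Core optimization loop: keep running the NIR cleanup passes until a full
 * iteration makes no progress.  lvp_nir_fixup_indirect_tex runs inside the
 * loop so that texture offsets which become constant after optimization are
 * folded into texture_index.
 */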
static void
optimize(nir_shader *nir)
{
   bool progress = false;
   do {
      progress = false;

      NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
      NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);

      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_remove_phis);
      bool trivial_continues = false;
      NIR_PASS(trivial_continues, nir, nir_opt_trivial_continues);
      progress |= trivial_continues;
      if (trivial_continues) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
      }
      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_undef);

      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);
      NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
   } while (progress);
}

void
lvp_shader_optimize(nir_shader *nir)
{
   optimize(nir);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_opt_dce);
   nir_sweep(nir);
}

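/* Translate one VkPipelineShaderStageCreateInfo to NIR and run the common
 * lowering: SPIR-V -> NIR via vk_pipeline_shader_stage_to_nir, barrier and
 * demote cleanup, descriptor/pipeline-layout lowering, explicit I/O lowering
 * for push constants, UBO/SSBO, global and shared memory, and driver-location
 * assignment.  The result is stored in pipeline->pipeline_nir[stage]; shaders
 * above a small size threshold are also scanned for inlinable uniforms.
 */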
static VkResult
lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
                         const VkPipelineShaderStageCreateInfo *sinfo)
{
   struct lvp_device *pdevice = pipeline->device;
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
   const nir_shader_compiler_options *drv_options = pdevice->pscreen->get_compiler_options(pipeline->device->pscreen, PIPE_SHADER_IR_NIR, st_shader_stage_to_ptarget(stage));
   VkResult result;
   nir_shader *nir;

   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_VULKAN,
      .caps = {
         .float64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_DOUBLES) == 1),
         .int16 = true,
         .int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1),
         .tessellation = true,
         .float_controls = true,
         .image_ms_array = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .storage_image_ms = true,
         .geometry_streams = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .variable_pointers = true,
         .stencil_export = true,
         .post_depth_coverage = true,
         .transform_feedback = true,
         .device_group = true,
         .draw_parameters = true,
         .shader_viewport_index_layer = true,
         .multiview = true,
         .physical_storage_buffer_address = true,
         .int64_atomics = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_quad = true,
#if LLVM_VERSION_MAJOR >= 10
         .subgroup_shuffle = true,
#endif
         .subgroup_vote = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .int8 = true,
         .float16 = true,
         .demote_to_helper_invocation = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,
      .shared_addr_format = nir_address_format_32bit_offset,
   };

   result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, sinfo,
                                            &spirv_options, drv_options,
                                            NULL, &nir);
   if (result != VK_SUCCESS)
      return result;

   if (nir->info.stage != MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(nir, remove_scoped_barriers, nir->info.stage == MESA_SHADER_COMPUTE);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   struct nir_lower_subgroups_options subgroup_opts = {0};
   subgroup_opts.lower_quad = true;
   subgroup_opts.ballot_components = 1;
   subgroup_opts.ballot_bit_size = 32;
   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);

   if (stage == MESA_SHADER_FRAGMENT)
      lvp_lower_input_attachments(nir, false);
   NIR_PASS_V(nir, nir_lower_is_helper_invocation);
   NIR_PASS_V(nir, lower_demote);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_uniform | nir_var_image, NULL);

   scan_pipeline_info(pipeline, nir);

   optimize(nir);
   lvp_lower_pipeline_layout(pipeline->device, pipeline->layout, nir);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_global,
              nir_address_format_64bit_global);

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);

   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* TODO: also optimize the tex srcs. see radeonSI for reference */
   /* Skip if there are potentially conflicting rounding modes */
   struct nir_fold_16bit_tex_image_options fold_16bit_options = {
      .rounding_mode = nir_rounding_mode_undef,
      .fold_tex_dest = true,
   };
   NIR_PASS_V(nir, nir_fold_16bit_tex_image, &fold_16bit_options);

   lvp_shader_optimize(nir);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (nir->info.stage != MESA_SHADER_VERTEX)
      nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
   else {
      nir->num_inputs = util_last_bit64(nir->info.inputs_read);
      nir_foreach_shader_in_variable(var, nir) {
         var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
      }
   }
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               nir->info.stage);

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   if (impl->ssa_alloc > 100) /* skip for small shaders */
      pipeline->inlines[stage].must_inline = lvp_find_inlinable_uniforms(pipeline, nir);
   pipeline->pipeline_nir[stage] = nir;

   return VK_SUCCESS;
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == 0 ||
          tes_info->tess._primitive_mode == 0 ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

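/* Build gallium stream-output state from the NIR xfb_info of the last
 * pre-rasterization stage (geometry, then tess eval, then vertex).  XFB
 * strides and offsets are converted from bytes to dwords, and varying slots
 * are remapped to driver locations via output_mapping.
 */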
static void
lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
{
   gl_shader_stage stage = MESA_SHADER_VERTEX;
   if (pipeline->pipeline_nir[MESA_SHADER_GEOMETRY])
      stage = MESA_SHADER_GEOMETRY;
   else if (pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL])
      stage = MESA_SHADER_TESS_EVAL;
   pipeline->last_vertex = stage;

   nir_xfb_info *xfb_info = pipeline->pipeline_nir[stage]->xfb_info;
   if (xfb_info) {
      uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
      memset(output_mapping, 0, sizeof(output_mapping));

      nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]) {
         unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
                                            : glsl_count_attribute_slots(var->type, false);
         for (unsigned i = 0; i < slots; i++)
            output_mapping[var->data.location + i] = var->data.driver_location + i;
      }

      pipeline->stream_output.num_outputs = xfb_info->output_count;
      for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         if (xfb_info->buffers_written & (1 << i)) {
            pipeline->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
         }
      }
      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         pipeline->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
         pipeline->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
         pipeline->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
         pipeline->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
         pipeline->stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
         pipeline->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
      }
   }
}

void *
lvp_pipeline_compile_stage(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      struct pipe_compute_state shstate = {0};
      shstate.prog = nir;
      shstate.ir_type = PIPE_SHADER_IR_NIR;
      shstate.req_local_mem = nir->info.shared_size;
      return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
   } else {
      struct pipe_shader_state shstate = {0};
      shstate.type = PIPE_SHADER_IR_NIR;
      shstate.ir.nir = nir;
      if (nir->info.stage == pipeline->last_vertex)
         memcpy(&shstate.stream_output, &pipeline->stream_output, sizeof(shstate.stream_output));

      switch (nir->info.stage) {
      case MESA_SHADER_FRAGMENT:
         return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_VERTEX:
         return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_GEOMETRY:
         return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_CTRL:
         return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_EVAL:
         return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
      default:
         unreachable("illegal shader");
         break;
      }
   }
   return NULL;
}

void *
lvp_pipeline_compile(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
   return lvp_pipeline_compile_stage(pipeline, nir);
}

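/* Debug-only helper: merge_layouts() below uses this in an assert to check
 * that two independently created set layouts bound at the same index are
 * actually compatible before their pipeline layouts are merged.
 */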
#ifndef NDEBUG
static bool
layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
{
   const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
   uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
   uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
   /* base equal */
   if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
      return false;

   /* bindings equal */
   if (a->binding_count != b->binding_count)
      return false;
   size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
   const struct lvp_descriptor_set_binding_layout *la = a->binding;
   const struct lvp_descriptor_set_binding_layout *lb = b->binding;
   if (memcmp(la, lb, binding_size)) {
      for (unsigned i = 0; i < a->binding_count; i++) {
         if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
            return false;
      }
   }

   /* immutable sampler equal */
   if (a->immutable_sampler_count != b->immutable_sampler_count)
      return false;
   if (a->immutable_sampler_count) {
      size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
      if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
         struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
         struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
         for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
            if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
               return false;
         }
      }
   }
   return true;
}
#endif

static void
merge_layouts(struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
{
   if (!src)
      return;
   if (!dst->layout) {
      /* no layout created yet: copy onto ralloc ctx allocation for auto-free */
      dst->layout = ralloc(dst->mem_ctx, struct lvp_pipeline_layout);
      memcpy(dst->layout, src, sizeof(struct lvp_pipeline_layout));
      return;
   }
#ifndef NDEBUG
   /* verify that layouts match */
   const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
   const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
   for (unsigned i = 0; i < smaller->vk.set_count; i++) {
      if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
          smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
         continue;

      const struct lvp_descriptor_set_layout *smaller_set_layout =
         vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
      const struct lvp_descriptor_set_layout *bigger_set_layout =
         vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);

      assert(!smaller_set_layout->binding_count ||
             !bigger_set_layout->binding_count ||
             layouts_equal(smaller_set_layout, bigger_set_layout));
   }
#endif
   for (unsigned i = 0; i < src->vk.set_count; i++) {
      if (!dst->layout->vk.set_layouts[i])
         dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
   }
   dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
                                    src->vk.set_count);
   dst->layout->push_constant_size += src->push_constant_size;
   dst->layout->push_constant_stages |= src->push_constant_stages;
}

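/* Graphics pipeline (and graphics-pipeline-library) construction.
 * pipeline->stages tracks which VK_GRAPHICS_PIPELINE_LIBRARY_* sections this
 * object provides: the explicit library flags, or all four sections for a
 * classic non-library pipeline.  Linked libraries contribute their graphics
 * state, shaders and (independent-set) layouts, which are merged below.
 */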
static VkResult
lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
                           struct lvp_device *device,
                           struct lvp_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   VkResult result;

   const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
                                                                                GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
   const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
                                                                         PIPELINE_LIBRARY_CREATE_INFO_KHR);
   const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
   if (libinfo)
      pipeline->stages = libinfo->flags;
   else if (!libstate)
      pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
   pipeline->mem_ctx = ralloc_context(NULL);

   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
      pipeline->library = true;

   struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   if (layout)
      vk_pipeline_layout_ref(&layout->vk);

   if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
      /* this is a regular pipeline with no partials: directly reuse */
      pipeline->layout = layout;
   else if (pipeline->stages & layout_stages) {
      if ((pipeline->stages & layout_stages) == layout_stages)
         /* this has all the layout stages: directly reuse */
         pipeline->layout = layout;
      else {
         /* this is a partial: copy for later merging to avoid modifying another layout */
         merge_layouts(pipeline, layout);
      }
   }

   if (libstate) {
      for (unsigned i = 0; i < libstate->libraryCount; i++) {
         LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
         vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
                                          &p->graphics_state);
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            pipeline->line_smooth = p->line_smooth;
            pipeline->disable_multisample = p->disable_multisample;
            pipeline->line_rectangular = p->line_rectangular;
            pipeline->last_vertex = p->last_vertex;
            memcpy(&pipeline->stream_output, &p->stream_output, sizeof(p->stream_output));
            memcpy(&pipeline->access, &p->access, sizeof(p->access));
         }
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)
            pipeline->force_min_sample = p->force_min_sample;
         if (p->stages & layout_stages) {
            if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
               merge_layouts(pipeline, p->layout);
         }
         pipeline->stages |= p->stages;
      }
   }

   result = vk_graphics_pipeline_state_fill(&device->vk,
                                            &pipeline->graphics_state,
                                            pCreateInfo, NULL, NULL, NULL,
                                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
                                            &pipeline->state_data);
   if (result != VK_SUCCESS)
      return result;

   assert(pipeline->library || pipeline->stages == (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));

   pipeline->device = device;

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
      if (stage == MESA_SHADER_FRAGMENT) {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
            continue;
      } else {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
            continue;
      }
      result = lvp_shader_compile_to_ir(pipeline, sinfo);
      if (result != VK_SUCCESS)
         goto fail;

      switch (stage) {
      case MESA_SHADER_GEOMETRY:
         pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] &&
            pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->info.gs.output_primitive == SHADER_PRIM_LINES;
         break;
      case MESA_SHADER_FRAGMENT:
         if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
            pipeline->force_min_sample = true;
         break;
      default: break;
      }
   }

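   /* With both tessellation stages present, propagate the TCS patch vertex
    * count into the TES, merge the tessellation execution modes (the backend
    * reads them from the TES), and flip the winding when the Vulkan domain
    * origin is upper-left.
    */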
   if (pCreateInfo->stageCount && pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]) {
      nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL], pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
      merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info);
      if (pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
         pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw;
   }
   if (libstate) {
      for (unsigned i = 0; i < libstate->libraryCount; i++) {
         LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
            if (p->pipeline_nir[MESA_SHADER_FRAGMENT])
               pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[MESA_SHADER_FRAGMENT]);
         }
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            for (unsigned j = MESA_SHADER_VERTEX; j < MESA_SHADER_FRAGMENT; j++) {
               if (p->pipeline_nir[j])
                  pipeline->pipeline_nir[j] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[j]);
            }
         }
      }
   } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
      const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
      if (rs) {
         /* always draw bresenham if !smooth */
         pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ||
                                         rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      } else
         pipeline->line_rectangular = true;
      lvp_pipeline_xfb_init(pipeline);
   }

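   /* For complete (non-library) pipelines, compile every gathered NIR stage
    * to a gallium CSO now.  Stages flagged with inlines[stage].can_inline are
    * skipped here; they are presumably compiled later, once the uniform
    * values to inline are known.  A pipeline with no fragment shader gets a
    * dummy empty one so the fragment CSO is never NULL.
    */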
   if (!pipeline->library) {
      bool has_fragment_shader = false;
      for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
         if (!pipeline->pipeline_nir[i])
            continue;

         gl_shader_stage stage = i;
         assert(stage == pipeline->pipeline_nir[i]->info.stage);
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage);
         if (!pipeline->inlines[stage].can_inline)
            pipeline->shader_cso[pstage] = lvp_pipeline_compile(pipeline,
                                                                nir_shader_clone(NULL, pipeline->pipeline_nir[stage]));
         if (stage == MESA_SHADER_FRAGMENT)
            has_fragment_shader = true;
      }

      if (has_fragment_shader == false) {
         /* create a dummy fragment shader for this pipeline. */
         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
                                                        "dummy_frag");

         pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;
         struct pipe_shader_state shstate = {0};
         shstate.type = PIPE_SHADER_IR_NIR;
         shstate.ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]);
         pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      }
   }
   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
      if (pipeline->pipeline_nir[i])
         ralloc_free(pipeline->pipeline_nir[i]);
   }
   vk_free(&device->vk.alloc, pipeline->state_data);

   return result;
}

static VkResult
lvp_graphics_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkGraphicsPipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
    VkDevice                                    _device,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    count,
    const VkGraphicsPipelineCreateInfo*         pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_graphics_pipeline_create(_device,
                                          pipelineCache,
                                          &pCreateInfos[i],
                                          &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}

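/* Compute pipelines are simpler: reference the given layout directly, build
 * the single compute stage, and compile it to a CSO immediately unless its
 * uniforms are inlinable.
 */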
static VkResult
lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
                          struct lvp_device *device,
                          struct lvp_pipeline_cache *cache,
                          const VkComputePipelineCreateInfo *pCreateInfo)
{
   pipeline->device = device;
   pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   vk_pipeline_layout_ref(&pipeline->layout->vk);
   pipeline->force_min_sample = false;

   pipeline->mem_ctx = ralloc_context(NULL);
   pipeline->is_compute_pipeline = true;

   VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
   if (result != VK_SUCCESS)
      return result;

   if (!pipeline->inlines[MESA_SHADER_COMPUTE].can_inline)
      pipeline->shader_cso[PIPE_SHADER_COMPUTE] = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_COMPUTE]));
   return VK_SUCCESS;
}

static VkResult
lvp_compute_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkComputePipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
    VkDevice                                    _device,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    count,
    const VkComputePipelineCreateInfo*          pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_compute_pipeline_create(_device,
                                         pipelineCache,
                                         &pCreateInfos[i],
                                         &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}