1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "dzn_private.h" 25 26#include "spirv/nir_spirv.h" 27 28#include "dxil_nir.h" 29#include "nir_to_dxil.h" 30#include "dxil_spirv_nir.h" 31#include "spirv_to_dxil.h" 32 33#include "dxil_validator.h" 34 35#include "vk_alloc.h" 36#include "vk_util.h" 37#include "vk_format.h" 38#include "vk_pipeline.h" 39#include "vk_pipeline_cache.h" 40 41#include "util/u_debug.h" 42 43#define d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, __id, __type, __desc) \ 44 __type *__desc; \ 45 do { \ 46 struct { \ 47 D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type; \ 48 __type desc; \ 49 } *__wrapper; \ 50 (__stream)->SizeInBytes = ALIGN_POT((__stream)->SizeInBytes, alignof(void *)); \ 51 __wrapper = (void *)((uint8_t *)(__stream)->pPipelineStateSubobjectStream + (__stream)->SizeInBytes); \ 52 (__stream)->SizeInBytes += sizeof(*__wrapper); \ 53 assert((__stream)->SizeInBytes <= __maxstreamsz); \ 54 __wrapper->type = D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ ## __id; \ 55 __desc = &__wrapper->desc; \ 56 memset(__desc, 0, sizeof(*__desc)); \ 57 } while (0) 58 59#define d3d12_gfx_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \ 60 d3d12_pipeline_state_stream_new_desc(__stream, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc) 61 62#define d3d12_compute_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \ 63 d3d12_pipeline_state_stream_new_desc(__stream, MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc) 64 65static bool 66gfx_pipeline_variant_key_equal(const void *a, const void *b) 67{ 68 return !memcmp(a, b, sizeof(struct dzn_graphics_pipeline_variant_key)); 69} 70 71static uint32_t 72gfx_pipeline_variant_key_hash(const void *key) 73{ 74 return _mesa_hash_data(key, sizeof(struct dzn_graphics_pipeline_variant_key)); 75} 76 77struct dzn_cached_blob { 78 struct vk_pipeline_cache_object base; 79 uint8_t hash[SHA1_DIGEST_LENGTH]; 80 const void *data; 81 size_t size; 82}; 83 84static bool 85dzn_cached_blob_serialize(struct vk_pipeline_cache_object *object, 86 struct blob *blob) 87{ 88 struct dzn_cached_blob *cached_blob = 89 container_of(object, struct dzn_cached_blob, base); 90 91 blob_write_bytes(blob, cached_blob->data, cached_blob->size); 92 return true; 93} 94 95static void 96dzn_cached_blob_destroy(struct vk_pipeline_cache_object *object) 97{ 98 struct dzn_cached_blob *shader = 99 
container_of(object, struct dzn_cached_blob, base); 100 101 vk_free(&shader->base.device->alloc, shader); 102} 103 104static struct vk_pipeline_cache_object * 105dzn_cached_blob_create(struct vk_device *device, 106 const void *hash, 107 const void *data, 108 size_t data_size); 109 110static struct vk_pipeline_cache_object * 111dzn_cached_blob_deserialize(struct vk_device *device, 112 const void *key_data, 113 size_t key_size, 114 struct blob_reader *blob) 115{ 116 size_t data_size = blob->end - blob->current; 117 assert(key_size == SHA1_DIGEST_LENGTH); 118 119 return dzn_cached_blob_create(device, key_data, 120 blob_read_bytes(blob, data_size), 121 data_size); 122} 123 124const struct vk_pipeline_cache_object_ops dzn_cached_blob_ops = { 125 .serialize = dzn_cached_blob_serialize, 126 .deserialize = dzn_cached_blob_deserialize, 127 .destroy = dzn_cached_blob_destroy, 128}; 129 130 131static struct vk_pipeline_cache_object * 132dzn_cached_blob_create(struct vk_device *device, 133 const void *hash, 134 const void *data, 135 size_t data_size) 136{ 137 VK_MULTIALLOC(ma); 138 VK_MULTIALLOC_DECL(&ma, struct dzn_cached_blob, blob, 1); 139 VK_MULTIALLOC_DECL(&ma, uint8_t, copy, data_size); 140 141 if (!vk_multialloc_alloc(&ma, &device->alloc, 142 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) 143 return NULL; 144 145 memcpy(blob->hash, hash, sizeof(blob->hash)); 146 147 vk_pipeline_cache_object_init(device, &blob->base, 148 &dzn_cached_blob_ops, 149 blob->hash, sizeof(blob->hash)); 150 151 if (data) 152 memcpy(copy, data, data_size); 153 blob->data = copy; 154 blob->size = data_size; 155 156 return &blob->base; 157} 158 159static VkResult 160dzn_graphics_pipeline_prepare_for_variants(struct dzn_device *device, 161 struct dzn_graphics_pipeline *pipeline) 162{ 163 if (pipeline->variants) 164 return VK_SUCCESS; 165 166 pipeline->variants = 167 _mesa_hash_table_create(NULL, 168 gfx_pipeline_variant_key_hash, 169 gfx_pipeline_variant_key_equal); 170 if (!pipeline->variants) 171 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 172 173 return VK_SUCCESS; 174} 175 176static dxil_spirv_shader_stage 177to_dxil_shader_stage(VkShaderStageFlagBits in) 178{ 179 switch (in) { 180 case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX; 181 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL; 182 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL; 183 case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY; 184 case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT; 185 case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE; 186 default: unreachable("Unsupported stage"); 187 } 188} 189 190static VkResult 191dzn_pipeline_get_nir_shader(struct dzn_device *device, 192 const struct dzn_pipeline_layout *layout, 193 struct vk_pipeline_cache *cache, 194 const uint8_t *hash, 195 const VkPipelineShaderStageCreateInfo *stage_info, 196 gl_shader_stage stage, 197 enum dxil_spirv_yz_flip_mode yz_flip_mode, 198 uint16_t y_flip_mask, uint16_t z_flip_mask, 199 bool force_sample_rate_shading, 200 enum pipe_format *vi_conversions, 201 const nir_shader_compiler_options *nir_opts, 202 nir_shader **nir) 203{ 204 if (cache) { 205 *nir = vk_pipeline_cache_lookup_nir(cache, hash, SHA1_DIGEST_LENGTH, 206 nir_opts, NULL, NULL); 207 if (*nir) 208 return VK_SUCCESS; 209 } 210 211 VK_FROM_HANDLE(vk_shader_module, module, stage_info->module); 212 struct spirv_to_nir_options spirv_opts = { 213 .caps = { 214 .draw_parameters = true, 215 }, 
216 .ubo_addr_format = nir_address_format_32bit_index_offset, 217 .ssbo_addr_format = nir_address_format_32bit_index_offset, 218 .shared_addr_format = nir_address_format_32bit_offset_as_64bit, 219 220 /* use_deref_buffer_array_length + nir_lower_explicit_io force 221 * get_ssbo_size to take in the return from load_vulkan_descriptor 222 * instead of vulkan_resource_index. This makes it much easier to 223 * get the DXIL handle for the SSBO. 224 */ 225 .use_deref_buffer_array_length = true 226 }; 227 228 VkResult result = 229 vk_shader_module_to_nir(&device->vk, module, stage, 230 stage_info->pName, stage_info->pSpecializationInfo, 231 &spirv_opts, nir_opts, NULL, nir); 232 if (result != VK_SUCCESS) 233 return result; 234 235 struct dxil_spirv_runtime_conf conf = { 236 .runtime_data_cbv = { 237 .register_space = DZN_REGISTER_SPACE_SYSVALS, 238 .base_shader_register = 0, 239 }, 240 .push_constant_cbv = { 241 .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT, 242 .base_shader_register = 0, 243 }, 244 .zero_based_vertex_instance_id = false, 245 .yz_flip = { 246 .mode = yz_flip_mode, 247 .y_mask = y_flip_mask, 248 .z_mask = z_flip_mask, 249 }, 250 .read_only_images_as_srvs = true, 251 .force_sample_rate_shading = force_sample_rate_shading, 252 }; 253 254 bool requires_runtime_data; 255 dxil_spirv_nir_passes(*nir, &conf, &requires_runtime_data); 256 257 if (stage == MESA_SHADER_VERTEX) { 258 bool needs_conv = false; 259 for (uint32_t i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) { 260 if (vi_conversions[i] != PIPE_FORMAT_NONE) 261 needs_conv = true; 262 } 263 264 if (needs_conv) 265 NIR_PASS_V(*nir, dxil_nir_lower_vs_vertex_conversion, vi_conversions); 266 } 267 268 if (cache) 269 vk_pipeline_cache_add_nir(cache, hash, SHA1_DIGEST_LENGTH, *nir); 270 271 return VK_SUCCESS; 272} 273 274static bool 275adjust_resource_index_binding(struct nir_builder *builder, nir_instr *instr, 276 void *cb_data) 277{ 278 if (instr->type != nir_instr_type_intrinsic) 279 return false; 280 281 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 282 283 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index) 284 return false; 285 286 const struct dzn_pipeline_layout *layout = cb_data; 287 unsigned set = nir_intrinsic_desc_set(intrin); 288 unsigned binding = nir_intrinsic_binding(intrin); 289 290 if (set >= layout->set_count || 291 binding >= layout->binding_translation[set].binding_count) 292 return false; 293 294 binding = layout->binding_translation[set].base_reg[binding]; 295 nir_intrinsic_set_binding(intrin, binding); 296 297 return true; 298} 299 300static bool 301adjust_var_bindings(nir_shader *shader, 302 const struct dzn_pipeline_layout *layout, 303 uint8_t *bindings_hash) 304{ 305 uint32_t modes = nir_var_image | nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo; 306 struct mesa_sha1 bindings_hash_ctx; 307 308 if (bindings_hash) 309 _mesa_sha1_init(&bindings_hash_ctx); 310 311 nir_foreach_variable_with_modes(var, shader, modes) { 312 if (var->data.mode == nir_var_uniform) { 313 const struct glsl_type *type = glsl_without_array(var->type); 314 315 if (!glsl_type_is_sampler(type) && !glsl_type_is_texture(type)) 316 continue; 317 } 318 319 unsigned s = var->data.descriptor_set, b = var->data.binding; 320 321 if (s >= layout->set_count) 322 continue; 323 324 assert(b < layout->binding_translation[s].binding_count); 325 var->data.binding = layout->binding_translation[s].base_reg[b]; 326 327 if (bindings_hash) { 328 _mesa_sha1_update(&bindings_hash_ctx, &s, sizeof(s)); 329 
_mesa_sha1_update(&bindings_hash_ctx, &b, sizeof(b));
         _mesa_sha1_update(&bindings_hash_ctx, &var->data.binding, sizeof(var->data.binding));
      }
   }

   if (bindings_hash)
      _mesa_sha1_final(&bindings_hash_ctx, bindings_hash);

   return nir_shader_instructions_pass(shader, adjust_resource_index_binding,
                                       nir_metadata_all, (void *)layout);
}

static VkResult
dzn_pipeline_compile_shader(struct dzn_device *device,
                            nir_shader *nir,
                            D3D12_SHADER_BYTECODE *slot)
{
   struct dzn_instance *instance =
      container_of(device->vk.physical->instance, struct dzn_instance, vk);
   struct nir_to_dxil_options opts = {
      .environment = DXIL_ENVIRONMENT_VULKAN,
      .shader_model_max = SHADER_MODEL_6_2,
#ifdef _WIN32
      .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
#endif
   };
   struct blob dxil_blob;
   VkResult result = VK_SUCCESS;

   if (instance->debug_flags & DZN_DEBUG_NIR)
      nir_print_shader(nir, stderr);

   if (nir_to_dxil(nir, &opts, &dxil_blob)) {
      blob_finish_get_buffer(&dxil_blob, (void **)&slot->pShaderBytecode,
                             (size_t *)&slot->BytecodeLength);
   } else {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   if (dxil_blob.allocated)
      blob_finish(&dxil_blob);

   if (result != VK_SUCCESS)
      return result;

#ifdef _WIN32
   char *err;
   bool res = dxil_validate_module(instance->dxil_validator,
                                   (void *)slot->pShaderBytecode,
                                   slot->BytecodeLength, &err);

   if (instance->debug_flags & DZN_DEBUG_DXIL) {
      char *disasm = dxil_disasm_module(instance->dxil_validator,
                                        (void *)slot->pShaderBytecode,
                                        slot->BytecodeLength);
      if (disasm) {
         fprintf(stderr,
                 "== BEGIN SHADER ============================================\n"
                 "%s\n"
                 "== END SHADER ==============================================\n",
                 disasm);
         ralloc_free(disasm);
      }
   }

   if (!res) {
      if (err) {
         fprintf(stderr,
                 "== VALIDATION ERROR =============================================\n"
                 "%s\n"
                 "== END ==========================================================\n",
                 err);
         ralloc_free(err);
      }
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
#endif

   return VK_SUCCESS;
}

static D3D12_SHADER_BYTECODE *
dzn_pipeline_get_gfx_shader_slot(D3D12_PIPELINE_STATE_STREAM_DESC *stream,
                                 gl_shader_stage in)
{
   switch (in) {
   case MESA_SHADER_VERTEX: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_TESS_CTRL: {
      /* Tessellation control maps to the D3D12 hull shader. */
      d3d12_gfx_pipeline_state_stream_new_desc(stream, HS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_TESS_EVAL: {
      /* Tessellation evaluation maps to the D3D12 domain shader. */
      d3d12_gfx_pipeline_state_stream_new_desc(stream, DS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_GEOMETRY: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, GS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_FRAGMENT: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, PS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   default: unreachable("Unsupported stage");
   }
}

struct dzn_cached_dxil_shader_header {
   gl_shader_stage stage;
   size_t size;
   uint8_t data[0];
};

static VkResult
dzn_pipeline_cache_lookup_dxil_shader(struct vk_pipeline_cache *cache,
                                      const uint8_t *dxil_hash,
                                      gl_shader_stage *stage,
                                      D3D12_SHADER_BYTECODE *bc)
{
*stage = MESA_SHADER_NONE; 452 453 if (!cache) 454 return VK_SUCCESS; 455 456 struct vk_pipeline_cache_object *cache_obj = NULL; 457 458 cache_obj = 459 vk_pipeline_cache_lookup_object(cache, dxil_hash, SHA1_DIGEST_LENGTH, 460 &dzn_cached_blob_ops, 461 NULL); 462 if (!cache_obj) 463 return VK_SUCCESS; 464 465 struct dzn_cached_blob *cached_blob = 466 container_of(cache_obj, struct dzn_cached_blob, base); 467 VkResult ret = VK_SUCCESS; 468 469 assert(sizeof(struct dzn_cached_dxil_shader_header) <= cached_blob->size); 470 471 const struct dzn_cached_dxil_shader_header *info = 472 (struct dzn_cached_dxil_shader_header *)(cached_blob->data); 473 474 assert(sizeof(struct dzn_cached_dxil_shader_header) + info->size <= cached_blob->size); 475 assert(info->stage > MESA_SHADER_NONE && info->stage < MESA_VULKAN_SHADER_STAGES); 476 assert(info->size > 0); 477 478 void *code = malloc(info->size); 479 if (!code) { 480 ret = vk_error(cache->base.device, VK_ERROR_OUT_OF_HOST_MEMORY); 481 goto out; 482 } 483 484 memcpy(code, info->data, info->size); 485 486 bc->pShaderBytecode = code; 487 bc->BytecodeLength = info->size; 488 *stage = info->stage; 489 490out: 491 vk_pipeline_cache_object_unref(cache_obj); 492 return ret; 493} 494 495static void 496dzn_pipeline_cache_add_dxil_shader(struct vk_pipeline_cache *cache, 497 const uint8_t *dxil_hash, 498 gl_shader_stage stage, 499 const D3D12_SHADER_BYTECODE *bc) 500{ 501 size_t size = sizeof(struct dzn_cached_dxil_shader_header) + 502 bc->BytecodeLength; 503 504 struct vk_pipeline_cache_object *cache_obj = 505 dzn_cached_blob_create(cache->base.device, dxil_hash, NULL, size); 506 if (!cache_obj) 507 return; 508 509 struct dzn_cached_blob *cached_blob = 510 container_of(cache_obj, struct dzn_cached_blob, base); 511 struct dzn_cached_dxil_shader_header *info = 512 (struct dzn_cached_dxil_shader_header *)(cached_blob->data); 513 info->stage = stage; 514 info->size = bc->BytecodeLength; 515 memcpy(info->data, bc->pShaderBytecode, bc->BytecodeLength); 516 517 cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); 518 vk_pipeline_cache_object_unref(cache_obj); 519} 520 521struct dzn_cached_gfx_pipeline_header { 522 uint32_t stages; 523 uint32_t input_count; 524}; 525 526static VkResult 527dzn_pipeline_cache_lookup_gfx_pipeline(struct dzn_graphics_pipeline *pipeline, 528 struct vk_pipeline_cache *cache, 529 const uint8_t *pipeline_hash, 530 bool *cache_hit) 531{ 532 *cache_hit = false; 533 534 if (!cache) 535 return VK_SUCCESS; 536 537 struct vk_pipeline_cache_object *cache_obj = NULL; 538 539 cache_obj = 540 vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH, 541 &dzn_cached_blob_ops, 542 NULL); 543 if (!cache_obj) 544 return VK_SUCCESS; 545 546 struct dzn_cached_blob *cached_blob = 547 container_of(cache_obj, struct dzn_cached_blob, base); 548 D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = 549 &pipeline->templates.stream_desc; 550 551 const struct dzn_cached_gfx_pipeline_header *info = 552 (const struct dzn_cached_gfx_pipeline_header *)(cached_blob->data); 553 size_t offset = sizeof(*info); 554 555 assert(cached_blob->size >= sizeof(*info)); 556 557 if (info->input_count > 0) { 558 offset = ALIGN_POT(offset, alignof(D3D12_INPUT_LAYOUT_DESC)); 559 const D3D12_INPUT_ELEMENT_DESC *inputs = 560 (const D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset); 561 562 assert(cached_blob->size >= offset + sizeof(*inputs) * info->input_count); 563 564 memcpy(pipeline->templates.inputs, inputs, 565 sizeof(*inputs) * info->input_count); 
566 d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc); 567 desc->pInputElementDescs = pipeline->templates.inputs; 568 desc->NumElements = info->input_count; 569 offset += sizeof(*inputs) * info->input_count; 570 } 571 572 assert(cached_blob->size == offset + util_bitcount(info->stages) * SHA1_DIGEST_LENGTH); 573 574 u_foreach_bit(s, info->stages) { 575 uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset; 576 gl_shader_stage stage; 577 578 D3D12_SHADER_BYTECODE *slot = 579 dzn_pipeline_get_gfx_shader_slot(stream_desc, s); 580 581 VkResult ret = 582 dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, slot); 583 if (ret != VK_SUCCESS) 584 return ret; 585 586 assert(stage == s); 587 offset += SHA1_DIGEST_LENGTH; 588 } 589 590 *cache_hit = true; 591 592 vk_pipeline_cache_object_unref(cache_obj); 593 return VK_SUCCESS; 594} 595 596static void 597dzn_pipeline_cache_add_gfx_pipeline(struct dzn_graphics_pipeline *pipeline, 598 struct vk_pipeline_cache *cache, 599 uint32_t vertex_input_count, 600 const uint8_t *pipeline_hash, 601 const uint8_t *const *dxil_hashes) 602{ 603 size_t offset = 604 ALIGN_POT(sizeof(struct dzn_cached_gfx_pipeline_header), alignof(D3D12_INPUT_ELEMENT_DESC)) + 605 (sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_input_count); 606 uint32_t stages = 0; 607 608 for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { 609 if (pipeline->templates.shaders[i].bc) { 610 stages |= BITFIELD_BIT(i); 611 offset += SHA1_DIGEST_LENGTH; 612 } 613 } 614 615 struct vk_pipeline_cache_object *cache_obj = 616 dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, offset); 617 if (!cache_obj) 618 return; 619 620 struct dzn_cached_blob *cached_blob = 621 container_of(cache_obj, struct dzn_cached_blob, base); 622 623 offset = 0; 624 struct dzn_cached_gfx_pipeline_header *info = 625 (struct dzn_cached_gfx_pipeline_header *)(cached_blob->data); 626 627 info->input_count = vertex_input_count; 628 info->stages = stages; 629 630 offset = ALIGN_POT(offset + sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC)); 631 632 D3D12_INPUT_ELEMENT_DESC *inputs = 633 (D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset); 634 memcpy(inputs, pipeline->templates.inputs, 635 sizeof(*inputs) * vertex_input_count); 636 offset += sizeof(*inputs) * vertex_input_count; 637 638 u_foreach_bit(s, stages) { 639 uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset; 640 641 memcpy(dxil_hash, dxil_hashes[s], SHA1_DIGEST_LENGTH); 642 offset += SHA1_DIGEST_LENGTH; 643 } 644 645 cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); 646 vk_pipeline_cache_object_unref(cache_obj); 647} 648 649static void 650dzn_graphics_pipeline_hash_attribs(D3D12_INPUT_ELEMENT_DESC *attribs, 651 enum pipe_format *vi_conversions, 652 uint8_t *result) 653{ 654 struct mesa_sha1 ctx; 655 656 _mesa_sha1_init(&ctx); 657 _mesa_sha1_update(&ctx, attribs, sizeof(*attribs) * MAX_VERTEX_GENERIC_ATTRIBS); 658 _mesa_sha1_update(&ctx, vi_conversions, sizeof(*vi_conversions) * MAX_VERTEX_GENERIC_ATTRIBS); 659 _mesa_sha1_final(&ctx, result); 660} 661 662static VkResult 663dzn_graphics_pipeline_compile_shaders(struct dzn_device *device, 664 struct dzn_graphics_pipeline *pipeline, 665 struct vk_pipeline_cache *cache, 666 const struct dzn_pipeline_layout *layout, 667 D3D12_PIPELINE_STATE_STREAM_DESC *out, 668 D3D12_INPUT_ELEMENT_DESC *attribs, 669 enum pipe_format *vi_conversions, 670 const VkGraphicsPipelineCreateInfo *info) 671{ 672 const VkPipelineViewportStateCreateInfo 
*vp_info = 673 info->pRasterizationState->rasterizerDiscardEnable ? 674 NULL : info->pViewportState; 675 struct { 676 const VkPipelineShaderStageCreateInfo *info; 677 uint8_t spirv_hash[SHA1_DIGEST_LENGTH]; 678 uint8_t dxil_hash[SHA1_DIGEST_LENGTH]; 679 } stages[MESA_VULKAN_SHADER_STAGES] = { 0 }; 680 const uint8_t *dxil_hashes[MESA_VULKAN_SHADER_STAGES] = { 0 }; 681 uint8_t attribs_hash[SHA1_DIGEST_LENGTH]; 682 uint8_t pipeline_hash[SHA1_DIGEST_LENGTH]; 683 gl_shader_stage yz_flip_stage = MESA_SHADER_NONE; 684 uint32_t active_stage_mask = 0; 685 VkResult ret; 686 687 /* First step: collect stage info in a table indexed by gl_shader_stage 688 * so we can iterate over stages in pipeline order or reverse pipeline 689 * order. 690 */ 691 for (uint32_t i = 0; i < info->stageCount; i++) { 692 gl_shader_stage stage = 693 vk_to_mesa_shader_stage(info->pStages[i].stage); 694 695 assert(stage <= MESA_SHADER_FRAGMENT); 696 697 if ((stage == MESA_SHADER_VERTEX || 698 stage == MESA_SHADER_TESS_EVAL || 699 stage == MESA_SHADER_GEOMETRY) && 700 yz_flip_stage < stage) 701 yz_flip_stage = stage; 702 703 if (stage == MESA_SHADER_FRAGMENT && 704 info->pRasterizationState && 705 (info->pRasterizationState->rasterizerDiscardEnable || 706 info->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) { 707 /* Disable rasterization (AKA leave fragment shader NULL) when 708 * front+back culling or discard is set. 709 */ 710 continue; 711 } 712 713 stages[stage].info = &info->pStages[i]; 714 active_stage_mask |= BITFIELD_BIT(stage); 715 } 716 717 enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE; 718 uint16_t y_flip_mask = 0, z_flip_mask = 0; 719 720 if (pipeline->vp.dynamic) { 721 yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL; 722 } else if (vp_info) { 723 for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) { 724 if (vp_info->pViewports[i].height > 0) 725 y_flip_mask |= BITFIELD_BIT(i); 726 727 if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth) 728 z_flip_mask |= BITFIELD_BIT(i); 729 } 730 731 if (y_flip_mask && z_flip_mask) 732 yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL; 733 else if (z_flip_mask) 734 yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL; 735 else if (y_flip_mask) 736 yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; 737 } 738 739 bool force_sample_rate_shading = 740 info->pMultisampleState && 741 info->pMultisampleState->sampleShadingEnable; 742 743 if (cache) { 744 dzn_graphics_pipeline_hash_attribs(attribs, vi_conversions, attribs_hash); 745 746 struct mesa_sha1 pipeline_hash_ctx; 747 748 _mesa_sha1_init(&pipeline_hash_ctx); 749 _mesa_sha1_update(&pipeline_hash_ctx, attribs_hash, sizeof(attribs_hash)); 750 _mesa_sha1_update(&pipeline_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode)); 751 _mesa_sha1_update(&pipeline_hash_ctx, &y_flip_mask, sizeof(y_flip_mask)); 752 _mesa_sha1_update(&pipeline_hash_ctx, &z_flip_mask, sizeof(z_flip_mask)); 753 _mesa_sha1_update(&pipeline_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading)); 754 755 u_foreach_bit(stage, active_stage_mask) { 756 vk_pipeline_hash_shader_stage(stages[stage].info, stages[stage].spirv_hash); 757 _mesa_sha1_update(&pipeline_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash)); 758 _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[stage].hash, sizeof(layout->stages[stage].hash)); 759 } 760 _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash); 761 762 bool cache_hit; 763 ret = dzn_pipeline_cache_lookup_gfx_pipeline(pipeline, cache, 
pipeline_hash, 764 &cache_hit); 765 if (ret != VK_SUCCESS) 766 return ret; 767 768 if (cache_hit) 769 return VK_SUCCESS; 770 } 771 772 /* Second step: get NIR shaders for all stages. */ 773 nir_shader_compiler_options nir_opts = *dxil_get_nir_compiler_options(); 774 nir_opts.lower_base_vertex = true; 775 u_foreach_bit(stage, active_stage_mask) { 776 struct mesa_sha1 nir_hash_ctx; 777 uint8_t nir_hash[SHA1_DIGEST_LENGTH]; 778 779 if (cache) { 780 _mesa_sha1_init(&nir_hash_ctx); 781 if (stage == MESA_SHADER_VERTEX) 782 _mesa_sha1_update(&nir_hash_ctx, attribs_hash, sizeof(attribs_hash)); 783 if (stage == yz_flip_stage) { 784 _mesa_sha1_update(&nir_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode)); 785 _mesa_sha1_update(&nir_hash_ctx, &y_flip_mask, sizeof(y_flip_mask)); 786 _mesa_sha1_update(&nir_hash_ctx, &z_flip_mask, sizeof(z_flip_mask)); 787 } 788 _mesa_sha1_update(&nir_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash)); 789 _mesa_sha1_final(&nir_hash_ctx, nir_hash); 790 } 791 792 ret = dzn_pipeline_get_nir_shader(device, layout, 793 cache, nir_hash, 794 stages[stage].info, stage, 795 stage == yz_flip_stage ? yz_flip_mode : DXIL_SPIRV_YZ_FLIP_NONE, 796 y_flip_mask, z_flip_mask, 797 stage == MESA_SHADER_FRAGMENT ? force_sample_rate_shading : false, 798 vi_conversions, 799 &nir_opts, &pipeline->templates.shaders[stage].nir); 800 if (ret != VK_SUCCESS) 801 return ret; 802 } 803 804 /* Third step: link those NIR shaders. We iterate in reverse order 805 * so we can eliminate outputs that are never read by the next stage. 806 */ 807 uint32_t link_mask = active_stage_mask; 808 while (link_mask != 0) { 809 gl_shader_stage stage = util_last_bit(link_mask) - 1; 810 link_mask &= ~BITFIELD_BIT(stage); 811 gl_shader_stage prev_stage = util_last_bit(link_mask) - 1; 812 813 assert(pipeline->templates.shaders[stage].nir); 814 dxil_spirv_nir_link(pipeline->templates.shaders[stage].nir, 815 prev_stage != MESA_SHADER_NONE ? 816 pipeline->templates.shaders[prev_stage].nir : NULL); 817 } 818 819 u_foreach_bit(stage, active_stage_mask) { 820 uint8_t bindings_hash[SHA1_DIGEST_LENGTH]; 821 822 NIR_PASS_V(pipeline->templates.shaders[stage].nir, adjust_var_bindings, layout, 823 cache ? 
bindings_hash : NULL); 824 825 if (cache) { 826 struct mesa_sha1 dxil_hash_ctx; 827 828 _mesa_sha1_init(&dxil_hash_ctx); 829 830 if (stage == MESA_SHADER_VERTEX) 831 _mesa_sha1_update(&dxil_hash_ctx, attribs_hash, sizeof(attribs_hash)); 832 833 if (stage == yz_flip_stage) { 834 _mesa_sha1_update(&dxil_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode)); 835 _mesa_sha1_update(&dxil_hash_ctx, &y_flip_mask, sizeof(y_flip_mask)); 836 _mesa_sha1_update(&dxil_hash_ctx, &z_flip_mask, sizeof(z_flip_mask)); 837 } 838 839 if (stage == MESA_SHADER_FRAGMENT) 840 _mesa_sha1_update(&dxil_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading)); 841 842 _mesa_sha1_update(&dxil_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash)); 843 _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash)); 844 _mesa_sha1_final(&dxil_hash_ctx, stages[stage].dxil_hash); 845 dxil_hashes[stage] = stages[stage].dxil_hash; 846 847 gl_shader_stage cached_stage; 848 D3D12_SHADER_BYTECODE bc; 849 ret = dzn_pipeline_cache_lookup_dxil_shader(cache, stages[stage].dxil_hash, &cached_stage, &bc); 850 if (ret != VK_SUCCESS) 851 return ret; 852 853 if (cached_stage != MESA_SHADER_NONE) { 854 assert(cached_stage == stage); 855 D3D12_SHADER_BYTECODE *slot = 856 dzn_pipeline_get_gfx_shader_slot(out, stage); 857 *slot = bc; 858 pipeline->templates.shaders[stage].bc = slot; 859 } 860 } 861 } 862 863 uint32_t vert_input_count = 0; 864 if (pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) { 865 /* Now, declare one D3D12_INPUT_ELEMENT_DESC per VS input variable, so 866 * we can handle location overlaps properly. 867 */ 868 nir_foreach_shader_in_variable(var, pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) { 869 assert(var->data.location >= VERT_ATTRIB_GENERIC0); 870 unsigned loc = var->data.location - VERT_ATTRIB_GENERIC0; 871 assert(vert_input_count < D3D12_VS_INPUT_REGISTER_COUNT); 872 assert(loc < MAX_VERTEX_GENERIC_ATTRIBS); 873 874 pipeline->templates.inputs[vert_input_count] = attribs[loc]; 875 pipeline->templates.inputs[vert_input_count].SemanticIndex = vert_input_count; 876 var->data.driver_location = vert_input_count++; 877 } 878 879 if (vert_input_count > 0) { 880 d3d12_gfx_pipeline_state_stream_new_desc(out, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc); 881 desc->pInputElementDescs = pipeline->templates.inputs; 882 desc->NumElements = vert_input_count; 883 } 884 } 885 886 /* Last step: translate NIR shaders into DXIL modules */ 887 u_foreach_bit(stage, active_stage_mask) { 888 /* Cache hit, we can skip the compilation. */ 889 if (pipeline->templates.shaders[stage].bc) 890 continue; 891 892 if (stage == MESA_SHADER_FRAGMENT) { 893 gl_shader_stage prev_stage = 894 util_last_bit(active_stage_mask & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1; 895 /* Disable rasterization if the last geometry stage doesn't 896 * write the position. 
897 */ 898 if (prev_stage == MESA_SHADER_NONE || 899 !(pipeline->templates.shaders[prev_stage].nir->info.outputs_written & VARYING_BIT_POS)) 900 continue; 901 } 902 903 D3D12_SHADER_BYTECODE *slot = 904 dzn_pipeline_get_gfx_shader_slot(out, stage); 905 906 ret = dzn_pipeline_compile_shader(device, pipeline->templates.shaders[stage].nir, slot); 907 if (ret != VK_SUCCESS) 908 return ret; 909 910 pipeline->templates.shaders[stage].bc = slot; 911 912 if (cache) 913 dzn_pipeline_cache_add_dxil_shader(cache, stages[stage].dxil_hash, stage, slot); 914 } 915 916 if (cache) 917 dzn_pipeline_cache_add_gfx_pipeline(pipeline, cache, vert_input_count, pipeline_hash, 918 dxil_hashes); 919 920 return VK_SUCCESS; 921} 922 923VkFormat 924dzn_graphics_pipeline_patch_vi_format(VkFormat format) 925{ 926 switch (format) { 927 case VK_FORMAT_A2R10G10B10_SNORM_PACK32: 928 case VK_FORMAT_A2R10G10B10_UNORM_PACK32: 929 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32: 930 case VK_FORMAT_A2R10G10B10_USCALED_PACK32: 931 case VK_FORMAT_A2B10G10R10_SNORM_PACK32: 932 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: 933 case VK_FORMAT_A2B10G10R10_USCALED_PACK32: 934 return VK_FORMAT_R32_UINT; 935 case VK_FORMAT_R8G8B8A8_SSCALED: 936 return VK_FORMAT_R8G8B8A8_SINT; 937 case VK_FORMAT_R8G8B8A8_USCALED: 938 return VK_FORMAT_R8G8B8A8_UINT; 939 case VK_FORMAT_R16G16B16A16_USCALED: 940 return VK_FORMAT_R16G16B16A16_UINT; 941 case VK_FORMAT_R16G16B16A16_SSCALED: 942 return VK_FORMAT_R16G16B16A16_SINT; 943 default: 944 return format; 945 } 946} 947 948static VkResult 949dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline, 950 const VkGraphicsPipelineCreateInfo *in, 951 D3D12_INPUT_ELEMENT_DESC *inputs, 952 enum pipe_format *vi_conversions) 953{ 954 const VkPipelineVertexInputStateCreateInfo *in_vi = 955 in->pVertexInputState; 956 const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisors = 957 (const VkPipelineVertexInputDivisorStateCreateInfoEXT *) 958 vk_find_struct_const(in_vi, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); 959 960 if (!in_vi->vertexAttributeDescriptionCount) 961 return VK_SUCCESS; 962 963 D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS]; 964 965 pipeline->vb.count = 0; 966 for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) { 967 const struct VkVertexInputBindingDescription *bdesc = 968 &in_vi->pVertexBindingDescriptions[i]; 969 970 pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1); 971 pipeline->vb.strides[bdesc->binding] = bdesc->stride; 972 if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) { 973 slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; 974 } else { 975 assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX); 976 slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; 977 } 978 } 979 980 for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) { 981 const VkVertexInputAttributeDescription *attr = 982 &in_vi->pVertexAttributeDescriptions[i]; 983 const VkVertexInputBindingDivisorDescriptionEXT *divisor = NULL; 984 985 if (slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA && 986 divisors) { 987 for (uint32_t d = 0; d < divisors->vertexBindingDivisorCount; d++) { 988 if (attr->binding == divisors->pVertexBindingDivisors[d].binding) { 989 divisor = &divisors->pVertexBindingDivisors[d]; 990 break; 991 } 992 } 993 } 994 995 VkFormat patched_format = dzn_graphics_pipeline_patch_vi_format(attr->format); 996 if (patched_format != attr->format) 997 
vi_conversions[attr->location] = vk_format_to_pipe_format(attr->format);

      /* nir_to_dxil() names all vertex inputs as TEXCOORDx */
      inputs[attr->location] = (D3D12_INPUT_ELEMENT_DESC) {
         .SemanticName = "TEXCOORD",
         .Format = dzn_buffer_get_dxgi_format(patched_format),
         .InputSlot = attr->binding,
         .InputSlotClass = slot_class[attr->binding],
         .InstanceDataStepRate =
            divisor ? divisor->divisor :
            slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0,
         .AlignedByteOffset = attr->offset,
      };
   }

   return VK_SUCCESS;
}

static D3D12_PRIMITIVE_TOPOLOGY_TYPE
to_prim_topology_type(VkPrimitiveTopology in)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
   default: unreachable("Invalid primitive topology");
   }
}

static D3D12_PRIMITIVE_TOPOLOGY
to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
   /* Triangle fans are emulated using an intermediate index buffer.
*/ 1050 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; 1051 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; 1052 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ; 1053 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: 1054 assert(patch_control_points); 1055 return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1); 1056 default: unreachable("Invalid primitive topology"); 1057 } 1058} 1059 1060static VkResult 1061dzn_graphics_pipeline_translate_ia(struct dzn_device *device, 1062 struct dzn_graphics_pipeline *pipeline, 1063 D3D12_PIPELINE_STATE_STREAM_DESC *out, 1064 const VkGraphicsPipelineCreateInfo *in) 1065{ 1066 const VkPipelineInputAssemblyStateCreateInfo *in_ia = 1067 in->pInputAssemblyState; 1068 bool has_tes = false; 1069 for (uint32_t i = 0; i < in->stageCount; i++) { 1070 if (in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT || 1071 in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { 1072 has_tes = true; 1073 break; 1074 } 1075 } 1076 const VkPipelineTessellationStateCreateInfo *in_tes = 1077 has_tes ? in->pTessellationState : NULL; 1078 VkResult ret = VK_SUCCESS; 1079 1080 d3d12_gfx_pipeline_state_stream_new_desc(out, PRIMITIVE_TOPOLOGY, D3D12_PRIMITIVE_TOPOLOGY_TYPE, prim_top_type); 1081 *prim_top_type = to_prim_topology_type(in_ia->topology); 1082 pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; 1083 pipeline->ia.topology = 1084 to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0); 1085 1086 if (in_ia->primitiveRestartEnable) { 1087 d3d12_gfx_pipeline_state_stream_new_desc(out, IB_STRIP_CUT_VALUE, D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, ib_strip_cut); 1088 pipeline->templates.desc_offsets.ib_strip_cut = 1089 (uintptr_t)ib_strip_cut - (uintptr_t)out->pPipelineStateSubobjectStream; 1090 *ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; 1091 ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline); 1092 } 1093 1094 return ret; 1095} 1096 1097static D3D12_FILL_MODE 1098translate_polygon_mode(VkPolygonMode in) 1099{ 1100 switch (in) { 1101 case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID; 1102 case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME; 1103 default: unreachable("Unsupported polygon mode"); 1104 } 1105} 1106 1107static D3D12_CULL_MODE 1108translate_cull_mode(VkCullModeFlags in) 1109{ 1110 switch (in) { 1111 case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE; 1112 case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT; 1113 case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK; 1114 /* Front+back face culling is equivalent to 'rasterization disabled' */ 1115 case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE; 1116 default: unreachable("Unsupported cull mode"); 1117 } 1118} 1119 1120static int32_t 1121translate_depth_bias(double depth_bias) 1122{ 1123 if (depth_bias > INT32_MAX) 1124 return INT32_MAX; 1125 else if (depth_bias < INT32_MIN) 1126 return INT32_MIN; 1127 1128 return depth_bias; 1129} 1130 1131static void 1132dzn_graphics_pipeline_translate_rast(struct dzn_graphics_pipeline *pipeline, 1133 D3D12_PIPELINE_STATE_STREAM_DESC *out, 1134 const VkGraphicsPipelineCreateInfo *in) 1135{ 1136 const VkPipelineRasterizationStateCreateInfo *in_rast = 1137 in->pRasterizationState; 1138 const VkPipelineViewportStateCreateInfo *in_vp = 1139 
in_rast->rasterizerDiscardEnable ? NULL : in->pViewportState; 1140 1141 if (in_vp) { 1142 pipeline->vp.count = in_vp->viewportCount; 1143 if (in_vp->pViewports) { 1144 for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++) 1145 dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]); 1146 } 1147 1148 pipeline->scissor.count = in_vp->scissorCount; 1149 if (in_vp->pScissors) { 1150 for (uint32_t i = 0; i < in_vp->scissorCount; i++) 1151 dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]); 1152 } 1153 } 1154 1155 d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER, D3D12_RASTERIZER_DESC, desc); 1156 pipeline->templates.desc_offsets.rast = 1157 (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream; 1158 desc->DepthClipEnable = !in_rast->depthClampEnable; 1159 desc->FillMode = translate_polygon_mode(in_rast->polygonMode); 1160 desc->CullMode = translate_cull_mode(in_rast->cullMode); 1161 desc->FrontCounterClockwise = 1162 in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE; 1163 if (in_rast->depthBiasEnable) { 1164 desc->DepthBias = translate_depth_bias(in_rast->depthBiasConstantFactor); 1165 desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor; 1166 desc->DepthBiasClamp = in_rast->depthBiasClamp; 1167 } 1168 1169 assert(in_rast->lineWidth == 1.0f); 1170} 1171 1172static void 1173dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline, 1174 D3D12_PIPELINE_STATE_STREAM_DESC *out, 1175 const VkGraphicsPipelineCreateInfo *in) 1176{ 1177 const VkPipelineRasterizationStateCreateInfo *in_rast = 1178 in->pRasterizationState; 1179 const VkPipelineMultisampleStateCreateInfo *in_ms = 1180 in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState; 1181 1182 if (!in_ms) 1183 return; 1184 1185 /* TODO: minSampleShading (use VRS), alphaToOneEnable */ 1186 d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_DESC, DXGI_SAMPLE_DESC, desc); 1187 desc->Count = in_ms ? 
in_ms->rasterizationSamples : 1; 1188 desc->Quality = 0; 1189 1190 if (!in_ms->pSampleMask) 1191 return; 1192 1193 d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_MASK, UINT, mask); 1194 *mask = *in_ms->pSampleMask; 1195} 1196 1197static D3D12_STENCIL_OP 1198translate_stencil_op(VkStencilOp in) 1199{ 1200 switch (in) { 1201 case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP; 1202 case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO; 1203 case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE; 1204 case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT; 1205 case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT; 1206 case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR; 1207 case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR; 1208 case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT; 1209 default: unreachable("Invalid stencil op"); 1210 } 1211} 1212 1213static void 1214translate_stencil_test(struct dzn_graphics_pipeline *pipeline, 1215 D3D12_DEPTH_STENCIL_DESC1 *out, 1216 const VkGraphicsPipelineCreateInfo *in) 1217{ 1218 const VkPipelineDepthStencilStateCreateInfo *in_zsa = 1219 in->pDepthStencilState; 1220 1221 bool front_test_uses_ref = 1222 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) && 1223 in_zsa->front.compareOp != VK_COMPARE_OP_NEVER && 1224 in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS && 1225 (pipeline->zsa.stencil_test.dynamic_compare_mask || 1226 in_zsa->front.compareMask != 0); 1227 bool back_test_uses_ref = 1228 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) && 1229 in_zsa->back.compareOp != VK_COMPARE_OP_NEVER && 1230 in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS && 1231 (pipeline->zsa.stencil_test.dynamic_compare_mask || 1232 in_zsa->back.compareMask != 0); 1233 1234 if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask) 1235 pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX; 1236 else if (front_test_uses_ref) 1237 pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask; 1238 else 1239 pipeline->zsa.stencil_test.front.compare_mask = 0; 1240 1241 if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask) 1242 pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX; 1243 else if (back_test_uses_ref) 1244 pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask; 1245 else 1246 pipeline->zsa.stencil_test.back.compare_mask = 0; 1247 1248 bool diff_wr_mask = 1249 in->pRasterizationState->cullMode == VK_CULL_MODE_NONE && 1250 (pipeline->zsa.stencil_test.dynamic_write_mask || 1251 in_zsa->back.writeMask != in_zsa->front.writeMask); 1252 bool diff_ref = 1253 in->pRasterizationState->cullMode == VK_CULL_MODE_NONE && 1254 (pipeline->zsa.stencil_test.dynamic_ref || 1255 in_zsa->back.reference != in_zsa->front.reference); 1256 bool diff_cmp_mask = 1257 back_test_uses_ref && front_test_uses_ref && 1258 (pipeline->zsa.stencil_test.dynamic_compare_mask || 1259 pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask); 1260 1261 if (diff_cmp_mask || diff_wr_mask) 1262 pipeline->zsa.stencil_test.independent_front_back = true; 1263 1264 bool back_wr_uses_ref = 1265 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) && 1266 ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS && 1267 in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) || 1268 (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER && 1269 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != 
VK_COMPARE_OP_NEVER) && 1270 in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) || 1271 (in_zsa->depthTestEnable && 1272 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS && 1273 in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE)); 1274 bool front_wr_uses_ref = 1275 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) && 1276 ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS && 1277 in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) || 1278 (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER && 1279 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) && 1280 in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) || 1281 (in_zsa->depthTestEnable && 1282 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS && 1283 in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE)); 1284 1285 pipeline->zsa.stencil_test.front.write_mask = 1286 (pipeline->zsa.stencil_test.dynamic_write_mask || 1287 (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ? 1288 0 : in_zsa->front.writeMask; 1289 pipeline->zsa.stencil_test.back.write_mask = 1290 (pipeline->zsa.stencil_test.dynamic_write_mask || 1291 (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ? 1292 0 : in_zsa->back.writeMask; 1293 1294 pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref; 1295 pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref; 1296 1297 if (diff_ref && 1298 pipeline->zsa.stencil_test.front.uses_ref && 1299 pipeline->zsa.stencil_test.back.uses_ref) 1300 pipeline->zsa.stencil_test.independent_front_back = true; 1301 1302 pipeline->zsa.stencil_test.front.ref = 1303 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference; 1304 pipeline->zsa.stencil_test.back.ref = 1305 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference; 1306 1307 /* FIXME: We don't support independent {compare,write}_mask and stencil 1308 * reference. Until we have proper support for independent front/back 1309 * stencil test, let's prioritize the front setup when both are active. 1310 */ 1311 out->StencilReadMask = 1312 front_test_uses_ref ? 1313 pipeline->zsa.stencil_test.front.compare_mask : 1314 back_test_uses_ref ? 1315 pipeline->zsa.stencil_test.back.compare_mask : 0; 1316 out->StencilWriteMask = 1317 pipeline->zsa.stencil_test.front.write_mask ? 1318 pipeline->zsa.stencil_test.front.write_mask : 1319 pipeline->zsa.stencil_test.back.write_mask; 1320 1321 assert(!pipeline->zsa.stencil_test.independent_front_back); 1322} 1323 1324static void 1325dzn_graphics_pipeline_translate_zsa(struct dzn_graphics_pipeline *pipeline, 1326 D3D12_PIPELINE_STATE_STREAM_DESC *out, 1327 const VkGraphicsPipelineCreateInfo *in) 1328{ 1329 const VkPipelineRasterizationStateCreateInfo *in_rast = 1330 in->pRasterizationState; 1331 const VkPipelineDepthStencilStateCreateInfo *in_zsa = 1332 in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState; 1333 1334 if (!in_zsa) 1335 return; 1336 1337 d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, desc); 1338 pipeline->templates.desc_offsets.ds = 1339 (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream; 1340 1341 desc->DepthEnable = 1342 in_zsa->depthTestEnable || in_zsa->depthBoundsTestEnable; 1343 desc->DepthWriteMask = 1344 in_zsa->depthWriteEnable ? 1345 D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; 1346 desc->DepthFunc = 1347 in_zsa->depthTestEnable ? 
      dzn_translate_compare_op(in_zsa->depthCompareOp) :
      D3D12_COMPARISON_FUNC_ALWAYS;
   pipeline->zsa.depth_bounds.enable = in_zsa->depthBoundsTestEnable;
   pipeline->zsa.depth_bounds.min = in_zsa->minDepthBounds;
   pipeline->zsa.depth_bounds.max = in_zsa->maxDepthBounds;
   desc->DepthBoundsTestEnable = in_zsa->depthBoundsTestEnable;
   desc->StencilEnable = in_zsa->stencilTestEnable;
   if (in_zsa->stencilTestEnable) {
      desc->FrontFace.StencilFailOp =
         translate_stencil_op(in_zsa->front.failOp);
      desc->FrontFace.StencilDepthFailOp =
         translate_stencil_op(in_zsa->front.depthFailOp);
      desc->FrontFace.StencilPassOp =
         translate_stencil_op(in_zsa->front.passOp);
      desc->FrontFace.StencilFunc =
         dzn_translate_compare_op(in_zsa->front.compareOp);
      desc->BackFace.StencilFailOp =
         translate_stencil_op(in_zsa->back.failOp);
      desc->BackFace.StencilDepthFailOp =
         translate_stencil_op(in_zsa->back.depthFailOp);
      desc->BackFace.StencilPassOp =
         translate_stencil_op(in_zsa->back.passOp);
      desc->BackFace.StencilFunc =
         dzn_translate_compare_op(in_zsa->back.compareOp);

      pipeline->zsa.stencil_test.enable = true;

      translate_stencil_test(pipeline, desc, in);
   }
}

static D3D12_BLEND
translate_blend_factor(VkBlendFactor in, bool is_alpha)
{
   switch (in) {
   case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
   case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
   case VK_BLEND_FACTOR_SRC_COLOR:
      return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
      return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR;
   case VK_BLEND_FACTOR_DST_COLOR:
      return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
      return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR;
   case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
   case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
   /* FIXME: no way to isolate the alpha and color constants */
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
      return D3D12_BLEND_BLEND_FACTOR;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      return D3D12_BLEND_INV_BLEND_FACTOR;
   case VK_BLEND_FACTOR_SRC1_COLOR:
      return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
      return is_alpha ?
D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR; 1408 case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA; 1409 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA; 1410 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; 1411 default: unreachable("Invalid blend factor"); 1412 } 1413} 1414 1415static D3D12_BLEND_OP 1416translate_blend_op(VkBlendOp in) 1417{ 1418 switch (in) { 1419 case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD; 1420 case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; 1421 case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; 1422 case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN; 1423 case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX; 1424 default: unreachable("Invalid blend op"); 1425 } 1426} 1427 1428static D3D12_LOGIC_OP 1429translate_logic_op(VkLogicOp in) 1430{ 1431 switch (in) { 1432 case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR; 1433 case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND; 1434 case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; 1435 case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY; 1436 case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; 1437 case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP; 1438 case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR; 1439 case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR; 1440 case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR; 1441 case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV; 1442 case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT; 1443 case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; 1444 case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; 1445 case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; 1446 case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND; 1447 case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET; 1448 default: unreachable("Invalid logic op"); 1449 } 1450} 1451 1452static void 1453dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline, 1454 D3D12_PIPELINE_STATE_STREAM_DESC *out, 1455 const VkGraphicsPipelineCreateInfo *in) 1456{ 1457 const VkPipelineRasterizationStateCreateInfo *in_rast = 1458 in->pRasterizationState; 1459 const VkPipelineColorBlendStateCreateInfo *in_blend = 1460 in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState; 1461 const VkPipelineMultisampleStateCreateInfo *in_ms = 1462 in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState; 1463 1464 if (!in_blend || !in_ms) 1465 return; 1466 1467 d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, desc); 1468 D3D12_LOGIC_OP logicop = 1469 in_blend->logicOpEnable ? 
      translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
   desc->AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
   memcpy(pipeline->blend.constants, in_blend->blendConstants,
          sizeof(pipeline->blend.constants));

   for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
      /* Independent blend is needed as soon as two attachments use
       * different blend state.
       */
      if (i > 0 &&
          memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
                 sizeof(*in_blend->pAttachments)) != 0)
         desc->IndependentBlendEnable = true;

      desc->RenderTarget[i].BlendEnable =
         in_blend->pAttachments[i].blendEnable;
      desc->RenderTarget[i].RenderTargetWriteMask =
         in_blend->pAttachments[i].colorWriteMask;

      if (in_blend->logicOpEnable) {
         desc->RenderTarget[i].LogicOpEnable = true;
         desc->RenderTarget[i].LogicOp = logicop;
      } else {
         desc->RenderTarget[i].SrcBlend =
            translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false);
         desc->RenderTarget[i].DestBlend =
            translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false);
         desc->RenderTarget[i].BlendOp =
            translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
         desc->RenderTarget[i].SrcBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true);
         desc->RenderTarget[i].DestBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true);
         desc->RenderTarget[i].BlendOpAlpha =
            translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
      }
   }
}


static void
dzn_pipeline_init(struct dzn_pipeline *pipeline,
                  struct dzn_device *device,
                  VkPipelineBindPoint type,
                  struct dzn_pipeline_layout *layout,
                  D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc)
{
   pipeline->type = type;
   pipeline->root.sets_param_count = layout->root.sets_param_count;
   pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
   pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
   STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
   memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
   pipeline->root.sig = layout->root.sig;
   ID3D12RootSignature_AddRef(pipeline->root.sig);

   STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
   memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));

   STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
   memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);

   ASSERTED uint32_t max_streamsz =
      type == VK_PIPELINE_BIND_POINT_GRAPHICS ?
1532 MAX_GFX_PIPELINE_STATE_STREAM_SIZE : 1533 MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE; 1534 1535 d3d12_pipeline_state_stream_new_desc(stream_desc, max_streamsz, ROOT_SIGNATURE, 1536 ID3D12RootSignature *, root_sig); 1537 *root_sig = pipeline->root.sig; 1538} 1539 1540static void 1541dzn_pipeline_finish(struct dzn_pipeline *pipeline) 1542{ 1543 if (pipeline->state) 1544 ID3D12PipelineState_Release(pipeline->state); 1545 if (pipeline->root.sig) 1546 ID3D12RootSignature_Release(pipeline->root.sig); 1547 1548 vk_object_base_finish(&pipeline->base); 1549} 1550 1551static void dzn_graphics_pipeline_delete_variant(struct hash_entry *he) 1552{ 1553 struct dzn_graphics_pipeline_variant *variant = he->data; 1554 1555 if (variant->state) 1556 ID3D12PipelineState_Release(variant->state); 1557} 1558 1559static void 1560dzn_graphics_pipeline_cleanup_nir_shaders(struct dzn_graphics_pipeline *pipeline) 1561{ 1562 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) { 1563 ralloc_free(pipeline->templates.shaders[i].nir); 1564 pipeline->templates.shaders[i].nir = NULL; 1565 } 1566} 1567 1568static void 1569dzn_graphics_pipeline_cleanup_dxil_shaders(struct dzn_graphics_pipeline *pipeline) 1570{ 1571 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) { 1572 if (pipeline->templates.shaders[i].bc) { 1573 free((void *)pipeline->templates.shaders[i].bc->pShaderBytecode); 1574 pipeline->templates.shaders[i].bc = NULL; 1575 } 1576 } 1577} 1578 1579static void 1580dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline, 1581 const VkAllocationCallbacks *alloc) 1582{ 1583 if (!pipeline) 1584 return; 1585 1586 _mesa_hash_table_destroy(pipeline->variants, 1587 dzn_graphics_pipeline_delete_variant); 1588 1589 dzn_graphics_pipeline_cleanup_nir_shaders(pipeline); 1590 dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline); 1591 1592 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) { 1593 if (pipeline->indirect_cmd_sigs[i]) 1594 ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]); 1595 } 1596 1597 dzn_pipeline_finish(&pipeline->base); 1598 vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline); 1599} 1600 1601static VkResult 1602dzn_graphics_pipeline_create(struct dzn_device *device, 1603 VkPipelineCache cache, 1604 const VkGraphicsPipelineCreateInfo *pCreateInfo, 1605 const VkAllocationCallbacks *pAllocator, 1606 VkPipeline *out) 1607{ 1608 const VkPipelineRenderingCreateInfo *ri = (const VkPipelineRenderingCreateInfo *) 1609 vk_find_struct_const(pCreateInfo, PIPELINE_RENDERING_CREATE_INFO); 1610 VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache); 1611 VK_FROM_HANDLE(vk_render_pass, pass, pCreateInfo->renderPass); 1612 VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout); 1613 uint32_t color_count = 0; 1614 VkFormat color_fmts[MAX_RTS] = { 0 }; 1615 VkFormat zs_fmt = VK_FORMAT_UNDEFINED; 1616 VkResult ret; 1617 HRESULT hres = 0; 1618 1619 struct dzn_graphics_pipeline *pipeline = 1620 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 1621 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1622 if (!pipeline) 1623 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1624 1625 D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = &pipeline->templates.stream_desc; 1626 stream_desc->pPipelineStateSubobjectStream = pipeline->templates.stream_buf; 1627 1628 dzn_pipeline_init(&pipeline->base, device, 1629 VK_PIPELINE_BIND_POINT_GRAPHICS, 1630 layout, stream_desc); 1631 D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 }; 
   D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
   enum pipe_format vi_conversions[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };

   ret = dzn_graphics_pipeline_translate_vi(pipeline, pCreateInfo,
                                            attribs, vi_conversions);
   if (ret != VK_SUCCESS)
      goto out;

   if (pCreateInfo->pDynamicState) {
      for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
         switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
         case VK_DYNAMIC_STATE_VIEWPORT:
            pipeline->vp.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_SCISSOR:
            pipeline->scissor.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
            pipeline->zsa.stencil_test.dynamic_ref = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
            pipeline->zsa.stencil_test.dynamic_compare_mask = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
            pipeline->zsa.stencil_test.dynamic_write_mask = true;
            break;
         case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
            pipeline->blend.dynamic_constants = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
            pipeline->zsa.depth_bounds.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BIAS:
            pipeline->zsa.dynamic_depth_bias = true;
            ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
            if (ret)
               goto out;
            break;
         case VK_DYNAMIC_STATE_LINE_WIDTH:
            /* Nothing to do since we just support lineWidth = 1. */
            break;
         default: unreachable("Unsupported dynamic state");
         }
      }
   }

   ret = dzn_graphics_pipeline_translate_ia(device, pipeline, stream_desc, pCreateInfo);
   if (ret)
      goto out;

   dzn_graphics_pipeline_translate_rast(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_ms(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_zsa(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_blend(pipeline, stream_desc, pCreateInfo);

   if (pass) {
      const struct vk_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
      color_count = subpass->color_count;
      for (uint32_t i = 0; i < subpass->color_count; i++) {
         uint32_t idx = subpass->color_attachments[i].attachment;

         if (idx == VK_ATTACHMENT_UNUSED)
            continue;

         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[idx];

         color_fmts[i] = attachment->format;
      }

      if (subpass->depth_stencil_attachment &&
          subpass->depth_stencil_attachment->attachment != VK_ATTACHMENT_UNUSED) {
         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[subpass->depth_stencil_attachment->attachment];

         zs_fmt = attachment->format;
      }
   } else if (ri) {
      color_count = ri->colorAttachmentCount;
      memcpy(color_fmts, ri->pColorAttachmentFormats,
             sizeof(color_fmts[0]) * color_count);
      if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->depthAttachmentFormat;
      else if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->stencilAttachmentFormat;
   }

   if (color_count > 0) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, RENDER_TARGET_FORMATS, struct D3D12_RT_FORMAT_ARRAY, rts);
      rts->NumRenderTargets = color_count;
      for (uint32_t i = 0; i < color_count; i++) {
         rts->RTFormats[i] =
            dzn_image_get_dxgi_format(color_fmts[i],
                                      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
                                      VK_IMAGE_ASPECT_COLOR_BIT);
      }
   }

   if (zs_fmt != VK_FORMAT_UNDEFINED) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, DEPTH_STENCIL_FORMAT, DXGI_FORMAT, ds_fmt);
      *ds_fmt =
         dzn_image_get_dxgi_format(zs_fmt,
                                   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                   VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT);
   }

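   /* Compile the SPIR-V stages down to DXIL (possibly hitting the pipeline
    * cache) and append the matching shader-bytecode subobjects to the
    * pipeline state stream. */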
   ret = dzn_graphics_pipeline_compile_shaders(device, pipeline, pcache,
                                               layout, stream_desc,
                                               attribs, vi_conversions,
                                               pCreateInfo);
   if (ret != VK_SUCCESS)
      goto out;

   if (!pipeline->variants) {
      hres = ID3D12Device2_CreatePipelineState(device->dev, stream_desc,
                                               &IID_ID3D12PipelineState,
                                               (void **)&pipeline->base.state);
      if (FAILED(hres)) {
         ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
   }

   dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
   ret = VK_SUCCESS;

out:
   if (ret != VK_SUCCESS)
      dzn_graphics_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_graphics_pipeline_to_handle(pipeline);

   return ret;
}

ID3D12PipelineState *
dzn_graphics_pipeline_get_state(struct dzn_graphics_pipeline *pipeline,
                                const struct dzn_graphics_pipeline_variant_key *key)
{
   if (!pipeline->variants)
      return pipeline->base.state;

   struct dzn_graphics_pipeline_variant_key masked_key = { 0 };

   if (dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
      masked_key.ib_strip_cut = key->ib_strip_cut;

   if (dzn_graphics_pipeline_get_desc_template(pipeline, rast) &&
       pipeline->zsa.dynamic_depth_bias)
      masked_key.depth_bias = key->depth_bias;

   const D3D12_DEPTH_STENCIL_DESC1 *ds_templ =
      dzn_graphics_pipeline_get_desc_template(pipeline, ds);
   if (ds_templ && ds_templ->StencilEnable) {
      if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
          ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
         masked_key.stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
      if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
          ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
         masked_key.stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
      if (pipeline->zsa.stencil_test.dynamic_write_mask) {
         masked_key.stencil_test.front.write_mask = key->stencil_test.front.write_mask;
         masked_key.stencil_test.back.write_mask = key->stencil_test.back.write_mask;
      }
   }

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   struct hash_entry *he =
      _mesa_hash_table_search(pipeline->variants, &masked_key);

   struct dzn_graphics_pipeline_variant *variant;

   if (!he) {
      variant = rzalloc(pipeline->variants, struct dzn_graphics_pipeline_variant);
      variant->key = masked_key;

      uintptr_t stream_buf[MAX_GFX_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
      D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
         .SizeInBytes = pipeline->templates.stream_desc.SizeInBytes,
         .pPipelineStateSubobjectStream = stream_buf,
      };

      memcpy(stream_buf, pipeline->templates.stream_buf, stream_desc.SizeInBytes);

      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE *ib_strip_cut =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ib_strip_cut);
      if (ib_strip_cut)
         *ib_strip_cut = masked_key.ib_strip_cut;

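      /* Patch the dynamic depth-bias values into the copied rasterizer
       * desc; translate_depth_bias() converts the Vulkan floating-point
       * constant factor into the integer DepthBias value D3D12 expects. */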
      D3D12_RASTERIZER_DESC *rast =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, rast);
      if (rast && pipeline->zsa.dynamic_depth_bias) {
         rast->DepthBias = translate_depth_bias(masked_key.depth_bias.constant_factor);
         rast->DepthBiasClamp = masked_key.depth_bias.clamp;
         rast->SlopeScaledDepthBias = masked_key.depth_bias.slope_factor;
      }

      D3D12_DEPTH_STENCIL_DESC1 *ds =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
      if (ds && ds->StencilEnable) {
         if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key.stencil_test.front.compare_mask;
            }

            if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key.stencil_test.back.compare_mask;
            }

            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
               assert(masked_key.stencil_test.front.compare_mask == masked_key.stencil_test.back.compare_mask);
         }

         if (pipeline->zsa.stencil_test.dynamic_write_mask) {
            assert(!masked_key.stencil_test.front.write_mask ||
                   !masked_key.stencil_test.back.write_mask ||
                   masked_key.stencil_test.front.write_mask == masked_key.stencil_test.back.write_mask);
            ds->StencilWriteMask =
               masked_key.stencil_test.front.write_mask |
               masked_key.stencil_test.back.write_mask;
         }
      }

      ASSERTED HRESULT hres = ID3D12Device2_CreatePipelineState(device->dev, &stream_desc,
                                                                &IID_ID3D12PipelineState,
                                                                (void**)(&variant->state));
      assert(!FAILED(hres));
      he = _mesa_hash_table_insert(pipeline->variants, &variant->key, variant);
      assert(he);
   } else {
      variant = he->data;
   }

   if (variant->state)
      ID3D12PipelineState_AddRef(variant->state);

   if (pipeline->base.state)
      ID3D12PipelineState_Release(pipeline->base.state);

   pipeline->base.state = variant->state;
   return variant->state;
}

#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 4

ID3D12CommandSignature *
dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline,
                                           enum dzn_indirect_draw_cmd_sig_type type)
{
   assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type];

   if (cmdsig)
      return cmdsig;

   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG;
   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan;

   uint32_t cmd_arg_count = 0;
   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];

   if (triangle_fan) {
      cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
      };
   }

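   /* ExecuteIndirect() writes the per-draw sysvals straight into the root
    * parameter holding the sysval constants: two 32-bit values starting at
    * first_vertex (the first vertex plus, presumably, the base instance),
    * followed by the draw id. The vertex shader reads them back through
    * dxil_spirv_vertex_runtime_data. */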
   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
         .Num32BitValuesToSet = 2,
      },
   };

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
         .Num32BitValuesToSet = 1,
      },
   };

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = indexed ?
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
   };

   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
   assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);

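   /* Per-draw entry layout implied by the argument descs above (illustrative
    * only; the actual exec-params structs are defined elsewhere in the
    * driver):
    *
    *   [triangle fans only] D3D12_INDEX_BUFFER_VIEW ib_view;
    *   uint32_t sysvals[2];   root constants starting at first_vertex
    *   uint32_t draw_id;      root constant at draw_id
    *   D3D12_DRAW_ARGUMENTS or D3D12_DRAW_INDEXED_ARGUMENTS draw;
    *
    * ByteStride below must match the stride of those structs, which the
    * indirect-draw pre-processing pass fills in. */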
   D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
      .ByteStride =
         triangle_fan ?
         sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
         sizeof(struct dzn_indirect_draw_exec_params),
      .NumArgumentDescs = cmd_arg_count,
      .pArgumentDescs = cmd_args,
   };
   HRESULT hres =
      ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
                                           pipeline->base.root.sig,
                                           &IID_ID3D12CommandSignature,
                                           (void **)&cmdsig);
   if (FAILED(hres))
      return NULL;

   pipeline->indirect_cmd_sigs[type] = cmdsig;
   return cmdsig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateGraphicsPipelines(VkDevice dev,
                            VkPipelineCache pipelineCache,
                            uint32_t count,
                            const VkGraphicsPipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_graphics_pipeline_create(device,
                                            pipelineCache,
                                            &pCreateInfos[i],
                                            pAllocator,
                                            &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
          * as it is not obvious which error should be reported when two
          * creations fail for different reasons.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

static void
dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline,
                             const VkAllocationCallbacks *alloc)
{
   if (!pipeline)
      return;

   if (pipeline->indirect_cmd_sig)
      ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig);

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

static VkResult
dzn_pipeline_cache_lookup_compute_pipeline(struct vk_pipeline_cache *cache,
                                           uint8_t *pipeline_hash,
                                           D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                           D3D12_SHADER_BYTECODE *dxil,
                                           bool *cache_hit)
{
   *cache_hit = false;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   assert(cached_blob->size == SHA1_DIGEST_LENGTH);

   const uint8_t *dxil_hash = cached_blob->data;
   gl_shader_stage stage;

   VkResult ret =
      dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, dxil);

   if (ret != VK_SUCCESS || stage == MESA_SHADER_NONE)
      goto out;

   assert(stage == MESA_SHADER_COMPUTE);

   d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, slot);
   *slot = *dxil;
   *cache_hit = true;

out:
   vk_pipeline_cache_object_unref(cache_obj);
   return ret;
}

static void
dzn_pipeline_cache_add_compute_pipeline(struct vk_pipeline_cache *cache,
                                        uint8_t *pipeline_hash,
                                        uint8_t *dxil_hash)
{
   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, SHA1_DIGEST_LENGTH);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   memcpy((void *)cached_blob->data, dxil_hash, SHA1_DIGEST_LENGTH);

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache_obj);
}

static VkResult
dzn_compute_pipeline_compile_shader(struct dzn_device *device,
                                    struct dzn_compute_pipeline *pipeline,
                                    struct vk_pipeline_cache *cache,
                                    const struct dzn_pipeline_layout *layout,
                                    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                    D3D12_SHADER_BYTECODE *shader,
                                    const VkComputePipelineCreateInfo *info)
{
   uint8_t spirv_hash[SHA1_DIGEST_LENGTH], pipeline_hash[SHA1_DIGEST_LENGTH];
   VkResult ret = VK_SUCCESS;
   nir_shader *nir = NULL;

   if (cache) {
      struct mesa_sha1 pipeline_hash_ctx;

      _mesa_sha1_init(&pipeline_hash_ctx);
      vk_pipeline_hash_shader_stage(&info->stage, spirv_hash);
      _mesa_sha1_update(&pipeline_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[MESA_SHADER_COMPUTE].hash,
                        sizeof(layout->stages[MESA_SHADER_COMPUTE].hash));
      _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);

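      /* The pipeline hash (SPIR-V hash + pipeline-layout hash) keys a cached
       * blob holding the DXIL hash; on a hit the DXIL bytecode is fetched
       * directly and the SPIR-V -> NIR -> DXIL compilation below is skipped
       * entirely. */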
      bool cache_hit = false;
      ret = dzn_pipeline_cache_lookup_compute_pipeline(cache, pipeline_hash,
                                                       stream_desc, shader,
                                                       &cache_hit);
      if (ret != VK_SUCCESS || cache_hit)
         goto out;
   }

   ret = dzn_pipeline_get_nir_shader(device, layout, cache, spirv_hash,
                                     &info->stage, MESA_SHADER_COMPUTE,
                                     DXIL_SPIRV_YZ_FLIP_NONE, 0, 0,
                                     false, NULL,
                                     dxil_get_nir_compiler_options(), &nir);
   if (ret != VK_SUCCESS)
      return ret;

   uint8_t bindings_hash[SHA1_DIGEST_LENGTH], dxil_hash[SHA1_DIGEST_LENGTH];

   NIR_PASS_V(nir, adjust_var_bindings, layout, cache ? bindings_hash : NULL);

   if (cache) {
      struct mesa_sha1 dxil_hash_ctx;

      _mesa_sha1_init(&dxil_hash_ctx);
      _mesa_sha1_update(&dxil_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
      _mesa_sha1_final(&dxil_hash_ctx, dxil_hash);

      gl_shader_stage stage;

      ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, shader);
      if (ret != VK_SUCCESS)
         goto out;

      if (stage != MESA_SHADER_NONE) {
         assert(stage == MESA_SHADER_COMPUTE);
         d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
         *cs = *shader;
         dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
         goto out;
      }
   }

   ret = dzn_pipeline_compile_shader(device, nir, shader);
   if (ret != VK_SUCCESS)
      goto out;

   d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
   *cs = *shader;

   if (cache) {
      dzn_pipeline_cache_add_dxil_shader(cache, dxil_hash, MESA_SHADER_COMPUTE, shader);
      dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
   }

out:
   ralloc_free(nir);
   return ret;
}

static VkResult
dzn_compute_pipeline_create(struct dzn_device *device,
                            VkPipelineCache cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *out)
{
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);

   struct dzn_compute_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   uintptr_t state_buf[MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
   D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
      .pPipelineStateSubobjectStream = state_buf,
   };

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_COMPUTE,
                     layout, &stream_desc);

   D3D12_SHADER_BYTECODE shader = { 0 };
   VkResult ret =
      dzn_compute_pipeline_compile_shader(device, pipeline, pcache, layout,
                                          &stream_desc, &shader, pCreateInfo);
   if (ret != VK_SUCCESS)
      goto out;

   if (FAILED(ID3D12Device2_CreatePipelineState(device->dev, &stream_desc,
                                                &IID_ID3D12PipelineState,
                                                (void **)&pipeline->base.state)))
      ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

out:
   free((void *)shader.pShaderBytecode);
   if (ret != VK_SUCCESS)
      dzn_compute_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_compute_pipeline_to_handle(pipeline);

   return ret;
}

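/* The compute indirect command signature first pushes the dispatch group
 * counts as root constants (so the shader-visible workgroup-count sysvals
 * are populated), then issues the actual dispatch. The 2x
 * D3D12_DISPATCH_ARGUMENTS stride assumes the indirect-dispatch buffer
 * stores the group counts twice per entry: once consumed as root constants,
 * once as the dispatch arguments. */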
ID3D12CommandSignature *
dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline)
{
   if (pipeline->indirect_cmd_sig)
      return pipeline->indirect_cmd_sig;

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);

   D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
         .Constant = {
            .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
            .DestOffsetIn32BitValues = 0,
            .Num32BitValuesToSet = 3,
         },
      },
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
      },
   };

   D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
      .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
      .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
      .pArgumentDescs = indirect_dispatch_args,
   };

   HRESULT hres =
      ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc,
                                           pipeline->base.root.sig,
                                           &IID_ID3D12CommandSignature,
                                           (void **)&pipeline->indirect_cmd_sig);
   if (FAILED(hres))
      return NULL;

   return pipeline->indirect_cmd_sig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateComputePipelines(VkDevice dev,
                           VkPipelineCache pipelineCache,
                           uint32_t count,
                           const VkComputePipelineCreateInfo *pCreateInfos,
                           const VkAllocationCallbacks *pAllocator,
                           VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_compute_pipeline_create(device,
                                           pipelineCache,
                                           &pCreateInfos[i],
                                           pAllocator,
                                           &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
          * as it is not obvious which error should be reported when two
          * creations fail for different reasons.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

VKAPI_ATTR void VKAPI_CALL
dzn_DestroyPipeline(VkDevice device,
                    VkPipeline pipeline,
                    const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);

   if (!pipe)
      return;

   if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base);
      dzn_graphics_pipeline_destroy(gfx, pAllocator);
   } else {
      assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
      struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base);
      dzn_compute_pipeline_destroy(compute, pAllocator);
   }
}