1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

#include "dzn_private.h"

#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_format.h"
#include "vk_util.h"

/* Replay the transition barriers recorded in barriers[0..barrier_count) on
 * the command list. Consecutive non-NOP entries are batched into a single
 * ResourceBarrier() call; entries whose StateBefore equals StateAfter end
 * the current batch and are skipped, because D3D12 rejects NOP transition
 * barriers.
 */
static void
dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                        D3D12_RESOURCE_BARRIER *barriers,
                                        uint32_t barrier_count)
{
   uint32_t flush_count = 0;
   for (uint32_t b = 0; b < barrier_count; b++) {
      assert(barriers[b].Transition.pResource);

      /* some layouts map to the same states, and NOP-barriers are illegal */
      if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
         if (flush_count) {
            /* Emit the batch accumulated so far; it ends right before b. */
            ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                       &barriers[b - flush_count]);
            flush_count = 0;
         }
      } else {
         flush_count++;
      }
   }

   /* Emit the trailing batch, if any. */
   if (flush_count)
      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
                                                 &barriers[barrier_count - flush_count]);

   /* Set Before = After so we don't execute the same barrier twice. */
   for (uint32_t b = 0; b < barrier_count; b++)
      barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
}

/* Execute any pending transition barriers recorded for res on the
 * [first_subres, first_subres + subres_count) subresource range.
 * No-op if no barrier was ever queued for this resource.
 */
static void
dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                         ID3D12Resource *res,
                                         uint32_t first_subres,
                                         uint32_t subres_count)
{
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
   D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;

   if (!barriers)
      return;

   dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
}

enum dzn_queue_transition_flags {
   /* Execute the queued barriers immediately instead of leaving them pending. */
   DZN_QUEUE_TRANSITION_FLUSH = 1 << 0,
   /* Caller's before state is unknown (VK_IMAGE_LAYOUT_UNDEFINED): chain from
    * the previously queued StateAfter instead. */
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 1 << 1,
};

/* Record before -> after transition barriers for a subresource range of res
 * in the per-command-buffer transition_barriers hash table, lazily allocating
 * one D3D12_RESOURCE_BARRIER slot per subresource of the resource.
 * Returns VK_SUCCESS, or an OOM error which is also latched in cmdbuf->error.
 */
static VkResult
dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
                                         ID3D12Resource *res,
                                         uint32_t first_subres,
                                         uint32_t subres_count,
                                         D3D12_RESOURCE_STATES before,
                                         D3D12_RESOURCE_STATES after,
                                         uint32_t flags)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
   struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;

   if (!barriers) {
      /* First barrier for this resource: size the array to cover every
       * subresource (planes * mips * array layers). */
      D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
      D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
      ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
      uint32_t barrier_count =
         fmt_info.PlaneCount *
         desc.MipLevels * desc.DepthOrArraySize;

      barriers =
         vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
                   8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!barriers) {
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return cmdbuf->error;
      }

      he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
      if (!he) {
         vk_free(&cmdbuf->vk.pool->alloc, barriers);
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return cmdbuf->error;
      }
   }

   for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
      if (!barriers[subres].Transition.pResource) {
         /* First time this subresource is touched: record a fresh barrier
          * (a zeroed entry has pResource == NULL). */
         barriers[subres] = (D3D12_RESOURCE_BARRIER) {
            .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
            .Flags = 0,
            .Transition = {
               .pResource = res,
               .Subresource = subres,
.StateBefore = before, 130 .StateAfter = after, 131 }, 132 }; 133 } else { 134 if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED) 135 before = barriers[subres].Transition.StateAfter; 136 137 assert(barriers[subres].Transition.StateAfter == before || 138 barriers[subres].Transition.StateAfter == after); 139 barriers[subres].Transition.StateAfter = after; 140 } 141 } 142 143 if (flags & DZN_QUEUE_TRANSITION_FLUSH) 144 dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count); 145 146 return VK_SUCCESS; 147} 148 149static VkResult 150dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf, 151 const struct dzn_image *image, 152 const VkImageSubresourceRange *range, 153 D3D12_RESOURCE_STATES before, 154 D3D12_RESOURCE_STATES after, 155 uint32_t flags) 156{ 157 uint32_t first_barrier = 0, barrier_count = 0; 158 VkResult ret = VK_SUCCESS; 159 160 dzn_foreach_aspect(aspect, range->aspectMask) { 161 uint32_t layer_count = dzn_get_layer_count(image, range); 162 uint32_t level_count = dzn_get_level_count(image, range); 163 for (uint32_t layer = 0; layer < layer_count; layer++) { 164 uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer); 165 if (!barrier_count) { 166 first_barrier = subres; 167 barrier_count = level_count; 168 continue; 169 } else if (first_barrier + barrier_count == subres) { 170 barrier_count += level_count; 171 continue; 172 } 173 174 ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, 175 first_barrier, barrier_count, 176 before, after, flags); 177 if (ret != VK_SUCCESS) 178 return ret; 179 180 barrier_count = 0; 181 } 182 183 if (barrier_count) { 184 ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, 185 first_barrier, barrier_count, 186 before, after, flags); 187 if (ret != VK_SUCCESS) 188 return ret; 189 } 190 } 191 192 return VK_SUCCESS; 193} 194 195static VkResult 196dzn_cmd_buffer_queue_image_range_layout_transition(struct 
dzn_cmd_buffer *cmdbuf, 197 const struct dzn_image *image, 198 const VkImageSubresourceRange *range, 199 VkImageLayout old_layout, 200 VkImageLayout new_layout, 201 uint32_t flags) 202{ 203 uint32_t first_barrier = 0, barrier_count = 0; 204 VkResult ret = VK_SUCCESS; 205 206 if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) 207 flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED; 208 209 dzn_foreach_aspect(aspect, range->aspectMask) { 210 D3D12_RESOURCE_STATES after = 211 dzn_image_layout_to_state(image, new_layout, aspect); 212 D3D12_RESOURCE_STATES before = 213 (old_layout == VK_IMAGE_LAYOUT_UNDEFINED || 214 old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ? 215 image->mem->initial_state : 216 dzn_image_layout_to_state(image, old_layout, aspect); 217 218 uint32_t layer_count = dzn_get_layer_count(image, range); 219 uint32_t level_count = dzn_get_level_count(image, range); 220 for (uint32_t layer = 0; layer < layer_count; layer++) { 221 uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer); 222 if (!barrier_count) { 223 first_barrier = subres; 224 barrier_count = level_count; 225 continue; 226 } else if (first_barrier + barrier_count == subres) { 227 barrier_count += level_count; 228 continue; 229 } 230 231 ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, 232 first_barrier, barrier_count, 233 before, after, flags); 234 if (ret != VK_SUCCESS) 235 return ret; 236 237 barrier_count = 0; 238 } 239 240 if (barrier_count) { 241 ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, 242 first_barrier, barrier_count, 243 before, after, flags); 244 if (ret != VK_SUCCESS) 245 return ret; 246 } 247 } 248 249 return VK_SUCCESS; 250} 251 252static void 253dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf) 254{ 255 if (!cbuf) 256 return; 257 258 struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk); 259 260 if (cmdbuf->cmdlist) 261 ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist); 262 263 if 
(cmdbuf->cmdalloc)
      ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);

   /* Release command-buffer-lifetime internal buffers. */
   list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
      list_del(&res->link);
      ID3D12Resource_Release(res->res);
      vk_free(&cbuf->pool->alloc, res);
   }

   dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
   dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
   util_dynarray_fini(&cmdbuf->events.wait);
   util_dynarray_fini(&cmdbuf->events.signal);
   util_dynarray_fini(&cmdbuf->queries.reset);
   util_dynarray_fini(&cmdbuf->queries.wait);
   util_dynarray_fini(&cmdbuf->queries.signal);

   /* Hash table payloads were allocated from the pool allocator; free them
    * before destroying the tables. */
   if (cmdbuf->rtvs.ht) {
      hash_table_foreach(cmdbuf->rtvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
   }

   if (cmdbuf->dsvs.ht) {
      hash_table_foreach(cmdbuf->dsvs.ht, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
   }

   /* events.ht values are uintptr_t states, nothing to free. */
   if (cmdbuf->events.ht)
      _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);

   if (cmdbuf->queries.ht) {
      hash_table_foreach(cmdbuf->queries.ht, he) {
         struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
         util_dynarray_fini(&qpstate->reset);
         util_dynarray_fini(&qpstate->collect);
         util_dynarray_fini(&qpstate->wait);
         util_dynarray_fini(&qpstate->signal);
         vk_free(&cbuf->pool->alloc, he->data);
      }
      _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
   }

   if (cmdbuf->transition_barriers) {
      hash_table_foreach(cmdbuf->transition_barriers, he)
         vk_free(&cbuf->pool->alloc, he->data);
      _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
   }

   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&cbuf->pool->alloc, cmdbuf);
}

/* Hash/compare RTV cache keys by raw bytes; keys are expected to be
 * fully zero-initialized so padding doesn't perturb the hash. */
static uint32_t
dzn_cmd_buffer_rtv_key_hash_function(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
}

static bool
dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
}

/* Same byte-wise hashing scheme for DSV cache keys. */
static uint32_t
dzn_cmd_buffer_dsv_key_hash_function(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
}

static bool
dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
}

/* Allocate and initialize one dzn_cmd_buffer, including its D3D12 command
 * allocator/list (typed after the pool's queue family), descriptor heap
 * pools and bookkeeping hash tables. On failure everything is torn down
 * through dzn_cmd_buffer_destroy().
 */
static VkResult
dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
                      VkCommandBuffer *out)
{
   VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
   struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   assert(pool->queue_family_index < pdev->queue_family_count);

   D3D12_COMMAND_LIST_TYPE type =
      pdev->queue_families[pool->queue_family_index].desc.Type;

   struct dzn_cmd_buffer *cmdbuf =
      vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result =
      vk_command_buffer_init(&cmdbuf->vk, pool, info->level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->alloc, cmdbuf);
      return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
   list_inithead(&cmdbuf->internal_bufs);
   util_dynarray_init(&cmdbuf->events.wait, NULL);
   util_dynarray_init(&cmdbuf->events.signal, NULL);
   util_dynarray_init(&cmdbuf->queries.reset, NULL);
   util_dynarray_init(&cmdbuf->queries.wait, NULL);
   util_dynarray_init(&cmdbuf->queries.signal, NULL);
   dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
                                 false, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
                                 false, &pool->alloc);
   /* CBV/SRV/UAV and sampler heaps are shader-visible (true), RTV/DSV are
    * CPU-only (false). */
   dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                 true, &pool->alloc);
   dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
                                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
                                 true, &pool->alloc);

   cmdbuf->events.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->queries.ht =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->transition_barriers =
      _mesa_pointer_hash_table_create(NULL);
   cmdbuf->rtvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_rtv_key_hash_function,
                              dzn_cmd_buffer_rtv_key_equals_function);
   cmdbuf->dsvs.ht =
      _mesa_hash_table_create(NULL,
                              dzn_cmd_buffer_dsv_key_hash_function,
                              dzn_cmd_buffer_dsv_key_equals_function);
   if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
       !cmdbuf->transition_barriers ||
       !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   cmdbuf->vk.destroy = dzn_cmd_buffer_destroy;

   if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
                                                   &IID_ID3D12CommandAllocator,
                                                   (void **)&cmdbuf->cmdalloc))) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   /* Command lists are created in the recording state. */
   if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0, type,
                                              cmdbuf->cmdalloc, NULL,
                                              &IID_ID3D12GraphicsCommandList1,
                                              (void **)&cmdbuf->cmdlist))) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

out:
   if (result != VK_SUCCESS)
      dzn_cmd_buffer_destroy(&cmdbuf->vk);
   else
      *out = dzn_cmd_buffer_to_handle(cmdbuf);

   return result;
}

/* Return the command buffer to its initial recording state: drop all
 * recorded state, internal buffers, cached views, pending barriers and
 * query bookkeeping, then recycle the D3D12 allocator/list pair.
 * Returns cmdbuf->error (VK_SUCCESS unless re-creation of the list failed).
 */
static VkResult
dzn_cmd_buffer_reset(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   const struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const struct vk_command_pool *pool = cmdbuf->vk.pool;

   /* Reset the state */
   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   /* TODO: Return resources to the pool */
   list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
      list_del(&res->link);
      ID3D12Resource_Release(res->res);
      vk_free(&cmdbuf->vk.pool->alloc, res);
   }

   cmdbuf->error = VK_SUCCESS;
   util_dynarray_clear(&cmdbuf->events.wait);
   util_dynarray_clear(&cmdbuf->events.signal);
   util_dynarray_clear(&cmdbuf->queries.reset);
   util_dynarray_clear(&cmdbuf->queries.wait);
   util_dynarray_clear(&cmdbuf->queries.signal);
   hash_table_foreach(cmdbuf->rtvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
   /* The cached null RTV lives in rtvs.pool, which is reset below. */
   cmdbuf->null_rtv.ptr = 0;
   dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
   hash_table_foreach(cmdbuf->dsvs.ht, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
      util_dynarray_fini(&qpstate->reset);
      util_dynarray_fini(&qpstate->collect);
      util_dynarray_fini(&qpstate->wait);
      util_dynarray_fini(&qpstate->signal);
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   }
   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
   hash_table_foreach(cmdbuf->transition_barriers, he)
      vk_free(&cmdbuf->vk.pool->alloc, he->data);
   _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
   vk_command_buffer_reset(&cmdbuf->vk);

   /* cmdlist->Reset() doesn't return the memory back to the command list
    * allocator, and cmdalloc->Reset() can only be called if there's no live
    * cmdlist allocated from the allocator, so we need to release and create
    * a new command list.
    */
   ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
   cmdbuf->cmdlist = NULL;
   ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
   D3D12_COMMAND_LIST_TYPE type =
      pdev->queue_families[pool->queue_family_index].desc.Type;
   if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0,
                                              type,
                                              cmdbuf->cmdalloc, NULL,
                                              &IID_ID3D12GraphicsCommandList1,
                                              (void **)&cmdbuf->cmdlist))) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   return cmdbuf->error;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_AllocateCommandBuffers(VkDevice device,
                           const VkCommandBufferAllocateInfo *pAllocateInfo,
                           VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(dzn_device, dev, device);
   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      result = dzn_cmd_buffer_create(pAllocateInfo,
                                     &pCommandBuffers[i]);
      if (result != VK_SUCCESS)
         break;
   }

   /* Per the spec, on failure free the i buffers created so far and null
    * out the whole output array. */
   if (result != VK_SUCCESS) {
      dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
                                                i, pCommandBuffers);
      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
         pCommandBuffers[i] = VK_NULL_HANDLE;
   }

   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,
                       VkCommandBufferResetFlags flags)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   return dzn_cmd_buffer_reset(cmdbuf);
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* If this is the
first vkBeginCommandBuffer, we must *initialize* the
    * command buffer's state. Otherwise, we must *reset* its state. In both
    * cases we reset it.
    *
    * From the Vulkan 1.0 spec:
    *
    *    If a command buffer is in the executable state and the command buffer
    *    was allocated from a command pool with the
    *    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
    *    vkBeginCommandBuffer implicitly resets the command buffer, behaving
    *    as if vkResetCommandBuffer had been called with
    *    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
    *    the command buffer in the recording state.
    */
   return dzn_cmd_buffer_reset(cmdbuf);
}

/* Flatten the per-event state hash table into the events.signal array
 * (skipping events the host waits on externally), then clear the table.
 * Called at vkEndCommandBuffer() time.
 */
static void
dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (cmdbuf->error != VK_SUCCESS)
      goto out;

   hash_table_foreach(cmdbuf->events.ht, he) {
      /* The hash table value encodes the event state directly. */
      enum dzn_event_state state = (uintptr_t)he->data;

      if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) {
         struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET };
         struct dzn_cmd_event_signal *entry =
            util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);

         if (!entry) {
            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            break;
         }

         *entry = signal;
      }
   }

out:
   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
}

/* Grow a dynarray-backed bitset so that `bit` is addressable, zeroing the
 * newly appended words. Returns VK_SUCCESS or latches OOM on the cmdbuf.
 */
static VkResult
dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
      return VK_SUCCESS;

   unsigned old_sz = array->size;
   /* Grows by enough whole words to cover `bit` (may over-allocate, which
    * is harmless). */
   void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
   if (!ptr) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      return cmdbuf->error;
   }

   memset(ptr, 0, array->size - old_sz);
   return VK_SUCCESS;
}

/* Test a bit; bits beyond the allocated words read as false. */
static bool
dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
{
   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;

   if (bit < nbits)
      return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);

   return false;
}

/* Set a bit, growing the bitset if needed. */
static VkResult
dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
   if (result != VK_SUCCESS)
      return result;

   BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
   return VK_SUCCESS;
}

/* Clear a bit; out-of-range bits are already conceptually clear. */
static void
dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
{
   if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
      return;

   BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
}

/* Set bits [bit, bit + count), growing the bitset if needed. */
static VkResult
dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
                                   struct util_dynarray *array,
                                   uint32_t bit, uint32_t count)
{
   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
   if (result != VK_SUCCESS)
      return result;

   BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
   return VK_SUCCESS;
}

/* Clear bits [bit, bit + count), clamped to the allocated words. */
static void
dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
                                     struct util_dynarray *array,
                                     uint32_t bit, uint32_t count)
{
   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;

   if (!nbits)
      return;

   uint32_t end = MIN2(bit + count, nbits) - 1;

   /* Clear word by word; the 32 here assumes BITSET_WORDBITS == 32 —
    * NOTE(review): confirm if BITSET_WORD can ever be wider. */
   while (bit <= end) {
      uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
      BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
      bit += subcount;
   }
}

/* Allocate a zero-state per-query-pool bookkeeping record. Latches OOM on
 * the command buffer and returns NULL on failure.
 */
static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_query_pool_state *state =
      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
               8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!state) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   util_dynarray_init(&state->reset, NULL);
   util_dynarray_init(&state->collect, NULL);
   util_dynarray_init(&state->wait, NULL);
   util_dynarray_init(&state->signal, NULL);
   return state;
}

static void
dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
                                        struct dzn_cmd_buffer_query_pool_state *state)
{
   util_dynarray_fini(&state->reset);
   util_dynarray_fini(&state->collect);
   util_dynarray_fini(&state->wait);
   util_dynarray_fini(&state->signal);
   vk_free(&cmdbuf->vk.pool->alloc, state);
}

/* Look up (or lazily create) the bookkeeping record for qpool in
 * cmdbuf->queries.ht. Returns NULL on OOM (error latched on cmdbuf).
 */
static struct dzn_cmd_buffer_query_pool_state *
dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
                                    struct dzn_query_pool *qpool)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   struct dzn_cmd_buffer_query_pool_state *state = NULL;
   struct hash_entry *he =
      _mesa_hash_table_search(cmdbuf->queries.ht, qpool);

   if (!he) {
      state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
      if (!state)
         return NULL;

      he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
      if (!he) {
         dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   } else {
      state = he->data;
}

   return state;
}

/* Resolve the queries flagged in state->collect within
 * [first_query, first_query + query_count): resolve query data into
 * resolve_buffer, copy results to collect_buffer, and mark availability by
 * copying an all-ones pattern into the availability slots. Collected bits
 * move from state->collect to state->signal.
 */
static VkResult
dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
                               const struct dzn_query_pool *qpool,
                               struct dzn_cmd_buffer_query_pool_state *state,
                               uint32_t first_query,
                               uint32_t query_count)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   if (!nbits)
      return VK_SUCCESS;

   /* Clamp the requested window to the bits actually tracked. */
   query_count = MIN2(query_count, nbits - first_query);
   nbits = MIN2(first_query + query_count, nbits);

   VkResult result =
      dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
   if (result != VK_SUCCESS)
      return result;

   /* Make sure the resolve buffer is in COPY_DEST before resolving. */
   dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);

   BITSET_WORD *collect =
      util_dynarray_element(&state->collect, BITSET_WORD, 0);

   /* Resolve each contiguous run of flagged queries in one call. */
   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits);
        start < nbits;
        __bitset_next_range(&start, &end, collect, nbits)) {
      ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
                                                  qpool->heap,
                                                  qpool->queries[start].type,
                                                  start, end - start,
                                                  qpool->resolve_buffer,
                                                  qpool->query_size * start);
   }

   uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
   uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);

   /* Flip the resolve buffer to COPY_SOURCE so results can be copied out. */
   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                            0, 1,
                                            D3D12_RESOURCE_STATE_COPY_DEST,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                               qpool->collect_buffer, offset,
                                               qpool->resolve_buffer, offset,
                                               size);

   /* Mark the collected queries available and shift their tracking bits
    * from collect to signal. */
   for (start = first_query, end = first_query,
        __bitset_next_range(&start, &end, collect, nbits);
        start < nbits;
        __bitset_next_range(&start, &end, collect, nbits)) {
      uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
      uint32_t count = end - start;

      /* The all-ones source section is finite, so copy in chunks. */
      for (unsigned i = 0; i < count; i += step) {
         uint32_t sub_count = MIN2(step, count - i);

         ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
                                                     qpool->collect_buffer,
                                                     dzn_query_pool_get_availability_offset(qpool, start + i),
                                                     device->queries.refs,
                                                     DZN_QUERY_REFS_ALL_ONES_OFFSET,
                                                     sizeof(uint64_t) * sub_count);
      }

      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
      dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count);
   }

   /* Queue (but don't flush) the transition back to COPY_DEST for the next
    * collection. */
   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
                                            0, 1,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            D3D12_RESOURCE_STATE_COPY_DEST,
                                            0);
   return VK_SUCCESS;
}

/* Convert each contiguous run of set bits in bitset_array into a
 * dzn_cmd_buffer_query_range appended to ops_array.
 */
static VkResult
dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
                                 struct dzn_query_pool *qpool,
                                 struct util_dynarray *bitset_array,
                                 struct util_dynarray *ops_array)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
   uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
   uint32_t start, end;

   BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
      struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
      struct dzn_cmd_buffer_query_range *entry =
         util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);

      if (!entry) {
         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         return cmdbuf->error;
      }

      *entry = range;
   }

   return VK_SUCCESS;
}

/* End-of-recording pass: collect every pending query of every pool touched
 * by this command buffer and flatten the reset/wait/signal bitsets into the
 * command buffer's queries.{reset,wait,signal} op arrays.
 */
static VkResult
dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
{
   hash_table_foreach(cmdbuf->queries.ht, he) {
      struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
      struct dzn_cmd_buffer_query_pool_state *state = he->data;
      VkResult result =
         dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait);
      if (result != VK_SUCCESS)
         return result;

      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      /* Primary buffers finalize their event/query bookkeeping and close
       * the D3D12 command list. */
      dzn_cmd_buffer_gather_events(cmdbuf);
      dzn_cmd_buffer_gather_queries(cmdbuf);
      HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
      if (FAILED(hres))
         cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
   } else {
      /* Secondary buffers are replayed through vk_cmd_queue; propagate its
       * recording error. */
      cmdbuf->error = cmdbuf->vk.cmd_queue.error;
   }

   return cmdbuf->error;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   bool execution_barrier =
      !info->memoryBarrierCount &&
      !info->bufferMemoryBarrierCount &&
      !info->imageMemoryBarrierCount;

   if (execution_barrier) {
      /* Execution barrier can be emulated with a NULL UAV barrier (AKA
       * pipeline flush). That's the best we can do with the standard D3D12
       * barrier API.
903 */ 904 D3D12_RESOURCE_BARRIER barrier = { 905 .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, 906 .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, 907 .UAV = { .pResource = NULL }, 908 }; 909 910 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); 911 } 912 913 /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers. 914 * Scopes are not taken into account, but that's inherent to the current 915 * D3D12 barrier API. 916 */ 917 if (info->memoryBarrierCount) { 918 D3D12_RESOURCE_BARRIER barriers[2] = { 0 }; 919 920 barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; 921 barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; 922 barriers[0].UAV.pResource = NULL; 923 barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; 924 barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; 925 barriers[1].Aliasing.pResourceBefore = NULL; 926 barriers[1].Aliasing.pResourceAfter = NULL; 927 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers); 928 } 929 930 for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) { 931 VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer); 932 D3D12_RESOURCE_BARRIER barrier = { 0 }; 933 934 /* UAV are used only for storage buffers, skip all other buffers. */ 935 if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)) 936 continue; 937 938 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; 939 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; 940 barrier.UAV.pResource = buf->res; 941 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); 942 } 943 944 for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) { 945 const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i]; 946 const VkImageSubresourceRange *range = &ibarrier->subresourceRange; 947 VK_FROM_HANDLE(dzn_image, image, ibarrier->image); 948 949 /* We use placed resource's simple model, in which only one resource 950 * pointing to a given heap is active at a given time. 
To make the 951 * resource active we need to add an aliasing barrier. 952 */ 953 D3D12_RESOURCE_BARRIER aliasing_barrier = { 954 .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING, 955 .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, 956 .Aliasing = { 957 .pResourceBefore = NULL, 958 .pResourceAfter = image->res, 959 }, 960 }; 961 962 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &aliasing_barrier); 963 964 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, 965 ibarrier->oldLayout, 966 ibarrier->newLayout, 967 DZN_QUEUE_TRANSITION_FLUSH); 968 } 969} 970 971static D3D12_CPU_DESCRIPTOR_HANDLE 972dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf, 973 const struct dzn_image *image, 974 const D3D12_DEPTH_STENCIL_VIEW_DESC *desc) 975{ 976 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 977 struct dzn_cmd_buffer_dsv_key key = { image, *desc }; 978 struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key); 979 struct dzn_cmd_buffer_dsv_entry *dsve; 980 981 if (!he) { 982 struct dzn_descriptor_heap *heap; 983 uint32_t slot; 984 985 // TODO: error handling 986 dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8, 987 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 988 dsve->key = key; 989 dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot); 990 dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot); 991 ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle); 992 _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve); 993 } else { 994 dsve = he->data; 995 } 996 997 return dsve->handle; 998} 999 1000static D3D12_CPU_DESCRIPTOR_HANDLE 1001dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf, 1002 const struct dzn_image *image, 1003 const D3D12_RENDER_TARGET_VIEW_DESC *desc) 1004{ 1005 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 1006 struct dzn_cmd_buffer_rtv_key key = { image, *desc }; 1007 
   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
   struct dzn_cmd_buffer_rtv_entry *rtve;

   if (!he) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;

      // TODO: error handling
      rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      rtve->key = key;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
      rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
      ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
      /* Insert the entry-embedded key so it outlives the stack-local one. */
      he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
   } else {
      rtve = he->data;
   }

   return rtve->handle;
}

/* Return a cached "null" RTV (created against a NULL resource), used to fill
 * unused render-target slots. Allocated lazily from the command buffer's RTV
 * descriptor pool on first use.
 */
static D3D12_CPU_DESCRIPTOR_HANDLE
dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (!cmdbuf->null_rtv.ptr) {
      struct dzn_descriptor_heap *heap;
      uint32_t slot;
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
      cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);

      /* Format/dimension are arbitrary but must be valid for a null view. */
      D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
      desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
      desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
      desc.Texture2D.MipSlice = 0;
      desc.Texture2D.PlaneSlice = 0;

      /* NULL resource => null descriptor. */
      ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
   }

   return cmdbuf->null_rtv;
}

/* Allocate a committed buffer resource of at least size bytes on heap_type,
 * starting in init_state. The resource is owned by the command buffer (it is
 * appended to cmdbuf->internal_bufs and released when the command buffer is
 * reset/destroyed). On failure, records the error in cmdbuf->error and
 * returns it; *out is NULL in that case.
 */
static VkResult
dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
                                  uint32_t size,
                                  D3D12_HEAP_TYPE heap_type,
                                  D3D12_RESOURCE_STATES init_state,
                                  ID3D12Resource **out)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   ID3D12Resource *res;
   *out = NULL;

   /* Align size on 64k (the default alignment) */
   size = ALIGN_POT(size, 64 * 1024);

   D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, heap_type);
   D3D12_RESOURCE_DESC rdesc = {
      .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
      .Width = size,
      .Height = 1,
      .DepthOrArraySize = 1,
      .MipLevels = 1,
      .Format = DXGI_FORMAT_UNKNOWN,
      .SampleDesc = { .Count = 1, .Quality = 0 },
      .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
      .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
   };

   HRESULT hres =
      ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
                                            D3D12_HEAP_FLAG_NONE, &rdesc,
                                            init_state, NULL,
                                            &IID_ID3D12Resource,
                                            (void **)&res);
   if (FAILED(hres)) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      return cmdbuf->error;
   }

   struct dzn_internal_resource *entry =
      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!entry) {
      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      /* Don't leak the committed resource if the tracking entry failed. */
      ID3D12Resource_Release(res);
      return cmdbuf->error;
   }

   entry->res = res;
   list_addtail(&entry->link, &cmdbuf->internal_bufs);
   *out = entry->res;
   return VK_SUCCESS;
}

/* Clear rect_count rects of a single-level subresource range by filling an
 * upload buffer with the clear color on the CPU and issuing
 * CopyTextureRegion() per rect/layer/aspect. Used when the color can't be
 * expressed through ClearRenderTargetView() (e.g. non-renderable formats or
 * values that don't survive the float conversion). The image is transitioned
 * from layout to TRANSFER_DST and back around the copies.
 */
static void
dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                     const struct dzn_image *image,
                                     VkImageLayout layout,
                                     const VkClearColorValue *color,
                                     const VkImageSubresourceRange *range,
                                     uint32_t rect_count, D3D12_RECT *rects)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode the clear color into one raw texel. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* fill_step: smallest multiple of the pitch alignment that is also a
    * multiple of the block size, so rows can be memcpy'd in whole steps. */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
   uint32_t res_size = max_h * row_pitch;

   assert(fill_step <= sizeof(buf));

   /* Replicate the raw texel across one fill_step-sized pattern. */
   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        D3D12_HEAP_TYPE_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        &src_res);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   /* Fill the whole upload buffer with the clear pattern. */
   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = 0,
         .Footprint = {
            .Width = max_w,
            .Height = max_h,
            .Depth = 1,
            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
         },
      },
   };

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
                                            D3D12_RESOURCE_STATE_GENERIC_READ,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                      layout,
                                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                      DZN_QUEUE_TRANSITION_FLUSH);

   /* This path only handles a single mip level. */
   assert(dzn_get_level_count(image, range) == 1);
   uint32_t layer_count = dzn_get_layer_count(image, range);

   dzn_foreach_aspect(aspect, range->aspectMask) {
      VkImageSubresourceLayers subres = {
         .aspectMask = (VkImageAspectFlags)aspect,
         .mipLevel = range->baseMipLevel,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = layer_count,
      };

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         D3D12_TEXTURE_COPY_LOCATION dst_loc =
            dzn_image_get_copy_loc(image, &subres, aspect, layer);

         /* The source footprint format must match whatever the destination
          * copy location uses for this aspect. */
         src_loc.PlacedFootprint.Footprint.Format =
            dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
            dst_loc.PlacedFootprint.Footprint.Format :
            image->desc.Format;

         for (uint32_t r = 0; r < rect_count; r++) {
            D3D12_BOX src_box = {
               .left = 0,
               .top = 0,
               .front = 0,
               .right = (UINT)(rects[r].right - rects[r].left),
               .bottom = (UINT)(rects[r].bottom - rects[r].top),
               .back = 1,
            };

            ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
                                                         &dst_loc,
                                                         rects[r].left,
                                                         rects[r].top, 0,
                                                         &src_loc,
                                                         &src_box);
         }
      }
   }

   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                      layout,
                                                      DZN_QUEUE_TRANSITION_FLUSH);
}

/* Return a copy of *col with the components re-ordered for formats dzn
 * emulates with a differently-swizzled D3D12 format.
 */
static VkClearColorValue
adjust_clear_color(VkFormat format, const VkClearColorValue *col)
{
   VkClearColorValue out = *col;

   // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
   // manually where it matters, like here, in the clear path.
   if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
      DZN_SWAP(float, out.float32[0], out.float32[1]);
      DZN_SWAP(float, out.float32[2], out.float32[3]);
   }

   return out;
}

/* Clear whole subresource ranges by filling an upload buffer with the clear
 * color on the CPU and copying it over every level/layer/aspect of each
 * range. This is the fallback used when the image can't be cleared through
 * an RTV (see dzn_cmd_buffer_clear_color()). Each range is transitioned from
 * layout to TRANSFER_DST and back around its copies.
 */
static void
dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
                                      const struct dzn_image *image,
                                      VkImageLayout layout,
                                      const VkClearColorValue *color,
                                      uint32_t range_count,
                                      const VkImageSubresourceRange *ranges)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
   uint32_t blksize = util_format_get_blocksize(pfmt);
   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
   uint32_t raw[4] = { 0 };

   assert(blksize <= sizeof(raw));
   assert(!(sizeof(buf) % blksize));

   /* Encode the clear color into one raw texel. */
   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);

   /* Smallest multiple of the pitch alignment that is also a multiple of
    * the block size. */
   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
   while (fill_step % blksize)
      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

   /* Size the staging buffer for the largest base level of all ranges;
    * smaller mips reuse a prefix of it. */
   uint32_t res_size = 0;
   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
      uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
      uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
      uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);

      res_size = MAX2(res_size, h * d * row_pitch);
   }

   assert(fill_step <= sizeof(buf));

   for (uint32_t i = 0; i < fill_step; i += blksize)
      memcpy(&buf[i], raw, blksize);

   ID3D12Resource *src_res;

   VkResult result =
      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
                                        D3D12_HEAP_TYPE_UPLOAD,
                                        D3D12_RESOURCE_STATE_GENERIC_READ,
                                        &src_res);
   if (result != VK_SUCCESS)
      return;

   assert(!(res_size % fill_step));

   uint8_t *cpu_ptr;
   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
   for (uint32_t i = 0; i < res_size; i += fill_step)
      memcpy(&cpu_ptr[i], buf, fill_step);

   ID3D12Resource_Unmap(src_res, 0, NULL);

   D3D12_TEXTURE_COPY_LOCATION src_loc = {
      .pResource = src_res,
      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
      .PlacedFootprint = {
         .Offset = 0,
      },
   };

   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
                                            D3D12_RESOURCE_STATE_GENERIC_READ,
                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
                                            DZN_QUEUE_TRANSITION_FLUSH);

   for (uint32_t r = 0; r < range_count; r++) {
      uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
      uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                         layout,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);

      dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
         for (uint32_t lvl = 0; lvl < level_count; lvl++) {
            uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
            uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
            uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
            VkImageSubresourceLayers subres = {
               .aspectMask = (VkImageAspectFlags)aspect,
               .mipLevel = ranges[r].baseMipLevel + lvl,
               .baseArrayLayer = ranges[r].baseArrayLayer,
               .layerCount = layer_count,
            };

            for (uint32_t layer = 0; layer < layer_count; layer++) {
               D3D12_TEXTURE_COPY_LOCATION dst_loc =
                  dzn_image_get_copy_loc(image, &subres, aspect, layer);

               /* Source footprint must match the destination's format for
                * this aspect, and be re-sized per mip level. */
               src_loc.PlacedFootprint.Footprint.Format =
                  dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
                  dst_loc.PlacedFootprint.Footprint.Format :
                  image->desc.Format;
               src_loc.PlacedFootprint.Footprint.Width = w;
               src_loc.PlacedFootprint.Footprint.Height = h;
               src_loc.PlacedFootprint.Footprint.Depth = d;
               src_loc.PlacedFootprint.Footprint.RowPitch =
                  ALIGN_NPOT(w * blksize, fill_step);
               D3D12_BOX src_box = {
                  .left = 0,
                  .top = 0,
                  .front = 0,
                  .right = w,
                  .bottom = h,
                  .back = d,
               };

               ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
                                                            &src_loc, &src_box);

            }
         }
      }

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

/* Clear one attachment of the current render pass instance: picks the DSV
 * path for depth/stencil formats and the RTV (or copy fallback) path for
 * color, restricted to rect_count rects and [base_layer, base_layer +
 * layer_count) of the view.
 */
static void
dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
                                struct dzn_image_view *view,
                                VkImageLayout layout,
                                const VkClearValue *value,
                                VkImageAspectFlags aspects,
                                uint32_t base_layer,
                                uint32_t layer_count,
                                uint32_t rect_count,
                                D3D12_RECT *rects)
{
   struct dzn_image *image =
      container_of(view->vk.image, struct dzn_image, vk);

   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = view->vk.base_mip_level,
      .levelCount = 1,
      .baseArrayLayer = view->vk.base_array_layer + base_layer,
      .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
                    view->vk.layer_count - base_layer : layer_count,
   };

   layer_count = vk_image_subresource_layer_count(&image->vk, &range);

   if (vk_format_is_depth_or_stencil(view->vk.format)) {
      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;

      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      if (flags != 0) {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);

         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
                                                          value->depthStencil.depth,
                                                          value->depthStencil.stencil,
                                                          rect_count, rects);

         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      VkClearColorValue color = adjust_clear_color(view->vk.format, &value->color);
      bool clear_with_cpy = false;
      float vals[4];

      /* ClearRenderTargetView() takes floats; if an integer clear value
       * doesn't round-trip through float, fall back to the copy path. */
      if (vk_format_is_sint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.int32[i];
            if (color.int32[i] != (int32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else if (vk_format_is_uint(view->vk.format)) {
         for (uint32_t i = 0; i < 4; i++) {
            vals[i] = color.uint32[i];
            if (color.uint32[i] != (uint32_t)vals[i]) {
               clear_with_cpy = true;
               break;
            }
         }
      } else {
         for (uint32_t i = 0; i < 4; i++)
            vals[i] = color.float32[i];
      }

      if (clear_with_cpy) {
         /* NOTE(review): this passes a hard-coded COLOR_ATTACHMENT_OPTIMAL
          * as the current layout rather than the caller-provided `layout`
          * used everywhere else in this function — verify the image really
          * is in that layout on this path. */
         dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
                                              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                              &value->color,
                                              &range, rect_count, rects);
      } else {
         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            layout,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            DZN_QUEUE_TRANSITION_FLUSH);

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);

         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                            layout,
                                                            DZN_QUEUE_TRANSITION_FLUSH);
      }
   }
}

/* Implement vkCmdClearColorImage(): clear through RTVs when the image is
 * renderable and the clear value survives the float conversion, otherwise
 * fall back to the CPU-fill + copy path.
 */
static void
dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
                           const struct dzn_image *image,
                           VkImageLayout layout,
                           const VkClearColorValue *col,
                           uint32_t range_count,
                           const VkImageSubresourceRange *ranges)
{
   if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
      dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
      return;
   }

   VkClearColorValue color = adjust_clear_color(image->vk.format, col);
   float clear_vals[4];

   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);

   /* Integer clear values that don't round-trip through float can't go
    * through ClearRenderTargetView(). */
   if (util_format_is_pure_sint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.int32[c];
         if (color.int32[c] != (int32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else if (util_format_is_pure_uint(pfmt)) {
      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
         clear_vals[c] = color.uint32[c];
         if (color.uint32[c] != (uint32_t)clear_vals[c]) {
            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
            return;
         }
      }
   } else {
      memcpy(clear_vals, color.float32, sizeof(clear_vals));
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         layout,
                                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         VkImageSubresourceRange view_range = *range;

         /* For 3D images, the RTV addresses depth slices as layers, and the
          * slice count shrinks with the mip level. */
         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            view_range.baseArrayLayer = 0;
            view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
         }

         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
      }

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

/* Implement vkCmdClearDepthStencilImage(): clear every level of each range
 * through DSVs, transitioning each range from layout to
 * DEPTH_STENCIL_ATTACHMENT_OPTIMAL and back.
 */
static void
dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
                        const struct dzn_image *image,
                        VkImageLayout layout,
                        const VkClearDepthStencilValue *zs,
                        uint32_t range_count,
                        const VkImageSubresourceRange *ranges)
{
   assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];
      uint32_t level_count = dzn_get_level_count(image, range);

      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;

      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         flags |= D3D12_CLEAR_FLAG_DEPTH;
      if
(range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
         flags |= D3D12_CLEAR_FLAG_STENCIL;

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         layout,
                                                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                         DZN_QUEUE_TRANSITION_FLUSH);

      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
                                                          handle, flags,
                                                          zs->depth,
                                                          zs->stencil,
                                                          0, NULL);
      }

      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
                                                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                                                         layout,
                                                         DZN_QUEUE_TRANSITION_FLUSH);
   }
}

/* Record the copy of region r of a vkCmdCopyBufferToImage2() call for one
 * aspect and one layer (l). Falls back to a row-by-row copy when the buffer
 * layout doesn't meet D3D12's placed-footprint alignment rules.
 */
static void
dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
                                   const VkCopyBufferToImageInfo2 *info,
                                   uint32_t r,
                                   VkImageAspectFlagBits aspect,
                                   uint32_t l)
{
   VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
   VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);

   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkBufferImageCopy2 region = info->pRegions[r];
   enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
   uint32_t blkh = util_format_get_blockheight(pfmt);
   uint32_t blkd = util_format_get_blockdepth(pfmt);

   /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
    * to not be block aligned if it's reaching the image boundary, offsets still
    * have to be aligned. Align the image extent to make D3D12 happy.
    */
   dzn_image_align_extent(dst_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
      dzn_image_get_copy_loc(dst_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
      dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(&src_buf_loc)) {
      /* RowPitch and Offset are properly aligned, we can copy
       * the whole thing in one call.
       */
      D3D12_BOX src_box = {
         .left = 0,
         .top = 0,
         .front = 0,
         .right = region.imageExtent.width,
         .bottom = region.imageExtent.height,
         .back = region.imageExtent.depth,
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
                                                   region.imageOffset.x,
                                                   region.imageOffset.y,
                                                   region.imageOffset.z,
                                                   &src_buf_loc, &src_box);
      return;
   }

   /* Copy line-by-line if things are not properly aligned. */
   D3D12_BOX src_box = {
      .top = 0,
      .front = 0,
      .bottom = blkh,
      .back = blkd,
   };

   /* Step by block height/depth: each iteration copies one row of blocks. */
   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t src_x;

         D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
            dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
                                         &region, &src_buf_loc,
                                         y, z, &src_x);

         src_box.left = src_x;
         src_box.right = src_x + region.imageExtent.width;
         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_img_loc,
                                                      region.imageOffset.x,
                                                      region.imageOffset.y + y,
                                                      region.imageOffset.z + z,
                                                      &src_buf_line_loc,
                                                      &src_box);
      }
   }
}

/* Record the copy of region r of a vkCmdCopyImageToBuffer2() call for one
 * aspect and one layer (l). Mirrors dzn_cmd_buffer_copy_buf2img_region(),
 * with the buffer as destination.
 */
static void
dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
                                   const VkCopyImageToBufferInfo2 *info,
                                   uint32_t r,
                                   VkImageAspectFlagBits aspect,
                                   uint32_t l)
{
   VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
   VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);

   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkBufferImageCopy2 region = info->pRegions[r];
   enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
   uint32_t blkh = util_format_get_blockheight(pfmt);
   uint32_t blkd = util_format_get_blockdepth(pfmt);

   /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
    * to not be block aligned if it's reaching the image boundary, offsets still
    * have to be aligned. Align the image extent to make D3D12 happy.
    */
   dzn_image_align_extent(src_image, &region.imageExtent);

   D3D12_TEXTURE_COPY_LOCATION src_img_loc =
      dzn_image_get_copy_loc(src_image, &region.imageSubresource, aspect, l);
   D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
      dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, &region, aspect, l);

   if (dzn_buffer_supports_region_copy(&dst_buf_loc)) {
      /* RowPitch and Offset are properly aligned on 256 bytes, we can copy
       * the whole thing in one call.
       */
      D3D12_BOX src_box = {
         .left = (UINT)region.imageOffset.x,
         .top = (UINT)region.imageOffset.y,
         .front = (UINT)region.imageOffset.z,
         .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
         .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
         .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
      };

      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
                                                   0, 0, 0, &src_img_loc,
                                                   &src_box);
      return;
   }

   D3D12_BOX src_box = {
      .left = (UINT)region.imageOffset.x,
      .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
   };

   /* Copy line-by-line if things are not properly aligned.
    */
   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
      src_box.front = region.imageOffset.z + z;
      src_box.back = src_box.front + blkd;

      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
         uint32_t dst_x;

         D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
            dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
                                         &region, &dst_buf_loc,
                                         y, z, &dst_x);

         src_box.top = region.imageOffset.y + y;
         src_box.bottom = src_box.top + blkh;

         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
                                                      &dst_buf_line_loc,
                                                      dst_x, 0, 0,
                                                      &src_img_loc,
                                                      &src_box);
      }
   }
}

/* Record the copy of region r / aspect / layer l of a vkCmdCopyImage2()
 * call. When src and dst formats need a footprint conversion, tmp_loc points
 * at a staging buffer and the copy is done in two hops (src -> staging ->
 * dst), with tmp_desc used to query copyable footprints; when
 * tmp_loc->pResource is NULL, a direct image-to-image copy is recorded.
 */
static void
dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
                              const VkCopyImageInfo2 *info,
                              D3D12_RESOURCE_DESC *tmp_desc,
                              D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
                              uint32_t r,
                              VkImageAspectFlagBits aspect,
                              uint32_t l)
{
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);

   ID3D12Device2 *dev = device->dev;
   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;

   VkImageCopy2 region = info->pRegions[r];
   dzn_image_align_extent(src, &region.extent);

   const VkImageSubresourceLayers *src_subres = &region.srcSubresource;
   const VkImageSubresourceLayers *dst_subres = &region.dstSubresource;
   VkFormat src_format =
      dzn_image_get_plane_format(src->vk.format, aspect);
   VkFormat dst_format =
      dzn_image_get_plane_format(dst->vk.format, aspect);

   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
   uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
   uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
   uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
   enum pipe_format dst_pfmt =
vk_format_to_pipe_format(dst_format);
   uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
   uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
   uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
   uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
   uint32_t depth = region.extent.depth;
   uint32_t dst_l = l, src_l = l;

   assert(src_subres->aspectMask == dst_subres->aspectMask);

   /* 3D <-> 2D-array copies: fold the layer index l into the 3D side's
    * z-offset and copy one slice per call. */
   if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
       dst->vk.image_type == VK_IMAGE_TYPE_2D) {
      assert(src_subres->layerCount == 1);
      src_l = 0;
      src_z += l;
      depth = 1;
   } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
              dst->vk.image_type == VK_IMAGE_TYPE_3D) {
      assert(dst_subres->layerCount == 1);
      dst_l = 0;
      dst_z += l;
      depth = 1;
   } else {
      assert(src_subres->layerCount == dst_subres->layerCount);
   }

   D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
   D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);

   D3D12_BOX src_box = {
      .left = (UINT)MAX2(region.srcOffset.x, 0),
      .top = (UINT)MAX2(region.srcOffset.y, 0),
      .front = (UINT)MAX2(src_z, 0),
      .right = (UINT)region.srcOffset.x + region.extent.width,
      .bottom = (UINT)region.srcOffset.y + region.extent.height,
      .back = (UINT)src_z + depth,
   };

   /* No staging buffer: formats are compatible, direct copy. */
   if (!tmp_loc->pResource) {
      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                   region.dstOffset.x,
                                                   region.dstOffset.y,
                                                   dst_z, &src_loc,
                                                   &src_box);
      return;
   }

   /* Hop 1: src image -> staging buffer, using the source footprint. */
   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(src->vk.format, aspect);
   tmp_desc->Width = region.extent.width;
   tmp_desc->Height = region.extent.height;

   ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   tmp_loc->PlacedFootprint.Footprint.Depth = depth;

   /* The staging buffer is left in COPY_SOURCE after each chunk; flip it
    * back to COPY_DEST before re-using it. */
   if (r > 0 || l > 0) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);

   if (r > 0 || l > 0) {
      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
                                               D3D12_RESOURCE_STATE_COPY_DEST,
                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
                                               DZN_QUEUE_TRANSITION_FLUSH);
   }

   /* Hop 2: staging buffer -> dst image, reinterpreting the footprint with
    * the destination format and rescaling block dimensions. */
   tmp_desc->Format =
      dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
   if (src_blkw != dst_blkw)
      tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
   if (src_blkh != dst_blkh)
      tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;

   ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
                                       0, 1, 0,
                                       &tmp_loc->PlacedFootprint,
                                       NULL, NULL, NULL);

   if (src_blkd != dst_blkd) {
      tmp_loc->PlacedFootprint.Footprint.Depth =
         DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
   } else {
      tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
   }

   D3D12_BOX tmp_box = {
      .left = 0,
      .top = 0,
      .front = 0,
      .right = tmp_loc->PlacedFootprint.Footprint.Width,
      .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
      .back = tmp_loc->PlacedFootprint.Footprint.Depth,
   };

   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
                                                region.dstOffset.x,
                                                region.dstOffset.y,
                                                dst_z,
                                                tmp_loc, &tmp_box);
}

/* Create a transient SRV of the blit source subresource in heap[heap_slot]
 * and bind it as the graphics root descriptor table. The swizzle routes the
 * sampled aspect into the channels the blit shader expects.
 */
static void
dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
                                     VkImage image,
                                     VkImageAspectFlagBits aspect,
                                     const VkImageSubresourceLayers *subres,
                                     struct dzn_descriptor_heap *heap,
                                     uint32_t heap_slot)
{
   struct
dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 1882 VK_FROM_HANDLE(dzn_image, img, image); 1883 VkImageViewCreateInfo iview_info = { 1884 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 1885 .image = image, 1886 .format = img->vk.format, 1887 .subresourceRange = { 1888 .aspectMask = (VkImageAspectFlags)aspect, 1889 .baseMipLevel = subres->mipLevel, 1890 .levelCount = 1, 1891 .baseArrayLayer = subres->baseArrayLayer, 1892 .layerCount = subres->layerCount, 1893 }, 1894 }; 1895 1896 if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { 1897 iview_info.components.r = VK_COMPONENT_SWIZZLE_G; 1898 iview_info.components.g = VK_COMPONENT_SWIZZLE_G; 1899 iview_info.components.b = VK_COMPONENT_SWIZZLE_G; 1900 iview_info.components.a = VK_COMPONENT_SWIZZLE_G; 1901 } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { 1902 iview_info.components.r = VK_COMPONENT_SWIZZLE_R; 1903 iview_info.components.g = VK_COMPONENT_SWIZZLE_R; 1904 iview_info.components.b = VK_COMPONENT_SWIZZLE_R; 1905 iview_info.components.a = VK_COMPONENT_SWIZZLE_R; 1906 } 1907 1908 switch (img->vk.image_type) { 1909 case VK_IMAGE_TYPE_1D: 1910 iview_info.viewType = img->vk.array_layers > 1 ? 1911 VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D; 1912 break; 1913 case VK_IMAGE_TYPE_2D: 1914 iview_info.viewType = img->vk.array_layers > 1 ? 
1915 VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; 1916 break; 1917 case VK_IMAGE_TYPE_3D: 1918 iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D; 1919 break; 1920 default: 1921 unreachable("Invalid type"); 1922 } 1923 1924 struct dzn_image_view iview; 1925 dzn_image_view_init(device, &iview, &iview_info); 1926 dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview); 1927 dzn_image_view_finish(&iview); 1928 1929 D3D12_GPU_DESCRIPTOR_HANDLE handle = 1930 dzn_descriptor_heap_get_gpu_handle(heap, heap_slot); 1931 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle); 1932} 1933 1934static void 1935dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf, 1936 struct dzn_image *img, 1937 VkImageAspectFlagBits aspect, 1938 uint32_t level, uint32_t layer) 1939{ 1940 bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); 1941 VkImageSubresourceRange range = { 1942 .aspectMask = (VkImageAspectFlags)aspect, 1943 .baseMipLevel = level, 1944 .levelCount = 1, 1945 .baseArrayLayer = layer, 1946 .layerCount = 1, 1947 }; 1948 1949 if (ds) { 1950 D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0); 1951 D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc); 1952 ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, TRUE, &handle); 1953 } else { 1954 D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0); 1955 D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc); 1956 ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, FALSE, NULL); 1957 } 1958} 1959 1960static void 1961dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf, 1962 const struct dzn_image *src, 1963 const struct dzn_image *dst, 1964 VkImageAspectFlagBits aspect, 1965 VkFilter filter, bool resolve) 1966{ 1967 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct 
dzn_device, vk); 1968 enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format); 1969 VkImageUsageFlags usage = 1970 vk_format_is_depth_or_stencil(dst->vk.format) ? 1971 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 1972 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; 1973 struct dzn_meta_blit_key ctx_key = { 1974 .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect), 1975 .samples = (uint32_t)src->vk.samples, 1976 .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? 1977 FRAG_RESULT_DEPTH : 1978 aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1979 FRAG_RESULT_STENCIL : 1980 FRAG_RESULT_DATA0), 1981 .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT : 1982 util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT : 1983 aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT : 1984 GLSL_TYPE_FLOAT), 1985 .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D : 1986 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D : 1987 src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? 
GLSL_SAMPLER_DIM_MS : 1988 GLSL_SAMPLER_DIM_3D), 1989 .src_is_array = src->vk.array_layers > 1, 1990 .resolve = resolve, 1991 .linear_filter = filter == VK_FILTER_LINEAR, 1992 .padding = 0, 1993 }; 1994 1995 const struct dzn_meta_blit *ctx = 1996 dzn_meta_blits_get_context(device, &ctx_key); 1997 assert(ctx); 1998 1999 ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig); 2000 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state); 2001} 2002 2003static void 2004dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf, 2005 const struct dzn_image *src, 2006 const VkImageSubresourceLayers *src_subres, 2007 const VkOffset3D *src_offsets, 2008 const struct dzn_image *dst, 2009 const VkImageSubresourceLayers *dst_subres, 2010 const VkOffset3D *dst_offsets, 2011 bool normalize_src_coords) 2012{ 2013 uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel); 2014 uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel); 2015 uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel); 2016 uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel); 2017 2018 float dst_pos[4] = { 2019 (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f), 2020 (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f), 2021 }; 2022 2023 float src_pos[4] = { 2024 (float)src_offsets[0].x, (float)src_offsets[0].y, 2025 (float)src_offsets[1].x, (float)src_offsets[1].y, 2026 }; 2027 2028 if (normalize_src_coords) { 2029 src_pos[0] /= src_w; 2030 src_pos[1] /= src_h; 2031 src_pos[2] /= src_w; 2032 src_pos[3] /= src_h; 2033 } 2034 2035 float coords[] = { 2036 dst_pos[0], dst_pos[1], src_pos[0], src_pos[1], 2037 dst_pos[2], dst_pos[1], src_pos[2], src_pos[1], 2038 dst_pos[0], dst_pos[3], src_pos[0], src_pos[3], 2039 dst_pos[2], dst_pos[3], src_pos[2], src_pos[3], 2040 }; 2041 2042 
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0); 2043 2044 D3D12_VIEWPORT vp = { 2045 .TopLeftX = 0, 2046 .TopLeftY = 0, 2047 .Width = (float)dst_w, 2048 .Height = (float)dst_h, 2049 .MinDepth = 0, 2050 .MaxDepth = 1, 2051 }; 2052 ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp); 2053 2054 D3D12_RECT scissor = { 2055 .left = MIN2(dst_offsets[0].x, dst_offsets[1].x), 2056 .top = MIN2(dst_offsets[0].y, dst_offsets[1].y), 2057 .right = MAX2(dst_offsets[0].x, dst_offsets[1].x), 2058 .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y), 2059 }; 2060 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor); 2061} 2062 2063static void 2064dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf, 2065 struct dzn_image *src, VkImageLayout src_layout, 2066 const VkImageSubresourceLayers *src_subres, 2067 struct dzn_image *dst, VkImageLayout dst_layout, 2068 const VkImageSubresourceLayers *dst_subres, 2069 VkImageAspectFlagBits aspect, 2070 bool post) 2071{ 2072 VkImageSubresourceRange src_range = { 2073 .aspectMask = src_subres->aspectMask, 2074 .baseMipLevel = src_subres->mipLevel, 2075 .levelCount = 1, 2076 .baseArrayLayer = src_subres->baseArrayLayer, 2077 .layerCount = src_subres->layerCount, 2078 }; 2079 VkImageSubresourceRange dst_range = { 2080 .aspectMask = dst_subres->aspectMask, 2081 .baseMipLevel = dst_subres->mipLevel, 2082 .levelCount = 1, 2083 .baseArrayLayer = dst_subres->baseArrayLayer, 2084 .layerCount = dst_subres->layerCount, 2085 }; 2086 2087 if (!post) { 2088 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range, 2089 src_layout, 2090 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2091 DZN_QUEUE_TRANSITION_FLUSH); 2092 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range, 2093 dst_layout, 2094 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2095 DZN_QUEUE_TRANSITION_FLUSH); 2096 } else { 2097 
dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range, 2098 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2099 src_layout, 2100 DZN_QUEUE_TRANSITION_FLUSH); 2101 dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range, 2102 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2103 dst_layout, 2104 DZN_QUEUE_TRANSITION_FLUSH); 2105 } 2106} 2107 2108static void 2109dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf, 2110 const VkBlitImageInfo2 *info, 2111 struct dzn_descriptor_heap *heap, 2112 uint32_t *heap_slot, 2113 uint32_t r) 2114{ 2115 VK_FROM_HANDLE(dzn_image, src, info->srcImage); 2116 VK_FROM_HANDLE(dzn_image, dst, info->dstImage); 2117 2118 const VkImageBlit2 *region = &info->pRegions[r]; 2119 bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D; 2120 bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D; 2121 2122 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { 2123 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false); 2124 dzn_cmd_buffer_blit_issue_barriers(cmdbuf, 2125 src, info->srcImageLayout, ®ion->srcSubresource, 2126 dst, info->dstImageLayout, ®ion->dstSubresource, 2127 aspect, false); 2128 dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, 2129 aspect, ®ion->srcSubresource, 2130 heap, (*heap_slot)++); 2131 dzn_cmd_buffer_blit_set_2d_region(cmdbuf, 2132 src, ®ion->srcSubresource, region->srcOffsets, 2133 dst, ®ion->dstSubresource, region->dstOffsets, 2134 src->vk.samples == 1); 2135 2136 uint32_t dst_depth = 2137 region->dstOffsets[1].z > region->dstOffsets[0].z ? 2138 region->dstOffsets[1].z - region->dstOffsets[0].z : 2139 region->dstOffsets[0].z - region->dstOffsets[1].z; 2140 uint32_t src_depth = 2141 region->srcOffsets[1].z > region->srcOffsets[0].z ? 
2142 region->srcOffsets[1].z - region->srcOffsets[0].z : 2143 region->srcOffsets[0].z - region->srcOffsets[1].z; 2144 2145 uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); 2146 uint32_t dst_level = region->dstSubresource.mipLevel; 2147 2148 float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1; 2149 if (region->srcOffsets[0].z > region->srcOffsets[1].z) 2150 src_slice_step = -src_slice_step; 2151 float src_z_coord = 2152 src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0; 2153 uint32_t slice_count = dst_is_3d ? dst_depth : layer_count; 2154 uint32_t dst_z_coord = 2155 dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer; 2156 if (region->dstOffsets[0].z > region->dstOffsets[1].z) 2157 dst_z_coord--; 2158 2159 uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ? 2160 1 : -1; 2161 2162 /* Normalize the src coordinates/step */ 2163 if (src_is_3d) { 2164 src_z_coord /= src->vk.extent.depth; 2165 src_slice_step /= src->vk.extent.depth; 2166 } 2167 2168 for (uint32_t slice = 0; slice < slice_count; slice++) { 2169 dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord); 2170 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); 2171 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); 2172 src_z_coord += src_slice_step; 2173 dst_z_coord += dst_slice_step; 2174 } 2175 2176 dzn_cmd_buffer_blit_issue_barriers(cmdbuf, 2177 src, info->srcImageLayout, ®ion->srcSubresource, 2178 dst, info->dstImageLayout, ®ion->dstSubresource, 2179 aspect, true); 2180 } 2181} 2182 2183static void 2184dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf, 2185 const VkResolveImageInfo2 *info, 2186 struct dzn_descriptor_heap *heap, 2187 uint32_t *heap_slot, 2188 uint32_t r) 2189{ 2190 VK_FROM_HANDLE(dzn_image, src, info->srcImage); 2191 VK_FROM_HANDLE(dzn_image, dst, info->dstImage); 2192 2193 const 
VkImageResolve2 *region = &info->pRegions[r]; 2194 2195 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { 2196 dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true); 2197 dzn_cmd_buffer_blit_issue_barriers(cmdbuf, 2198 src, info->srcImageLayout, ®ion->srcSubresource, 2199 dst, info->dstImageLayout, ®ion->dstSubresource, 2200 aspect, false); 2201 dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect, 2202 ®ion->srcSubresource, 2203 heap, (*heap_slot)++); 2204 2205 VkOffset3D src_offset[2] = { 2206 { 2207 .x = region->srcOffset.x, 2208 .y = region->srcOffset.y, 2209 }, 2210 { 2211 .x = (int32_t)(region->srcOffset.x + region->extent.width), 2212 .y = (int32_t)(region->srcOffset.y + region->extent.height), 2213 }, 2214 }; 2215 VkOffset3D dst_offset[2] = { 2216 { 2217 .x = region->dstOffset.x, 2218 .y = region->dstOffset.y, 2219 }, 2220 { 2221 .x = (int32_t)(region->dstOffset.x + region->extent.width), 2222 .y = (int32_t)(region->dstOffset.y + region->extent.height), 2223 }, 2224 }; 2225 2226 dzn_cmd_buffer_blit_set_2d_region(cmdbuf, 2227 src, ®ion->srcSubresource, src_offset, 2228 dst, ®ion->dstSubresource, dst_offset, 2229 false); 2230 2231 uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); 2232 for (uint32_t layer = 0; layer < layer_count; layer++) { 2233 float src_z_coord = layer; 2234 2235 dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, 2236 dst, aspect, region->dstSubresource.mipLevel, 2237 region->dstSubresource.baseArrayLayer + layer); 2238 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); 2239 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); 2240 } 2241 2242 dzn_cmd_buffer_blit_issue_barriers(cmdbuf, 2243 src, info->srcImageLayout, ®ion->srcSubresource, 2244 dst, info->dstImageLayout, ®ion->dstSubresource, 2245 aspect, true); 2246 } 2247} 2248 2249static void 2250dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer 
*cmdbuf, uint32_t bindpoint) 2251{ 2252 const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; 2253 2254 if (!pipeline) 2255 return; 2256 2257 ID3D12PipelineState *old_pipeline_state = 2258 cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL; 2259 2260 if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) { 2261 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { 2262 struct dzn_graphics_pipeline *gfx = 2263 (struct dzn_graphics_pipeline *)pipeline; 2264 ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig); 2265 ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology); 2266 dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant); 2267 } else { 2268 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig); 2269 } 2270 } 2271 2272 ID3D12PipelineState *new_pipeline_state = pipeline->state; 2273 2274 if (old_pipeline_state != new_pipeline_state) { 2275 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state); 2276 cmdbuf->state.pipeline = pipeline; 2277 } 2278} 2279 2280static void 2281dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) 2282{ 2283 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 2284 struct dzn_descriptor_state *desc_state = 2285 &cmdbuf->state.bindpoint[bindpoint].desc_state; 2286 struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = { 2287 desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV], 2288 desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] 2289 }; 2290 uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 }; 2291 bool update_root_desc_table[NUM_POOL_TYPES] = { 0 }; 2292 const struct dzn_pipeline *pipeline = 2293 cmdbuf->state.bindpoint[bindpoint].pipeline; 2294 2295 if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS)) 2296 goto set_heaps; 2297 2298 
dzn_foreach_pool_type (type) { 2299 uint32_t desc_count = pipeline->desc_count[type]; 2300 if (!desc_count) 2301 continue; 2302 2303 struct dzn_descriptor_heap_pool *pool = 2304 type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ? 2305 &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool; 2306 struct dzn_descriptor_heap *dst_heap = NULL; 2307 uint32_t dst_heap_offset = 0; 2308 2309 dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count, 2310 &dst_heap, &dst_heap_offset); 2311 new_heap_offsets[type] = dst_heap_offset; 2312 update_root_desc_table[type] = true; 2313 2314 for (uint32_t s = 0; s < MAX_SETS; s++) { 2315 const struct dzn_descriptor_set *set = desc_state->sets[s].set; 2316 if (!set) continue; 2317 2318 uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type]; 2319 uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type]; 2320 if (set_desc_count) { 2321 mtx_lock(&set->pool->defragment_lock); 2322 dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset, 2323 &set->pool->heaps[type], set->heap_offsets[type], 2324 set_desc_count); 2325 mtx_unlock(&set->pool->defragment_lock); 2326 } 2327 2328 if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { 2329 uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count; 2330 for (uint32_t o = 0; o < dynamic_buffer_count; o++) { 2331 uint32_t desc_heap_offset = 2332 pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv; 2333 struct dzn_buffer_desc bdesc = set->dynamic_buffers[o]; 2334 bdesc.offset += desc_state->sets[s].dynamic_offsets[o]; 2335 2336 dzn_descriptor_heap_write_buffer_desc(dst_heap, 2337 dst_heap_offset + set_heap_offset + desc_heap_offset, 2338 false, &bdesc); 2339 2340 if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) { 2341 desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav; 2342 dzn_descriptor_heap_write_buffer_desc(dst_heap, 2343 dst_heap_offset + set_heap_offset + desc_heap_offset, 2344 true, &bdesc); 2345 } 2346 } 2347 } 2348 
} 2349 2350 new_heaps[type] = dst_heap; 2351 } 2352 2353set_heaps: 2354 if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] || 2355 new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) { 2356 ID3D12DescriptorHeap *desc_heaps[2]; 2357 uint32_t num_desc_heaps = 0; 2358 if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) 2359 desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap; 2360 if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) 2361 desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap; 2362 ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps); 2363 2364 for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++) 2365 cmdbuf->state.heaps[h] = new_heaps[h]; 2366 } 2367 2368 for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) { 2369 D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r]; 2370 2371 if (!update_root_desc_table[type]) 2372 continue; 2373 2374 D3D12_GPU_DESCRIPTOR_HANDLE handle = 2375 dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]); 2376 2377 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) 2378 ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle); 2379 else 2380 ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle); 2381 } 2382} 2383 2384static void 2385dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) 2386{ 2387 if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS)) 2388 return; 2389 2390 const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; 2391 uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx; 2392 2393 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { 2394 
ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx, 2395 sizeof(cmdbuf->state.sysvals.gfx) / 4, 2396 &cmdbuf->state.sysvals.gfx, 0); 2397 } else { 2398 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx, 2399 sizeof(cmdbuf->state.sysvals.compute) / 4, 2400 &cmdbuf->state.sysvals.compute, 0); 2401 } 2402} 2403 2404static void 2405dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf) 2406{ 2407 const struct dzn_graphics_pipeline *pipeline = 2408 (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; 2409 2410 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) || 2411 !pipeline->vp.count) 2412 return; 2413 2414 ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports); 2415} 2416 2417static void 2418dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf) 2419{ 2420 const struct dzn_graphics_pipeline *pipeline = 2421 (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; 2422 2423 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS)) 2424 return; 2425 2426 if (!pipeline->scissor.count) { 2427 /* Apply a scissor delimiting the render area. 
*/ 2428 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area); 2429 return; 2430 } 2431 2432 D3D12_RECT scissors[MAX_SCISSOR]; 2433 2434 memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count); 2435 for (uint32_t i = 0; i < pipeline->scissor.count; i++) { 2436 scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left); 2437 scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top); 2438 scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right); 2439 scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom); 2440 } 2441 2442 ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors); 2443} 2444 2445static void 2446dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf) 2447{ 2448 unsigned start, end; 2449 2450 BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS) 2451 ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, cmdbuf->state.vb.views); 2452 2453 BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS); 2454} 2455 2456static void 2457dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf) 2458{ 2459 if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB)) 2460 return; 2461 2462 ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view); 2463} 2464 2465static void 2466dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) 2467{ 2468 struct dzn_cmd_buffer_push_constant_state *state = 2469 bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? 
2470 &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute; 2471 2472 uint32_t offset = state->offset / 4; 2473 uint32_t end = ALIGN(state->end, 4) / 4; 2474 uint32_t count = end - offset; 2475 2476 if (!count) 2477 return; 2478 2479 uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx; 2480 uint32_t *vals = state->values + offset; 2481 2482 if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) 2483 ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); 2484 else 2485 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); 2486 2487 state->offset = 0; 2488 state->end = 0; 2489} 2490 2491static void 2492dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf) 2493{ 2494 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) { 2495 const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *) 2496 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; 2497 uint32_t ref = 2498 gfx->zsa.stencil_test.front.uses_ref ? 
2499 cmdbuf->state.zsa.stencil_test.front.ref : 2500 cmdbuf->state.zsa.stencil_test.back.ref; 2501 ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref); 2502 } 2503} 2504 2505static void 2506dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf) 2507{ 2508 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS) 2509 ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist, 2510 cmdbuf->state.blend.constants); 2511} 2512 2513static void 2514dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf) 2515{ 2516 if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) { 2517 ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist, 2518 cmdbuf->state.zsa.depth_bounds.min, 2519 cmdbuf->state.zsa.depth_bounds.max); 2520 } 2521} 2522 2523static VkResult 2524dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count) 2525{ 2526 uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4; 2527 uint32_t triangle_count = MAX2(*vertex_count, 2) - 2; 2528 2529 *vertex_count = triangle_count * 3; 2530 if (!*vertex_count) 2531 return VK_SUCCESS; 2532 2533 ID3D12Resource *index_buf; 2534 VkResult result = 2535 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size, 2536 D3D12_HEAP_TYPE_UPLOAD, 2537 D3D12_RESOURCE_STATE_GENERIC_READ, 2538 &index_buf); 2539 if (result != VK_SUCCESS) 2540 return result; 2541 2542 void *cpu_ptr; 2543 ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr); 2544 2545 /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */ 2546 if (index_size == 2) { 2547 uint16_t *indices = (uint16_t *)cpu_ptr; 2548 for (uint32_t t = 0; t < triangle_count; t++) { 2549 indices[t * 3] = t + 1; 2550 indices[(t * 3) + 1] = t + 2; 2551 indices[(t * 3) + 2] = 0; 2552 } 2553 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT; 2554 } else { 2555 uint32_t *indices = (uint32_t *)cpu_ptr; 2556 for (uint32_t t = 0; t < triangle_count; t++) { 2557 indices[t * 3] = t + 1; 2558 indices[(t * 3) + 1] = t + 2; 2559 
indices[(t * 3) + 2] = 0; 2560 } 2561 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; 2562 } 2563 2564 cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size; 2565 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf); 2566 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; 2567 return VK_SUCCESS; 2568} 2569 2570static VkResult 2571dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf, 2572 uint32_t *index_count, 2573 uint32_t *first_index) 2574{ 2575 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 2576 uint32_t triangle_count = MAX2(*index_count, 2) - 2; 2577 2578 *index_count = triangle_count * 3; 2579 if (!*index_count) 2580 return VK_SUCCESS; 2581 2582 /* New index is always 32bit to make the compute shader rewriting the 2583 * index simpler */ 2584 ID3D12Resource *new_index_buf; 2585 VkResult result = 2586 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4, 2587 D3D12_HEAP_TYPE_DEFAULT, 2588 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2589 &new_index_buf); 2590 if (result != VK_SUCCESS) 2591 return result; 2592 2593 D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu = 2594 cmdbuf->state.ib.view.BufferLocation; 2595 2596 ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *) 2597 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; 2598 ASSERTED bool prim_restart = 2599 dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL; 2600 2601 assert(!prim_restart); 2602 2603 enum dzn_index_type index_type = 2604 dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false); 2605 const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index = 2606 &device->triangle_fan[index_type]; 2607 2608 struct dzn_triangle_fan_rewrite_index_params params = { 2609 .first_index = *first_index, 2610 }; 2611 2612 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig); 2613 
ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state); 2614 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf)); 2615 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4, 2616 ¶ms, 0); 2617 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu); 2618 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1); 2619 2620 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1, 2621 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2622 D3D12_RESOURCE_STATE_INDEX_BUFFER, 2623 DZN_QUEUE_TRANSITION_FLUSH); 2624 2625 /* We don't mess up with the driver state when executing our internal 2626 * compute shader, but we still change the D3D12 state, so let's mark 2627 * things dirty if needed. 2628 */ 2629 cmdbuf->state.pipeline = NULL; 2630 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { 2631 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= 2632 DZN_CMD_BINDPOINT_DIRTY_PIPELINE; 2633 } 2634 2635 cmdbuf->state.ib.view.SizeInBytes = *index_count * 4; 2636 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf); 2637 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; 2638 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; 2639 *first_index = 0; 2640 return VK_SUCCESS; 2641} 2642 2643static void 2644dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed) 2645{ 2646 if (indexed) 2647 dzn_cmd_buffer_update_ibview(cmdbuf); 2648 2649 dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); 2650 dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); 2651 dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); 2652 dzn_cmd_buffer_update_viewports(cmdbuf); 2653 dzn_cmd_buffer_update_scissors(cmdbuf); 2654 
dzn_cmd_buffer_update_vbviews(cmdbuf); 2655 dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); 2656 dzn_cmd_buffer_update_zsa(cmdbuf); 2657 dzn_cmd_buffer_update_blend_constants(cmdbuf); 2658 dzn_cmd_buffer_update_depth_bounds(cmdbuf); 2659 2660 /* Reset the dirty states */ 2661 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0; 2662 cmdbuf->state.dirty = 0; 2663} 2664 2665static uint32_t 2666dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed) 2667{ 2668 struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) 2669 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; 2670 2671 if (!pipeline->ia.triangle_fan) 2672 return 0; 2673 2674 uint32_t max_triangles; 2675 2676 if (indexed) { 2677 uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2; 2678 uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size; 2679 2680 max_triangles = MAX2(max_indices, 2) - 2; 2681 } else { 2682 uint32_t max_vertex = 0; 2683 for (uint32_t i = 0; i < pipeline->vb.count; i++) { 2684 max_vertex = 2685 MAX2(max_vertex, 2686 cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes); 2687 } 2688 2689 max_triangles = MAX2(max_vertex, 2) - 2; 2690 } 2691 2692 return max_triangles * 3; 2693} 2694 2695static void 2696dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf, 2697 ID3D12Resource *draw_buf, 2698 size_t draw_buf_offset, 2699 ID3D12Resource *count_buf, 2700 size_t count_buf_offset, 2701 uint32_t max_draw_count, 2702 uint32_t draw_buf_stride, 2703 bool indexed) 2704{ 2705 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 2706 struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) 2707 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; 2708 uint32_t min_draw_buf_stride = 2709 indexed ? 
2710 sizeof(struct dzn_indirect_indexed_draw_params) : 2711 sizeof(struct dzn_indirect_draw_params); 2712 bool prim_restart = 2713 dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL; 2714 2715 draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride; 2716 assert(draw_buf_stride >= min_draw_buf_stride); 2717 assert((draw_buf_stride & 3) == 0); 2718 2719 uint32_t triangle_fan_index_buf_stride = 2720 dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) * 2721 sizeof(uint32_t); 2722 uint32_t exec_buf_stride = 2723 triangle_fan_index_buf_stride > 0 ? 2724 sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) : 2725 sizeof(struct dzn_indirect_draw_exec_params); 2726 uint32_t triangle_fan_exec_buf_stride = 2727 sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params); 2728 uint32_t exec_buf_size = max_draw_count * exec_buf_stride; 2729 uint32_t exec_buf_draw_offset = 0; 2730 2731 // We reserve the first slot for the draw_count value when indirect count is 2732 // involved. 
2733 if (count_buf != NULL) { 2734 exec_buf_size += exec_buf_stride; 2735 exec_buf_draw_offset = exec_buf_stride; 2736 } 2737 2738 ID3D12Resource *exec_buf; 2739 VkResult result = 2740 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size, 2741 D3D12_HEAP_TYPE_DEFAULT, 2742 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2743 &exec_buf); 2744 if (result != VK_SUCCESS) 2745 return; 2746 2747 D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu = 2748 ID3D12Resource_GetGPUVirtualAddress(draw_buf) + draw_buf_offset; 2749 ID3D12Resource *triangle_fan_index_buf = NULL; 2750 ID3D12Resource *triangle_fan_exec_buf = NULL; 2751 2752 if (triangle_fan_index_buf_stride) { 2753 result = 2754 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, 2755 max_draw_count * triangle_fan_index_buf_stride, 2756 D3D12_HEAP_TYPE_DEFAULT, 2757 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2758 &triangle_fan_index_buf); 2759 if (result != VK_SUCCESS) 2760 return; 2761 2762 result = 2763 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, 2764 max_draw_count * triangle_fan_exec_buf_stride, 2765 D3D12_HEAP_TYPE_DEFAULT, 2766 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2767 &triangle_fan_exec_buf); 2768 if (result != VK_SUCCESS) 2769 return; 2770 } 2771 2772 struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = { 2773 .draw_buf_stride = draw_buf_stride, 2774 .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride, 2775 .triangle_fan_index_buf_start = 2776 triangle_fan_index_buf ? 2777 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0, 2778 .exec_buf_start = 2779 prim_restart ? 
2780 ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0, 2781 }; 2782 uint32_t params_size; 2783 if (triangle_fan_index_buf_stride > 0 && prim_restart) 2784 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params); 2785 else if (triangle_fan_index_buf_stride > 0) 2786 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params); 2787 else 2788 params_size = sizeof(struct dzn_indirect_draw_rewrite_params); 2789 2790 enum dzn_indirect_draw_type draw_type; 2791 2792 if (indexed && triangle_fan_index_buf_stride > 0) { 2793 if (prim_restart && count_buf) 2794 draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 2795 else if (prim_restart && !count_buf) 2796 draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART; 2797 else if (!prim_restart && count_buf) 2798 draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN; 2799 else 2800 draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN; 2801 } else if (!indexed && triangle_fan_index_buf_stride > 0) { 2802 draw_type = count_buf ? 2803 DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN : 2804 DZN_INDIRECT_DRAW_TRIANGLE_FAN; 2805 } else if (indexed) { 2806 draw_type = count_buf ? 2807 DZN_INDIRECT_INDEXED_DRAW_COUNT : 2808 DZN_INDIRECT_INDEXED_DRAW; 2809 } else { 2810 draw_type = count_buf ? 
DZN_INDIRECT_DRAW_COUNT : DZN_INDIRECT_DRAW; 2811 } 2812 2813 struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type]; 2814 uint32_t root_param_idx = 0; 2815 2816 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig); 2817 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state); 2818 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++, 2819 params_size / 4, (const void *)¶ms, 0); 2820 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++, 2821 draw_buf_gpu); 2822 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++, 2823 ID3D12Resource_GetGPUVirtualAddress(exec_buf)); 2824 if (count_buf) { 2825 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2826 root_param_idx++, 2827 ID3D12Resource_GetGPUVirtualAddress(count_buf) + 2828 count_buf_offset); 2829 } 2830 2831 if (triangle_fan_exec_buf) { 2832 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 2833 root_param_idx++, 2834 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf)); 2835 } 2836 2837 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1); 2838 2839 D3D12_INDEX_BUFFER_VIEW ib_view = { 0 }; 2840 2841 if (triangle_fan_exec_buf) { 2842 enum dzn_index_type index_type = 2843 indexed ? 
2844 dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, prim_restart) : 2845 DZN_NO_INDEX; 2846 struct dzn_meta_triangle_fan_rewrite_index *rewrite_index = 2847 &device->triangle_fan[index_type]; 2848 2849 struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 }; 2850 2851 assert(rewrite_index->root_sig); 2852 assert(rewrite_index->pipeline_state); 2853 assert(rewrite_index->cmd_sig); 2854 2855 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1, 2856 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2857 D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, 2858 DZN_QUEUE_TRANSITION_FLUSH); 2859 2860 ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig); 2861 ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state); 2862 root_param_idx = 0; 2863 ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++, 2864 ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf)); 2865 ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++, 2866 sizeof(rewrite_index_params) / 4, 2867 (const void *)&rewrite_index_params, 0); 2868 2869 if (indexed) { 2870 ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2871 root_param_idx++, 2872 cmdbuf->state.ib.view.BufferLocation); 2873 } 2874 2875 ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig, 2876 max_draw_count, triangle_fan_exec_buf, 0, 2877 count_buf ? 
exec_buf : NULL, 0); 2878 2879 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1, 2880 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2881 D3D12_RESOURCE_STATE_INDEX_BUFFER, 2882 DZN_QUEUE_TRANSITION_FLUSH); 2883 2884 /* After our triangle-fan lowering the draw is indexed */ 2885 indexed = true; 2886 ib_view = cmdbuf->state.ib.view; 2887 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf); 2888 cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride; 2889 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; 2890 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; 2891 } 2892 2893 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1, 2894 D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 2895 D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, 2896 DZN_QUEUE_TRANSITION_FLUSH); 2897 2898 /* We don't mess up with the driver state when executing our internal 2899 * compute shader, but we still change the D3D12 state, so let's mark 2900 * things dirty if needed. 2901 */ 2902 cmdbuf->state.pipeline = NULL; 2903 if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { 2904 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= 2905 DZN_CMD_BINDPOINT_DIRTY_PIPELINE; 2906 } 2907 2908 cmdbuf->state.sysvals.gfx.first_vertex = 0; 2909 cmdbuf->state.sysvals.gfx.base_instance = 0; 2910 cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed; 2911 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= 2912 DZN_CMD_BINDPOINT_DIRTY_SYSVALS; 2913 2914 dzn_cmd_buffer_prepare_draw(cmdbuf, indexed); 2915 2916 /* Restore the old IB view if we modified it during the triangle fan lowering */ 2917 if (ib_view.SizeInBytes) { 2918 cmdbuf->state.ib.view = ib_view; 2919 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; 2920 } 2921 2922 enum dzn_indirect_draw_cmd_sig_type cmd_sig_type = 2923 triangle_fan_index_buf_stride > 0 ? 2924 DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG : 2925 indexed ? 
2926 DZN_INDIRECT_INDEXED_DRAW_CMD_SIG : 2927 DZN_INDIRECT_DRAW_CMD_SIG; 2928 ID3D12CommandSignature *cmdsig = 2929 dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type); 2930 2931 if (!cmdsig) { 2932 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 2933 return; 2934 } 2935 2936 ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 2937 max_draw_count, 2938 exec_buf, exec_buf_draw_offset, 2939 count_buf ? exec_buf : NULL, 0); 2940} 2941 2942static void 2943dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf) 2944{ 2945 dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); 2946 dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); 2947 dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); 2948 dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); 2949 2950 /* Reset the dirty states */ 2951 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0; 2952} 2953 2954VKAPI_ATTR void VKAPI_CALL 2955dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer, 2956 const VkCopyBufferInfo2 *info) 2957{ 2958 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 2959 VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); 2960 VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); 2961 2962 for (int i = 0; i < info->regionCount; i++) { 2963 const VkBufferCopy2 *region = info->pRegions + i; 2964 2965 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset, 2966 src_buffer->res, region->srcOffset, 2967 region->size); 2968 } 2969} 2970 2971VKAPI_ATTR void VKAPI_CALL 2972dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, 2973 const VkCopyBufferToImageInfo2 *info) 2974{ 2975 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 2976 2977 for (int i = 0; i < info->regionCount; i++) { 2978 const VkBufferImageCopy2 *region = info->pRegions + i; 2979 2980 dzn_foreach_aspect(aspect, 
region->imageSubresource.aspectMask) { 2981 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) 2982 dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l); 2983 } 2984 } 2985} 2986 2987VKAPI_ATTR void VKAPI_CALL 2988dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, 2989 const VkCopyImageToBufferInfo2 *info) 2990{ 2991 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 2992 2993 for (int i = 0; i < info->regionCount; i++) { 2994 const VkBufferImageCopy2 *region = info->pRegions + i; 2995 2996 dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) { 2997 for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) 2998 dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l); 2999 } 3000 } 3001} 3002 3003VKAPI_ATTR void VKAPI_CALL 3004dzn_CmdCopyImage2(VkCommandBuffer commandBuffer, 3005 const VkCopyImageInfo2 *info) 3006{ 3007 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3008 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 3009 VK_FROM_HANDLE(dzn_image, src, info->srcImage); 3010 VK_FROM_HANDLE(dzn_image, dst, info->dstImage); 3011 3012 assert(src->vk.samples == dst->vk.samples); 3013 3014 bool requires_temp_res = false; 3015 3016 for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) { 3017 const VkImageCopy2 *region = &info->pRegions[i]; 3018 3019 dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { 3020 assert(aspect & region->dstSubresource.aspectMask); 3021 3022 if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format, 3023 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) && 3024 src->vk.tiling != VK_IMAGE_TILING_LINEAR && 3025 dst->vk.tiling != VK_IMAGE_TILING_LINEAR) { 3026 requires_temp_res = true; 3027 break; 3028 } 3029 } 3030 } 3031 3032 bool use_blit = false; 3033 if (src->vk.samples > 1) { 3034 use_blit = requires_temp_res; 3035 3036 for (int i = 0; i < info->regionCount; i++) { 3037 const VkImageCopy2 *region 
= info->pRegions + i; 3038 if (region->srcOffset.x != 0 || region->srcOffset.y != 0 || 3039 region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) || 3040 region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) || 3041 region->dstOffset.x != 0 || region->dstOffset.y != 0 || 3042 region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) || 3043 region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel)) 3044 use_blit = true; 3045 } 3046 } 3047 3048 if (use_blit) { 3049 /* This copy -> blit lowering doesn't work if the vkCmdCopyImage[2]() is 3050 * is issued on a transfer queue, but we don't have any better option 3051 * right now... 3052 */ 3053 STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount); 3054 3055 VkBlitImageInfo2 blit_info = { 3056 .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2, 3057 .srcImage = info->srcImage, 3058 .srcImageLayout = info->srcImageLayout, 3059 .dstImage = info->dstImage, 3060 .dstImageLayout = info->dstImageLayout, 3061 .regionCount = info->regionCount, 3062 .pRegions = blit_regions, 3063 .filter = VK_FILTER_NEAREST, 3064 }; 3065 3066 for (uint32_t r = 0; r < info->regionCount; r++) { 3067 blit_regions[r] = (VkImageBlit2) { 3068 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2, 3069 .srcSubresource = info->pRegions[r].srcSubresource, 3070 .srcOffsets = { 3071 info->pRegions[r].srcOffset, 3072 info->pRegions[r].srcOffset, 3073 }, 3074 .dstSubresource = info->pRegions[r].dstSubresource, 3075 .dstOffsets = { 3076 info->pRegions[r].dstOffset, 3077 info->pRegions[r].dstOffset, 3078 }, 3079 }; 3080 3081 blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width; 3082 blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height; 3083 blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth; 3084 blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width; 3085 blit_regions[r].dstOffsets[1].y += 
info->pRegions[r].extent.height; 3086 blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth; 3087 } 3088 3089 dzn_CmdBlitImage2(commandBuffer, &blit_info); 3090 3091 STACK_ARRAY_FINISH(blit_regions); 3092 return; 3093 } 3094 3095 D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 }; 3096 D3D12_RESOURCE_DESC tmp_desc = { 3097 .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, 3098 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, 3099 .DepthOrArraySize = 1, 3100 .MipLevels = 1, 3101 .Format = src->desc.Format, 3102 .SampleDesc = { .Count = 1, .Quality = 0 }, 3103 .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, 3104 .Flags = D3D12_RESOURCE_FLAG_NONE, 3105 }; 3106 3107 if (requires_temp_res) { 3108 ID3D12Device2 *dev = device->dev; 3109 VkImageAspectFlags aspect = 0; 3110 uint64_t max_size = 0; 3111 3112 if (vk_format_has_depth(src->vk.format)) 3113 aspect = VK_IMAGE_ASPECT_DEPTH_BIT; 3114 else if (vk_format_has_stencil(src->vk.format)) 3115 aspect = VK_IMAGE_ASPECT_DEPTH_BIT; 3116 else 3117 aspect = VK_IMAGE_ASPECT_COLOR_BIT; 3118 3119 for (uint32_t i = 0; i < info->regionCount; i++) { 3120 const VkImageCopy2 *region = &info->pRegions[i]; 3121 uint64_t region_size = 0; 3122 3123 tmp_desc.Format = 3124 dzn_image_get_dxgi_format(src->vk.format, 3125 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 3126 aspect); 3127 tmp_desc.Width = region->extent.width; 3128 tmp_desc.Height = region->extent.height; 3129 3130 ID3D12Device1_GetCopyableFootprints(dev, &src->desc, 3131 0, 1, 0, 3132 NULL, NULL, NULL, 3133 ®ion_size); 3134 max_size = MAX2(max_size, region_size * region->extent.depth); 3135 } 3136 3137 VkResult result = 3138 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size, 3139 D3D12_HEAP_TYPE_DEFAULT, 3140 D3D12_RESOURCE_STATE_COPY_DEST, 3141 &tmp_loc.pResource); 3142 if (result != VK_SUCCESS) 3143 return; 3144 3145 tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; 3146 } 3147 3148 for (int i = 0; i < info->regionCount; i++) { 3149 const VkImageCopy2 *region = 
&info->pRegions[i];

      /* One chunked copy per region, per aspect, per array layer, possibly
       * bouncing through tmp_loc when a temporary resource was allocated.
       */
      dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
         for (uint32_t l = 0; l < region->srcSubresource.layerCount; l++)
            dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
                  const VkBlitImageInfo2 *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (info->regionCount == 0)
      return;

   /* One descriptor slot per (region, aspect) pair. */
   uint32_t desc_count = 0;
   for (uint32_t r = 0; r < info->regionCount; r++)
      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);

   struct dzn_descriptor_heap *heap;
   uint32_t heap_slot;
   VkResult result =
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
                                           desc_count, &heap, &heap_slot);

   if (result != VK_SUCCESS) {
      cmdbuf->error = result;
      return;
   }

   /* Only rebind the CBV/SRV/UAV heap if it actually changed. */
   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
   }

   /* Blits are drawn as full-screen triangle strips. */
   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

   for (uint32_t r = 0; r < info->regionCount; r++)
      dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);

   /* The internal blit pipeline clobbered the D3D12 graphics state: mark
    * everything we touched dirty so the next draw re-emits it.
    */
   cmdbuf->state.pipeline = NULL;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
                     const VkResolveImageInfo2 *info)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);

   if (info->regionCount == 0)
      return;

   /* One descriptor slot per (region, aspect) pair. */
   uint32_t desc_count = 0;
   for (uint32_t r = 0; r < info->regionCount; r++)
      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);

   struct dzn_descriptor_heap *heap;
   uint32_t heap_slot;
   VkResult result =
      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
                                           desc_count, &heap, &heap_slot);
   if (result != VK_SUCCESS) {
      cmdbuf->error = result;
      return;
   }

   /* Only rebind the CBV/SRV/UAV heap if it actually changed. */
   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
   }

   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

   uint32_t heap_offset = 0;
   for (uint32_t r = 0; r < info->regionCount; r++)
      dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_offset, r);

   /* The internal resolve pipeline clobbered the D3D12 graphics state:
    * mark it dirty so the next draw re-emits it.
    */
   cmdbuf->state.pipeline = NULL;
   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   }
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
                       VkImage image,
                       VkImageLayout imageLayout,
                       const VkClearColorValue *pColor,
                       uint32_t rangeCount,
                       const VkImageSubresourceRange *pRanges)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf,
commandBuffer); 3255 VK_FROM_HANDLE(dzn_image, img, image); 3256 3257 dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges); 3258} 3259 3260VKAPI_ATTR void VKAPI_CALL 3261dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, 3262 VkImage image, 3263 VkImageLayout imageLayout, 3264 const VkClearDepthStencilValue *pDepthStencil, 3265 uint32_t rangeCount, 3266 const VkImageSubresourceRange *pRanges) 3267{ 3268 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3269 VK_FROM_HANDLE(dzn_image, img, image); 3270 3271 dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges); 3272} 3273 3274VKAPI_ATTR void VKAPI_CALL 3275dzn_CmdDispatch(VkCommandBuffer commandBuffer, 3276 uint32_t groupCountX, 3277 uint32_t groupCountY, 3278 uint32_t groupCountZ) 3279{ 3280 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3281 3282 cmdbuf->state.sysvals.compute.group_count_x = groupCountX; 3283 cmdbuf->state.sysvals.compute.group_count_y = groupCountY; 3284 cmdbuf->state.sysvals.compute.group_count_z = groupCountZ; 3285 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= 3286 DZN_CMD_BINDPOINT_DIRTY_SYSVALS; 3287 3288 dzn_cmd_buffer_prepare_dispatch(cmdbuf); 3289 ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ); 3290} 3291 3292VKAPI_ATTR void VKAPI_CALL 3293dzn_CmdFillBuffer(VkCommandBuffer commandBuffer, 3294 VkBuffer dstBuffer, 3295 VkDeviceSize dstOffset, 3296 VkDeviceSize size, 3297 uint32_t data) 3298{ 3299 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3300 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); 3301 3302 if (size == VK_WHOLE_SIZE) 3303 size = buf->size - dstOffset; 3304 3305 size &= ~3ULL; 3306 3307 ID3D12Resource *src_res; 3308 VkResult result = 3309 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size, 3310 D3D12_HEAP_TYPE_UPLOAD, 3311 D3D12_RESOURCE_STATE_GENERIC_READ, 3312 &src_res); 3313 if (result != VK_SUCCESS) 3314 return; 3315 3316 uint32_t 
*cpu_ptr; 3317 ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); 3318 for (uint32_t i = 0; i < size / 4; i++) 3319 cpu_ptr[i] = data; 3320 3321 ID3D12Resource_Unmap(src_res, 0, NULL); 3322 3323 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size); 3324} 3325 3326VKAPI_ATTR void VKAPI_CALL 3327dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer, 3328 VkBuffer dstBuffer, 3329 VkDeviceSize dstOffset, 3330 VkDeviceSize size, 3331 const void *data) 3332{ 3333 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3334 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); 3335 3336 if (size == VK_WHOLE_SIZE) 3337 size = buf->size - dstOffset; 3338 3339 /* 3340 * The spec says: 3341 * 4, or VK_WHOLE_SIZE to fill the range from offset to the end of the 3342 * buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer 3343 * is not a multiple of 4, then the nearest smaller multiple is used." 3344 */ 3345 size &= ~3ULL; 3346 3347 ID3D12Resource *src_res; 3348 VkResult result = 3349 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size, 3350 D3D12_HEAP_TYPE_UPLOAD, 3351 D3D12_RESOURCE_STATE_GENERIC_READ, 3352 &src_res); 3353 if (result != VK_SUCCESS) 3354 return; 3355 3356 void *cpu_ptr; 3357 ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr); 3358 memcpy(cpu_ptr, data, size), 3359 ID3D12Resource_Unmap(src_res, 0, NULL); 3360 3361 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size); 3362} 3363 3364VKAPI_ATTR void VKAPI_CALL 3365dzn_CmdClearAttachments(VkCommandBuffer commandBuffer, 3366 uint32_t attachmentCount, 3367 const VkClearAttachment *pAttachments, 3368 uint32_t rectCount, 3369 const VkClearRect *pRects) 3370{ 3371 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3372 3373 for (unsigned i = 0; i < attachmentCount; i++) { 3374 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; 3375 struct dzn_image_view *view = NULL; 3376 3377 if (pAttachments[i].aspectMask & 
VK_IMAGE_ASPECT_COLOR_BIT) {
         assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
         view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
         layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
      } else {
         if (cmdbuf->state.render.attachments.depth.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
            view = cmdbuf->state.render.attachments.depth.iview;
            layout = cmdbuf->state.render.attachments.depth.layout;
         }

         /* Depth and stencil must come from the same view if both are
          * requested (asserted below).
          */
         if (cmdbuf->state.render.attachments.stencil.iview &&
             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
            assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
            view = cmdbuf->state.render.attachments.stencil.iview;
            layout = cmdbuf->state.render.attachments.stencil.layout;
         }
      }

      /* Nothing bound for this attachment reference: skip it. */
      if (!view)
         continue;

      for (uint32_t j = 0; j < rectCount; j++) {
         D3D12_RECT rect;

         dzn_translate_rect(&rect, &pRects[j].rect);
         dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
                                         &pAttachments[i].clearValue,
                                         pAttachments[i].aspectMask,
                                         pRects[j].baseArrayLayer,
                                         pRects[j].layerCount,
                                         1, &rect);
      }
   }
}

/* Resolve a multisampled rendering attachment into its resolve target using
 * ResolveSubresource(), bracketing the resolve with the required resource
 * state transitions and restoring the original states afterwards.
 * No-op when either the source or the resolve view is missing.
 */
static void
dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
                                            const struct dzn_rendering_attachment *att,
                                            VkImageAspectFlagBits aspect)
{
   struct dzn_image_view *src = att->iview;
   struct dzn_image_view *dst = att->resolve.iview;

   if (!src || !dst)
      return;

   VkImageLayout src_layout = att->layout;
   VkImageLayout dst_layout = att->resolve.layout;
   struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
   D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect);
   struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);
   D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect);

   /* Clamp level/layer counts to what both views can provide. */
   VkImageSubresourceRange src_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = src->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = src->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };

   VkImageSubresourceRange dst_range = {
      .aspectMask = (VkImageAspectFlags)aspect,
      .baseMipLevel = dst->vk.base_mip_level,
      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
      .baseArrayLayer = dst->vk.base_array_layer,
      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
   };

   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                     src_state,
                                                     D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                     DZN_QUEUE_TRANSITION_FLUSH);
   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                     dst_state,
                                                     D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                     DZN_QUEUE_TRANSITION_FLUSH);

   for (uint32_t level = 0; level < src_range.levelCount; level++) {
      for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
         uint32_t src_subres =
            dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
         uint32_t dst_subres =
            dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);

         ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
                                                       dst_img->res, dst_subres,
                                                       src_img->res, src_subres,
                                                       dst->srv_desc.Format);
      }
   }

   /* Return both images to the states implied by their Vulkan layouts. */
   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
                                                     D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
                                                     src_state,
                                                     DZN_QUEUE_TRANSITION_FLUSH);
   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
                                                     D3D12_RESOURCE_STATE_RESOLVE_DEST,
                                                     dst_state,
                                                     DZN_QUEUE_TRANSITION_FLUSH);
}

/* Apply the optional VK_MESA initial-layout transition attached to a
 * dynamic-rendering attachment (VkRenderingAttachmentInitialLayoutInfoMESA).
 * No-op when the pNext struct or the image view is absent.
 */
static void
dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
                                            const VkRenderingAttachmentInfo *att,
                                            VkImageAspectFlagBits aspect)
{
   const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
      vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
   VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

   if (!initial_layout || !iview)
      return;

   struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
   const VkImageSubresourceRange range = {
      .aspectMask = aspect,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
                                                      initial_layout->initialLayout,
                                                      att->imageLayout,
                                                      DZN_QUEUE_TRANSITION_FLUSH);
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
                      const VkRenderingInfo *pRenderingInfo)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   D3D12_RECT new_render_area = {
      .left = pRenderingInfo->renderArea.offset.x,
      .top = pRenderingInfo->renderArea.offset.y,
      .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
      .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
   };

   // The render area has an impact on the scissor state.
   if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
      cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
      cmdbuf->state.render.area = new_render_area;
   }

   cmdbuf->state.render.flags = pRenderingInfo->flags;
   cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
   cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;

   D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
   D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };

   /* Record the color attachments and collect their RTV handles. */
   cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
   for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      cmdbuf->state.render.attachments.colors[i].iview = iview;
      cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
      cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.colors[i].resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.colors[i].resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;

      /* Unbound slots get a null RTV so the slot indices stay stable. */
      if (!iview) {
         rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
         continue;
      }

      struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
      rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_COLOR_BIT);
   }

   /* Record the depth attachment state, if any. */
   if (pRenderingInfo->pDepthAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;

      cmdbuf->state.render.attachments.depth.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
      cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.depth.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.depth.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_DEPTH_BIT);
   }

   /* Record the stencil attachment state, if any. */
   if (pRenderingInfo->pStencilAttachment) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;

      cmdbuf->state.render.attachments.stencil.iview =
         dzn_image_view_from_handle(att->imageView);
      cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
      cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
      cmdbuf->state.render.attachments.stencil.resolve.iview =
         dzn_image_view_from_handle(att->resolveImageView);
      cmdbuf->state.render.attachments.stencil.resolve.layout =
         att->resolveImageLayout;
      cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
      dzn_rendering_attachment_initial_transition(cmdbuf, att,
                                                  VK_IMAGE_ASPECT_STENCIL_BIT);
   }

   /* Depth and stencil must reference the same view (asserted below); grab
    * a single DSV handle from whichever one is present.
    */
   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
      struct dzn_image_view *z_iview =
         pRenderingInfo->pDepthAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
         NULL;
      struct dzn_image_view *s_iview =
         pRenderingInfo->pStencilAttachment ?
         dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
         NULL;
      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
      assert(!z_iview || !s_iview || z_iview == s_iview);

      if (iview) {
         struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);

         zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
      }
   }

   ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
                                                 pRenderingInfo->colorAttachmentCount,
                                                 pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
                                                 FALSE, zs_handle.ptr ? &zs_handle : NULL);

   /* Apply LOAD_OP_CLEAR on the color attachments. */
   for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);

      if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
                                         &att->clearValue,
                                         VK_IMAGE_ASPECT_COLOR_BIT, 0,
                                         VK_REMAINING_ARRAY_LAYERS, 1,
                                         &cmdbuf->state.render.area);
      }
   }

   /* Apply LOAD_OP_CLEAR on the depth/stencil attachment. */
   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
      const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
      const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
      struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
      struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
      struct dzn_image_view *iview = z_iview ?
z_iview : s_iview;
      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;

      assert(!z_iview || !s_iview || z_iview == s_iview);

      VkImageAspectFlags aspects = 0;
      VkClearValue clear_val;

      if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
         clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
         layout = z_att->imageLayout;
      }

      if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
         layout = s_att->imageLayout;
      }

      /* Single combined clear for whichever aspects requested LOAD_OP_CLEAR. */
      if (aspects != 0) {
         dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
                                         &clear_val, aspects, 0,
                                         VK_REMAINING_ARRAY_LAYERS, 1,
                                         &cmdbuf->state.render.area);
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   /* Execute the pending resolves, then drop all per-pass state. */
   for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
      dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
                                                  &cmdbuf->state.render.attachments.colors[i],
                                                  VK_IMAGE_ASPECT_COLOR_BIT);
   }

   dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
                                               &cmdbuf->state.render.attachments.depth,
                                               VK_IMAGE_ASPECT_DEPTH_BIT);
   dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
                                               &cmdbuf->state.render.attachments.stencil,
                                               VK_IMAGE_ASPECT_STENCIL_BIT);

   memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
                    VkPipelineBindPoint pipelineBindPoint,
                    VkPipeline pipe)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);

   cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
   cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;

      /* For every piece of state the pipeline bakes in statically, copy
       * it into the command-buffer state and mark it dirty so it gets
       * re-emitted on the next draw.
       */
      if (!gfx->vp.dynamic) {
         memcpy(cmdbuf->state.viewports, gfx->vp.desc,
                gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
      }

      if (!gfx->scissor.dynamic) {
         memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
                gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
      }

      if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
         cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
         cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
      }

      if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
         cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
         cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
      }

      if (!gfx->blend.dynamic_constants) {
         memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
                sizeof(cmdbuf->state.blend.constants));
         cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
      }

      /* Vertex-buffer strides come from the pipeline in D3D12. */
      for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
         cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];

      if (gfx->vb.count > 0)
         BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
   }
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                          VkPipelineBindPoint pipelineBindPoint,
                          VkPipelineLayout layout,
                          uint32_t firstSet,
                          uint32_t descriptorSetCount,
                          const VkDescriptorSet *pDescriptorSets,
                          uint32_t dynamicOffsetCount,
                          const uint32_t *pDynamicOffsets)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);

   struct dzn_descriptor_state *desc_state =
      &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
   uint32_t dirty = 0;

   for (uint32_t i = 0; i < descriptorSetCount; i++) {
      uint32_t idx = firstSet + i;
      VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);

      if (desc_state->sets[idx].set != set) {
         desc_state->sets[idx].set = set;
         dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
      }

      /* Dynamic offsets are consumed in set order; advance the cursor by
       * however many dynamic buffers this set declares.
       */
      uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
      if (dynamic_buffer_count) {
         assert(dynamicOffsetCount >= dynamic_buffer_count);

         for (uint32_t j = 0; j < dynamic_buffer_count; j++)
            desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];

         dynamicOffsetCount -= dynamic_buffer_count;
         pDynamicOffsets += dynamic_buffer_count;
         dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
      }
   }

   cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
}

VKAPI_ATTR void VKAPI_CALL
dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
                   uint32_t firstViewport,
                   uint32_t viewportCount,
                   const VkViewport *pViewports)
{
   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);

   STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);

   for (uint32_t i = 0; i < viewportCount; i++) {
      uint32_t vp = i + firstViewport;

      dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);

      /* Track per-viewport Y/Z flips in a sysval mask consumed by the
       * dxil-spirv lowering.
       */
      if (pViewports[i].minDepth > pViewports[i].maxDepth)
         cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
      else
         cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);

      if (pViewports[i].height > 0)
         cmdbuf->state.sysvals.gfx.yz_flip_mask |=
BITFIELD_BIT(vp); 3797 else 3798 cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp); 3799 } 3800 3801 if (viewportCount) { 3802 cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS; 3803 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= 3804 DZN_CMD_BINDPOINT_DIRTY_SYSVALS; 3805 } 3806} 3807 3808VKAPI_ATTR void VKAPI_CALL 3809dzn_CmdSetScissor(VkCommandBuffer commandBuffer, 3810 uint32_t firstScissor, 3811 uint32_t scissorCount, 3812 const VkRect2D *pScissors) 3813{ 3814 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3815 3816 for (uint32_t i = 0; i < scissorCount; i++) 3817 dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]); 3818 3819 if (scissorCount) 3820 cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; 3821} 3822 3823VKAPI_ATTR void VKAPI_CALL 3824dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, 3825 VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, 3826 const void *pValues) 3827{ 3828 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3829 struct dzn_cmd_buffer_push_constant_state *states[2]; 3830 uint32_t num_states = 0; 3831 3832 if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) 3833 states[num_states++] = &cmdbuf->state.push_constant.gfx; 3834 3835 if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) 3836 states[num_states++] = &cmdbuf->state.push_constant.compute; 3837 3838 for (uint32_t i = 0; i < num_states; i++) { 3839 memcpy(((char *)states[i]->values) + offset, pValues, size); 3840 states[i]->offset = 3841 states[i]->end > 0 ? 
MIN2(states[i]->offset, offset) : offset; 3842 states[i]->end = MAX2(states[i]->end, offset + size); 3843 } 3844} 3845 3846VKAPI_ATTR void VKAPI_CALL 3847dzn_CmdDraw(VkCommandBuffer commandBuffer, 3848 uint32_t vertexCount, 3849 uint32_t instanceCount, 3850 uint32_t firstVertex, 3851 uint32_t firstInstance) 3852{ 3853 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3854 3855 const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) 3856 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; 3857 3858 cmdbuf->state.sysvals.gfx.first_vertex = firstVertex; 3859 cmdbuf->state.sysvals.gfx.base_instance = firstInstance; 3860 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= 3861 DZN_CMD_BINDPOINT_DIRTY_SYSVALS; 3862 3863 if (pipeline->ia.triangle_fan) { 3864 D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; 3865 3866 VkResult result = 3867 dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount); 3868 if (result != VK_SUCCESS || !vertexCount) 3869 return; 3870 3871 cmdbuf->state.sysvals.gfx.is_indexed_draw = true; 3872 dzn_cmd_buffer_prepare_draw(cmdbuf, true); 3873 ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0, 3874 firstVertex, firstInstance); 3875 3876 /* Restore the IB view if we modified it when lowering triangle fans. 
*/ 3877 if (ib_view.SizeInBytes > 0) { 3878 cmdbuf->state.ib.view = ib_view; 3879 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; 3880 } 3881 } else { 3882 cmdbuf->state.sysvals.gfx.is_indexed_draw = false; 3883 dzn_cmd_buffer_prepare_draw(cmdbuf, false); 3884 ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 3885 firstVertex, firstInstance); 3886 } 3887} 3888 3889VKAPI_ATTR void VKAPI_CALL 3890dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer, 3891 uint32_t indexCount, 3892 uint32_t instanceCount, 3893 uint32_t firstIndex, 3894 int32_t vertexOffset, 3895 uint32_t firstInstance) 3896{ 3897 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3898 3899 const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) 3900 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; 3901 3902 if (pipeline->ia.triangle_fan && 3903 dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) { 3904 /* The indexed+primitive-restart+triangle-fan combination is a mess, 3905 * since we have to walk the index buffer, skip entries with the 3906 * special 0xffff/0xffffffff values, and push triangle list indices 3907 * for the remaining values. All of this has an impact on the index 3908 * count passed to the draw call, which forces us to use the indirect 3909 * path. 
3910 */ 3911 struct dzn_indirect_indexed_draw_params params = { 3912 .index_count = indexCount, 3913 .instance_count = instanceCount, 3914 .first_index = firstIndex, 3915 .vertex_offset = vertexOffset, 3916 .first_instance = firstInstance, 3917 }; 3918 3919 ID3D12Resource *draw_buf; 3920 VkResult result = 3921 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params), 3922 D3D12_HEAP_TYPE_UPLOAD, 3923 D3D12_RESOURCE_STATE_GENERIC_READ, 3924 &draw_buf); 3925 if (result != VK_SUCCESS) 3926 return; 3927 3928 void *cpu_ptr; 3929 ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr); 3930 memcpy(cpu_ptr, ¶ms, sizeof(params)); 3931 3932 ID3D12Resource_Unmap(draw_buf, 0, NULL); 3933 3934 dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, 0, NULL, 0, 1, sizeof(params), true); 3935 return; 3936 } 3937 3938 cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset; 3939 cmdbuf->state.sysvals.gfx.base_instance = firstInstance; 3940 cmdbuf->state.sysvals.gfx.is_indexed_draw = true; 3941 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= 3942 DZN_CMD_BINDPOINT_DIRTY_SYSVALS; 3943 3944 D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; 3945 3946 if (pipeline->ia.triangle_fan) { 3947 VkResult result = 3948 dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex); 3949 if (result != VK_SUCCESS || !indexCount) 3950 return; 3951 } 3952 3953 dzn_cmd_buffer_prepare_draw(cmdbuf, true); 3954 ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex, 3955 vertexOffset, firstInstance); 3956 3957 /* Restore the IB view if we modified it when lowering triangle fans. 
*/ 3958 if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) { 3959 cmdbuf->state.ib.view = ib_view; 3960 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; 3961 } 3962} 3963 3964VKAPI_ATTR void VKAPI_CALL 3965dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer, 3966 VkBuffer buffer, 3967 VkDeviceSize offset, 3968 uint32_t drawCount, 3969 uint32_t stride) 3970{ 3971 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3972 VK_FROM_HANDLE(dzn_buffer, buf, buffer); 3973 3974 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false); 3975} 3976 3977VKAPI_ATTR void VKAPI_CALL 3978dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, 3979 VkBuffer buffer, 3980 VkDeviceSize offset, 3981 uint32_t drawCount, 3982 uint32_t stride) 3983{ 3984 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 3985 VK_FROM_HANDLE(dzn_buffer, buf, buffer); 3986 3987 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true); 3988} 3989 3990VKAPI_ATTR void VKAPI_CALL 3991dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, 3992 VkBuffer buffer, 3993 VkDeviceSize offset, 3994 VkBuffer countBuffer, 3995 VkDeviceSize countBufferOffset, 3996 uint32_t maxDrawCount, 3997 uint32_t stride) 3998{ 3999 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4000 VK_FROM_HANDLE(dzn_buffer, buf, buffer); 4001 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer); 4002 4003 dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, 4004 count_buf->res, countBufferOffset, 4005 maxDrawCount, stride, false); 4006} 4007 4008VKAPI_ATTR void VKAPI_CALL 4009dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, 4010 VkBuffer buffer, 4011 VkDeviceSize offset, 4012 VkBuffer countBuffer, 4013 VkDeviceSize countBufferOffset, 4014 uint32_t maxDrawCount, 4015 uint32_t stride) 4016{ 4017 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4018 VK_FROM_HANDLE(dzn_buffer, buf, buffer); 4019 VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer); 4020 4021 
dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, 4022 count_buf->res, countBufferOffset, 4023 maxDrawCount, stride, true); 4024} 4025 4026VKAPI_ATTR void VKAPI_CALL 4027dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, 4028 uint32_t firstBinding, 4029 uint32_t bindingCount, 4030 const VkBuffer *pBuffers, 4031 const VkDeviceSize *pOffsets) 4032{ 4033 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4034 4035 if (!bindingCount) 4036 return; 4037 4038 D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views; 4039 4040 for (uint32_t i = 0; i < bindingCount; i++) { 4041 VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]); 4042 4043 vbviews[firstBinding + i].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + pOffsets[i]; 4044 vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i]; 4045 } 4046 4047 BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding, 4048 firstBinding + bindingCount - 1); 4049} 4050 4051VKAPI_ATTR void VKAPI_CALL 4052dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, 4053 VkBuffer buffer, 4054 VkDeviceSize offset, 4055 VkIndexType indexType) 4056{ 4057 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4058 VK_FROM_HANDLE(dzn_buffer, buf, buffer); 4059 4060 cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + offset; 4061 cmdbuf->state.ib.view.SizeInBytes = buf->size - offset; 4062 switch (indexType) { 4063 case VK_INDEX_TYPE_UINT16: 4064 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT; 4065 cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF; 4066 break; 4067 case VK_INDEX_TYPE_UINT32: 4068 cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; 4069 cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF; 4070 break; 4071 default: unreachable("Invalid index type"); 4072 } 4073 4074 cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; 4075 4076 const struct dzn_graphics_pipeline *pipeline = 4077 (const struct 
dzn_graphics_pipeline *)cmdbuf->state.pipeline; 4078 4079 if (pipeline && dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) 4080 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; 4081} 4082 4083VKAPI_ATTR void VKAPI_CALL 4084dzn_CmdResetEvent(VkCommandBuffer commandBuffer, 4085 VkEvent event, 4086 VkPipelineStageFlags stageMask) 4087{ 4088 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4089 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 4090 VK_FROM_HANDLE(dzn_event, evt, event); 4091 4092 if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET)) 4093 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 4094} 4095 4096VKAPI_ATTR void VKAPI_CALL 4097dzn_CmdSetEvent(VkCommandBuffer commandBuffer, 4098 VkEvent event, 4099 VkPipelineStageFlags stageMask) 4100{ 4101 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4102 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 4103 VK_FROM_HANDLE(dzn_event, evt, event); 4104 4105 if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET)) 4106 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 4107} 4108 4109VKAPI_ATTR void VKAPI_CALL 4110dzn_CmdWaitEvents(VkCommandBuffer commandBuffer, 4111 uint32_t eventCount, 4112 const VkEvent *pEvents, 4113 VkPipelineStageFlags srcStageMask, 4114 VkPipelineStageFlags dstStageMask, 4115 uint32_t memoryBarrierCount, 4116 const VkMemoryBarrier *pMemoryBarriers, 4117 uint32_t bufferMemoryBarrierCount, 4118 const VkBufferMemoryBarrier *pBufferMemoryBarriers, 4119 uint32_t imageMemoryBarrierCount, 4120 const VkImageMemoryBarrier *pImageMemoryBarriers) 4121{ 4122 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4123 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 4124 4125 /* Intra-command list wait is 
handle by this pipeline flush, which is 4126 * overkill, but that's the best we can do with the standard D3D12 barrier 4127 * API. 4128 * 4129 * Inter-command list is taken care of by the serialization done at the 4130 * ExecuteCommandList() level: 4131 * "Calling ExecuteCommandLists twice in succession (from the same thread, 4132 * or different threads) guarantees that the first workload (A) finishes 4133 * before the second workload (B)" 4134 * 4135 * HOST -> DEVICE signaling is ignored and we assume events are always 4136 * signaled when we reach the vkCmdWaitEvents() point.: 4137 * "Command buffers in the submission can include vkCmdWaitEvents commands 4138 * that wait on events that will not be signaled by earlier commands in the 4139 * queue. Such events must be signaled by the application using vkSetEvent, 4140 * and the vkCmdWaitEvents commands that wait upon them must not be inside 4141 * a render pass instance. 4142 * The event must be set before the vkCmdWaitEvents command is executed." 
4143 */ 4144 bool flush_pipeline = false; 4145 4146 for (uint32_t i = 0; i < eventCount; i++) { 4147 VK_FROM_HANDLE(dzn_event, event, pEvents[i]); 4148 4149 struct hash_entry *he = 4150 _mesa_hash_table_search(cmdbuf->events.ht, event); 4151 if (he) { 4152 enum dzn_event_state state = (uintptr_t)he->data; 4153 assert(state != DZN_EVENT_STATE_RESET); 4154 flush_pipeline = state == DZN_EVENT_STATE_SET; 4155 } else { 4156 if (!_mesa_hash_table_insert(cmdbuf->events.ht, event, 4157 (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) { 4158 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 4159 return; 4160 } 4161 4162 struct dzn_event **entry = 4163 util_dynarray_grow(&cmdbuf->events.wait, struct dzn_event *, 1); 4164 4165 if (!entry) { 4166 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 4167 return; 4168 } 4169 4170 *entry = event; 4171 } 4172 } 4173 4174 if (flush_pipeline) { 4175 D3D12_RESOURCE_BARRIER barrier = { 4176 .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, 4177 .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, 4178 .UAV = { .pResource = NULL }, 4179 }; 4180 4181 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); 4182 } 4183} 4184 4185VKAPI_ATTR void VKAPI_CALL 4186dzn_CmdBeginQuery(VkCommandBuffer commandBuffer, 4187 VkQueryPool queryPool, 4188 uint32_t query, 4189 VkQueryControlFlags flags) 4190{ 4191 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4192 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); 4193 4194 struct dzn_cmd_buffer_query_pool_state *state = 4195 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); 4196 if (!state) 4197 return; 4198 4199 qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags); 4200 dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query); 4201 ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); 4202} 4203 4204VKAPI_ATTR void VKAPI_CALL 4205dzn_CmdEndQuery(VkCommandBuffer commandBuffer, 4206 VkQueryPool 
queryPool, 4207 uint32_t query) 4208{ 4209 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4210 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); 4211 4212 struct dzn_cmd_buffer_query_pool_state *state = 4213 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); 4214 if (!state) 4215 return; 4216 4217 dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query); 4218 ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); 4219} 4220 4221VKAPI_ATTR void VKAPI_CALL 4222dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, 4223 VkPipelineStageFlags2 stage, 4224 VkQueryPool queryPool, 4225 uint32_t query) 4226{ 4227 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4228 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); 4229 4230 struct dzn_cmd_buffer_query_pool_state *state = 4231 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); 4232 if (!state) 4233 return; 4234 4235 /* Execution barrier so the timestamp gets written after the pipeline flush. 
*/ 4236 D3D12_RESOURCE_BARRIER barrier = { 4237 .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, 4238 .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, 4239 .UAV = { .pResource = NULL }, 4240 }; 4241 4242 ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); 4243 4244 qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP; 4245 dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query); 4246 ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); 4247} 4248 4249 4250VKAPI_ATTR void VKAPI_CALL 4251dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer, 4252 VkQueryPool queryPool, 4253 uint32_t firstQuery, 4254 uint32_t queryCount) 4255{ 4256 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4257 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 4258 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); 4259 4260 struct dzn_cmd_buffer_query_pool_state *state = 4261 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); 4262 4263 if (!state) 4264 return; 4265 4266 uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); 4267 4268 for (uint32_t q = 0; q < queryCount; q += q_step) { 4269 uint32_t q_count = MIN2(queryCount - q, q_step); 4270 4271 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer, 4272 dzn_query_pool_get_availability_offset(qpool, firstQuery + q), 4273 device->queries.refs, 4274 DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 4275 q_count * sizeof(uint64_t)); 4276 } 4277 4278 q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size; 4279 4280 for (uint32_t q = 0; q < queryCount; q += q_step) { 4281 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer, 4282 dzn_query_pool_get_result_offset(qpool, firstQuery + q), 4283 device->queries.refs, 4284 DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 4285 qpool->query_size); 4286 } 4287 4288 dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount); 4289 
dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount); 4290} 4291 4292VKAPI_ATTR void VKAPI_CALL 4293dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, 4294 VkQueryPool queryPool, 4295 uint32_t firstQuery, 4296 uint32_t queryCount, 4297 VkBuffer dstBuffer, 4298 VkDeviceSize dstOffset, 4299 VkDeviceSize stride, 4300 VkQueryResultFlags flags) 4301{ 4302 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4303 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); 4304 VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); 4305 4306 struct dzn_cmd_buffer_query_pool_state *qpstate = 4307 dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); 4308 if (!qpstate) 4309 return; 4310 4311 if (flags & VK_QUERY_RESULT_WAIT_BIT) { 4312 for (uint32_t i = 0; i < queryCount; i++) { 4313 if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) && 4314 !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i)) 4315 dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i); 4316 } 4317 } 4318 4319 VkResult result = 4320 dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount); 4321 if (result != VK_SUCCESS) 4322 return; 4323 4324 bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) && 4325 stride == qpool->query_size && 4326 !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); 4327#define ALL_STATS \ 4328 (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \ 4329 VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \ 4330 VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \ 4331 VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \ 4332 VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \ 4333 VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \ 4334 VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \ 4335 VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \ 4336 VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \ 4337 
VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \ 4338 VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) 4339 if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS && 4340 qpool->pipeline_statistics != ALL_STATS) 4341 raw_copy = false; 4342#undef ALL_STATS 4343 4344 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1, 4345 D3D12_RESOURCE_STATE_COPY_DEST, 4346 D3D12_RESOURCE_STATE_COPY_SOURCE, 4347 DZN_QUEUE_TRANSITION_FLUSH); 4348 4349 if (raw_copy) { 4350 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, 4351 qpool->collect_buffer, 4352 dzn_query_pool_get_result_offset(qpool, firstQuery), 4353 dzn_query_pool_get_result_size(qpool, queryCount)); 4354 } else { 4355 uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t); 4356 4357 for (uint32_t q = 0; q < queryCount; q++) { 4358 uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q); 4359 uint32_t dst_counter_offset = 0; 4360 4361 if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { 4362 for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { 4363 if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) 4364 continue; 4365 4366 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, 4367 qpool->collect_buffer, 4368 res_offset + (c * sizeof(uint64_t)), 4369 step); 4370 dst_counter_offset += step; 4371 } 4372 } else { 4373 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, 4374 qpool->collect_buffer, 4375 res_offset, step); 4376 dst_counter_offset += step; 4377 } 4378 4379 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { 4380 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, 4381 qpool->collect_buffer, 4382 dzn_query_pool_get_availability_offset(qpool, firstQuery + q), 4383 
step); 4384 } 4385 4386 dstOffset += stride; 4387 } 4388 } 4389 4390 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1, 4391 D3D12_RESOURCE_STATE_COPY_SOURCE, 4392 D3D12_RESOURCE_STATE_COPY_DEST, 4393 0); 4394} 4395 4396VKAPI_ATTR void VKAPI_CALL 4397dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer, 4398 VkBuffer buffer, 4399 VkDeviceSize offset) 4400{ 4401 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4402 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 4403 VK_FROM_HANDLE(dzn_buffer, buf, buffer); 4404 4405 cmdbuf->state.sysvals.compute.group_count_x = 0; 4406 cmdbuf->state.sysvals.compute.group_count_y = 0; 4407 cmdbuf->state.sysvals.compute.group_count_z = 0; 4408 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= 4409 DZN_CMD_BINDPOINT_DIRTY_SYSVALS; 4410 4411 dzn_cmd_buffer_prepare_dispatch(cmdbuf); 4412 4413 struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *) 4414 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline; 4415 ID3D12CommandSignature *cmdsig = 4416 dzn_compute_pipeline_get_indirect_cmd_sig(pipeline); 4417 4418 if (!cmdsig) { 4419 cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 4420 return; 4421 } 4422 4423 ID3D12Resource *exec_buf; 4424 VkResult result = 4425 dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2, 4426 D3D12_HEAP_TYPE_DEFAULT, 4427 D3D12_RESOURCE_STATE_COPY_DEST, 4428 &exec_buf); 4429 if (result != VK_SUCCESS) 4430 return; 4431 4432 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0, 4433 buf->res, 4434 offset, 4435 sizeof(D3D12_DISPATCH_ARGUMENTS)); 4436 ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS), 4437 buf->res, 4438 offset, 4439 sizeof(D3D12_DISPATCH_ARGUMENTS)); 4440 4441 dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1, 4442 D3D12_RESOURCE_STATE_COPY_DEST, 4443 
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, 4444 DZN_QUEUE_TRANSITION_FLUSH); 4445 4446 ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0); 4447} 4448 4449VKAPI_ATTR void VKAPI_CALL 4450dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer, 4451 float lineWidth) 4452{ 4453 assert(lineWidth == 1.0f); 4454} 4455 4456VKAPI_ATTR void VKAPI_CALL 4457dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer, 4458 float depthBiasConstantFactor, 4459 float depthBiasClamp, 4460 float depthBiasSlopeFactor) 4461{ 4462 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4463 4464 cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor; 4465 cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp; 4466 cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor; 4467 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; 4468} 4469 4470VKAPI_ATTR void VKAPI_CALL 4471dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer, 4472 const float blendConstants[4]) 4473{ 4474 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4475 4476 memcpy(cmdbuf->state.blend.constants, blendConstants, 4477 sizeof(cmdbuf->state.blend.constants)); 4478 cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS; 4479} 4480 4481VKAPI_ATTR void VKAPI_CALL 4482dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer, 4483 float minDepthBounds, 4484 float maxDepthBounds) 4485{ 4486 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4487 struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); 4488 struct dzn_physical_device *pdev = 4489 container_of(device->vk.physical, struct dzn_physical_device, vk); 4490 4491 if (pdev->options2.DepthBoundsTestSupported) { 4492 cmdbuf->state.zsa.depth_bounds.min = minDepthBounds; 4493 cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds; 4494 cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS; 4495 } 4496} 4497 
4498VKAPI_ATTR void VKAPI_CALL 4499dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, 4500 VkStencilFaceFlags faceMask, 4501 uint32_t compareMask) 4502{ 4503 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4504 4505 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { 4506 cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask; 4507 cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask; 4508 } 4509 4510 if (faceMask & VK_STENCIL_FACE_BACK_BIT) { 4511 cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask; 4512 cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask; 4513 } 4514 4515 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK; 4516 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; 4517} 4518 4519VKAPI_ATTR void VKAPI_CALL 4520dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, 4521 VkStencilFaceFlags faceMask, 4522 uint32_t writeMask) 4523{ 4524 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4525 4526 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { 4527 cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask; 4528 cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask; 4529 } 4530 4531 if (faceMask & VK_STENCIL_FACE_BACK_BIT) { 4532 cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask; 4533 cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask; 4534 } 4535 4536 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK; 4537 cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; 4538} 4539 4540VKAPI_ATTR void VKAPI_CALL 4541dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer, 4542 VkStencilFaceFlags faceMask, 4543 uint32_t reference) 4544{ 4545 VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); 4546 4547 if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 4548 cmdbuf->state.zsa.stencil_test.front.ref = reference; 4549 4550 if (faceMask & 
VK_STENCIL_FACE_BACK_BIT) 4551 cmdbuf->state.zsa.stencil_test.back.ref = reference; 4552 4553 cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; 4554} 4555