1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "dzn_private.h"
25
26#include "vk_alloc.h"
27#include "vk_debug_report.h"
28#include "vk_format.h"
29#include "vk_util.h"
30
31
32static void
33dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
34                                        D3D12_RESOURCE_BARRIER *barriers,
35                                        uint32_t barrier_count)
36{
37   uint32_t flush_count = 0;
38   for (uint32_t b = 0; b < barrier_count; b++) {
39      assert(barriers[b].Transition.pResource);
40
41      /* some layouts map to the same states, and NOP-barriers are illegal */
42      if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) {
43         if (flush_count) {
44            ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
45                                                       &barriers[b - flush_count]);
46            flush_count = 0;
47         }
48      } else {
49         flush_count++;
50      }
51   }
52
53   if (flush_count)
54      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count,
55                                                 &barriers[barrier_count - flush_count]);
56
57   /* Set Before = After so we don't execute the same barrier twice. */
58   for (uint32_t b = 0; b < barrier_count; b++)
59      barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter;
60}
61
62static void
63dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
64                                         ID3D12Resource *res,
65                                         uint32_t first_subres,
66                                         uint32_t subres_count)
67{
68   struct hash_entry *he =
69      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
70   D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
71
72   if (!barriers)
73      return;
74
75   dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
76}
77
/* Bit flags accepted by the dzn_cmd_buffer_queue_*transition*() helpers. */
enum dzn_queue_transition_flags {
   /* Execute the queued barriers right away instead of leaving them pending. */
   DZN_QUEUE_TRANSITION_FLUSH = 0x1,
   /* The caller does not know the previous state: for subresources that
    * already have a queued barrier, the tracked StateAfter is used as the
    * "before" state.
    */
   DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 0x2,
};
82
83static VkResult
84dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf,
85                                         ID3D12Resource *res,
86                                         uint32_t first_subres,
87                                         uint32_t subres_count,
88                                         D3D12_RESOURCE_STATES before,
89                                         D3D12_RESOURCE_STATES after,
90                                         uint32_t flags)
91{
92   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
93   struct hash_entry *he =
94      _mesa_hash_table_search(cmdbuf->transition_barriers, res);
95   struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL;
96
97   if (!barriers) {
98      D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res);
99      D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 };
100      ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info));
101      uint32_t barrier_count =
102         fmt_info.PlaneCount *
103         desc.MipLevels * desc.DepthOrArraySize;
104
105      barriers =
106         vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count,
107                   8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
108      if (!barriers) {
109         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
110         return cmdbuf->error;
111      }
112
113      he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers);
114      if (!he) {
115         vk_free(&cmdbuf->vk.pool->alloc, barriers);
116         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
117         return cmdbuf->error;
118      }
119   }
120
121   for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) {
122      if (!barriers[subres].Transition.pResource) {
123         barriers[subres] = (D3D12_RESOURCE_BARRIER) {
124            .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
125            .Flags = 0,
126            .Transition = {
127               .pResource = res,
128               .Subresource = subres,
129               .StateBefore = before,
130               .StateAfter = after,
131            },
132         };
133      } else {
134	 if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED)
135            before = barriers[subres].Transition.StateAfter;
136
137         assert(barriers[subres].Transition.StateAfter == before ||
138                barriers[subres].Transition.StateAfter == after);
139         barriers[subres].Transition.StateAfter = after;
140      }
141   }
142
143   if (flags & DZN_QUEUE_TRANSITION_FLUSH)
144      dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count);
145
146   return VK_SUCCESS;
147}
148
149static VkResult
150dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf,
151                                                  const struct dzn_image *image,
152                                                  const VkImageSubresourceRange *range,
153                                                  D3D12_RESOURCE_STATES before,
154                                                  D3D12_RESOURCE_STATES after,
155                                                  uint32_t flags)
156{
157   uint32_t first_barrier = 0, barrier_count = 0;
158   VkResult ret = VK_SUCCESS;
159
160   dzn_foreach_aspect(aspect, range->aspectMask) {
161      uint32_t layer_count = dzn_get_layer_count(image, range);
162      uint32_t level_count = dzn_get_level_count(image, range);
163      for (uint32_t layer = 0; layer < layer_count; layer++) {
164         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
165         if (!barrier_count) {
166            first_barrier = subres;
167            barrier_count = level_count;
168            continue;
169         } else if (first_barrier + barrier_count == subres) {
170            barrier_count += level_count;
171            continue;
172         }
173
174         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
175                                                        first_barrier, barrier_count,
176                                                        before, after, flags);
177         if (ret != VK_SUCCESS)
178            return ret;
179
180         barrier_count = 0;
181      }
182
183      if (barrier_count) {
184         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
185                                                        first_barrier, barrier_count,
186                                                        before, after, flags);
187         if (ret != VK_SUCCESS)
188            return ret;
189      }
190   }
191
192   return VK_SUCCESS;
193}
194
195static VkResult
196dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf,
197                                                   const struct dzn_image *image,
198                                                   const VkImageSubresourceRange *range,
199                                                   VkImageLayout old_layout,
200                                                   VkImageLayout new_layout,
201                                                   uint32_t flags)
202{
203   uint32_t first_barrier = 0, barrier_count = 0;
204   VkResult ret = VK_SUCCESS;
205
206   if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED)
207      flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED;
208
209   dzn_foreach_aspect(aspect, range->aspectMask) {
210      D3D12_RESOURCE_STATES after =
211         dzn_image_layout_to_state(image, new_layout, aspect);
212      D3D12_RESOURCE_STATES before =
213         (old_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
214          old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ?
215         image->mem->initial_state :
216         dzn_image_layout_to_state(image, old_layout, aspect);
217
218      uint32_t layer_count = dzn_get_layer_count(image, range);
219      uint32_t level_count = dzn_get_level_count(image, range);
220      for (uint32_t layer = 0; layer < layer_count; layer++) {
221         uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer);
222         if (!barrier_count) {
223            first_barrier = subres;
224            barrier_count = level_count;
225            continue;
226         } else if (first_barrier + barrier_count == subres) {
227            barrier_count += level_count;
228            continue;
229         }
230
231         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
232                                                        first_barrier, barrier_count,
233                                                        before, after, flags);
234         if (ret != VK_SUCCESS)
235            return ret;
236
237         barrier_count = 0;
238      }
239
240      if (barrier_count) {
241         ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res,
242                                                        first_barrier, barrier_count,
243                                                        before, after, flags);
244         if (ret != VK_SUCCESS)
245            return ret;
246      }
247   }
248
249   return VK_SUCCESS;
250}
251
252static void
253dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf)
254{
255   if (!cbuf)
256      return;
257
258   struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk);
259
260   if (cmdbuf->cmdlist)
261      ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
262
263   if (cmdbuf->cmdalloc)
264      ID3D12CommandAllocator_Release(cmdbuf->cmdalloc);
265
266   list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
267      list_del(&res->link);
268      ID3D12Resource_Release(res->res);
269      vk_free(&cbuf->pool->alloc, res);
270   }
271
272   dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool);
273   dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool);
274   dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool);
275   dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool);
276   util_dynarray_fini(&cmdbuf->events.wait);
277   util_dynarray_fini(&cmdbuf->events.signal);
278   util_dynarray_fini(&cmdbuf->queries.reset);
279   util_dynarray_fini(&cmdbuf->queries.wait);
280   util_dynarray_fini(&cmdbuf->queries.signal);
281
282   if (cmdbuf->rtvs.ht) {
283      hash_table_foreach(cmdbuf->rtvs.ht, he)
284         vk_free(&cbuf->pool->alloc, he->data);
285      _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL);
286   }
287
288   if (cmdbuf->dsvs.ht) {
289      hash_table_foreach(cmdbuf->dsvs.ht, he)
290         vk_free(&cbuf->pool->alloc, he->data);
291      _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL);
292   }
293
294   if (cmdbuf->events.ht)
295      _mesa_hash_table_destroy(cmdbuf->events.ht, NULL);
296
297   if (cmdbuf->queries.ht) {
298      hash_table_foreach(cmdbuf->queries.ht, he) {
299         struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
300         util_dynarray_fini(&qpstate->reset);
301         util_dynarray_fini(&qpstate->collect);
302         util_dynarray_fini(&qpstate->wait);
303         util_dynarray_fini(&qpstate->signal);
304         vk_free(&cbuf->pool->alloc, he->data);
305      }
306      _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL);
307   }
308
309   if (cmdbuf->transition_barriers) {
310      hash_table_foreach(cmdbuf->transition_barriers, he)
311         vk_free(&cbuf->pool->alloc, he->data);
312      _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL);
313   }
314
315   vk_command_buffer_finish(&cmdbuf->vk);
316   vk_free(&cbuf->pool->alloc, cmdbuf);
317}
318
319static uint32_t
320dzn_cmd_buffer_rtv_key_hash_function(const void *key)
321{
322   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key));
323}
324
325static bool
326dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b)
327{
328   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0;
329}
330
331static uint32_t
332dzn_cmd_buffer_dsv_key_hash_function(const void *key)
333{
334   return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key));
335}
336
337static bool
338dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b)
339{
340   return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0;
341}
342
343static VkResult
344dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info,
345                      VkCommandBuffer *out)
346{
347   VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool);
348   struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk);
349   struct dzn_physical_device *pdev =
350      container_of(device->vk.physical, struct dzn_physical_device, vk);
351
352   assert(pool->queue_family_index < pdev->queue_family_count);
353
354   D3D12_COMMAND_LIST_TYPE type =
355      pdev->queue_families[pool->queue_family_index].desc.Type;
356
357   struct dzn_cmd_buffer *cmdbuf =
358      vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
359   if (!cmdbuf)
360      return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
361
362   VkResult result =
363      vk_command_buffer_init(&cmdbuf->vk, pool, info->level);
364   if (result != VK_SUCCESS) {
365      vk_free(&pool->alloc, cmdbuf);
366      return result;
367   }
368
369   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
370   list_inithead(&cmdbuf->internal_bufs);
371   util_dynarray_init(&cmdbuf->events.wait, NULL);
372   util_dynarray_init(&cmdbuf->events.signal, NULL);
373   util_dynarray_init(&cmdbuf->queries.reset, NULL);
374   util_dynarray_init(&cmdbuf->queries.wait, NULL);
375   util_dynarray_init(&cmdbuf->queries.signal, NULL);
376   dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device,
377                                 D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
378                                 false, &pool->alloc);
379   dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device,
380                                 D3D12_DESCRIPTOR_HEAP_TYPE_DSV,
381                                 false, &pool->alloc);
382   dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device,
383                                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
384                                 true, &pool->alloc);
385   dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device,
386                                 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
387                                 true, &pool->alloc);
388
389   cmdbuf->events.ht =
390      _mesa_pointer_hash_table_create(NULL);
391   cmdbuf->queries.ht =
392      _mesa_pointer_hash_table_create(NULL);
393   cmdbuf->transition_barriers =
394      _mesa_pointer_hash_table_create(NULL);
395   cmdbuf->rtvs.ht =
396      _mesa_hash_table_create(NULL,
397                              dzn_cmd_buffer_rtv_key_hash_function,
398                              dzn_cmd_buffer_rtv_key_equals_function);
399   cmdbuf->dsvs.ht =
400      _mesa_hash_table_create(NULL,
401                              dzn_cmd_buffer_dsv_key_hash_function,
402                              dzn_cmd_buffer_dsv_key_equals_function);
403   if (!cmdbuf->events.ht || !cmdbuf->queries.ht ||
404       !cmdbuf->transition_barriers ||
405       !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) {
406      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
407      goto out;
408   }
409
410   cmdbuf->vk.destroy = dzn_cmd_buffer_destroy;
411
412   if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type,
413                                                   &IID_ID3D12CommandAllocator,
414                                                   (void **)&cmdbuf->cmdalloc))) {
415      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
416      goto out;
417   }
418
419   if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0, type,
420                                              cmdbuf->cmdalloc, NULL,
421                                              &IID_ID3D12GraphicsCommandList1,
422                                              (void **)&cmdbuf->cmdlist))) {
423      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
424      goto out;
425   }
426
427out:
428   if (result != VK_SUCCESS)
429      dzn_cmd_buffer_destroy(&cmdbuf->vk);
430   else
431      *out = dzn_cmd_buffer_to_handle(cmdbuf);
432
433   return result;
434}
435
436static VkResult
437dzn_cmd_buffer_reset(struct dzn_cmd_buffer *cmdbuf)
438{
439   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
440   const struct dzn_physical_device *pdev =
441      container_of(device->vk.physical, struct dzn_physical_device, vk);
442   const struct vk_command_pool *pool = cmdbuf->vk.pool;
443
444   /* Reset the state */
445   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
446
447   /* TODO: Return resources to the pool */
448   list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs, link) {
449      list_del(&res->link);
450      ID3D12Resource_Release(res->res);
451      vk_free(&cmdbuf->vk.pool->alloc, res);
452   }
453
454   cmdbuf->error = VK_SUCCESS;
455   util_dynarray_clear(&cmdbuf->events.wait);
456   util_dynarray_clear(&cmdbuf->events.signal);
457   util_dynarray_clear(&cmdbuf->queries.reset);
458   util_dynarray_clear(&cmdbuf->queries.wait);
459   util_dynarray_clear(&cmdbuf->queries.signal);
460   hash_table_foreach(cmdbuf->rtvs.ht, he)
461      vk_free(&cmdbuf->vk.pool->alloc, he->data);
462   _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL);
463   cmdbuf->null_rtv.ptr = 0;
464   dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool);
465   hash_table_foreach(cmdbuf->dsvs.ht, he)
466      vk_free(&cmdbuf->vk.pool->alloc, he->data);
467   _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL);
468   hash_table_foreach(cmdbuf->queries.ht, he) {
469      struct dzn_cmd_buffer_query_pool_state *qpstate = he->data;
470      util_dynarray_fini(&qpstate->reset);
471      util_dynarray_fini(&qpstate->collect);
472      util_dynarray_fini(&qpstate->wait);
473      util_dynarray_fini(&qpstate->signal);
474      vk_free(&cmdbuf->vk.pool->alloc, he->data);
475   }
476   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
477   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
478   hash_table_foreach(cmdbuf->transition_barriers, he)
479      vk_free(&cmdbuf->vk.pool->alloc, he->data);
480   _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL);
481   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
482   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
483   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
484   vk_command_buffer_reset(&cmdbuf->vk);
485
486   /* cmdlist->Reset() doesn't return the memory back the the command list
487    * allocator, and cmdalloc->Reset() can only be called if there's no live
488    * cmdlist allocated from the allocator, so we need to release and create
489    * a new command list.
490    */
491   ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist);
492   cmdbuf->cmdlist = NULL;
493   ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc);
494   D3D12_COMMAND_LIST_TYPE type =
495      pdev->queue_families[pool->queue_family_index].desc.Type;
496   if (FAILED(ID3D12Device1_CreateCommandList(device->dev, 0,
497                                              type,
498                                              cmdbuf->cmdalloc, NULL,
499                                              &IID_ID3D12GraphicsCommandList1,
500                                              (void **)&cmdbuf->cmdlist))) {
501      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
502   }
503
504   return cmdbuf->error;
505}
506
507VKAPI_ATTR VkResult VKAPI_CALL
508dzn_AllocateCommandBuffers(VkDevice device,
509                           const VkCommandBufferAllocateInfo *pAllocateInfo,
510                           VkCommandBuffer *pCommandBuffers)
511{
512   VK_FROM_HANDLE(dzn_device, dev, device);
513   VkResult result = VK_SUCCESS;
514   uint32_t i;
515
516   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
517      result = dzn_cmd_buffer_create(pAllocateInfo,
518                                     &pCommandBuffers[i]);
519      if (result != VK_SUCCESS)
520         break;
521   }
522
523   if (result != VK_SUCCESS) {
524      dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
525                                                i, pCommandBuffers);
526      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
527         pCommandBuffers[i] = VK_NULL_HANDLE;
528   }
529
530   return result;
531}
532
533VKAPI_ATTR VkResult VKAPI_CALL
534dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,
535                       VkCommandBufferResetFlags flags)
536{
537   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
538
539   return dzn_cmd_buffer_reset(cmdbuf);
540}
541
542VKAPI_ATTR VkResult VKAPI_CALL
543dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
544                       const VkCommandBufferBeginInfo *info)
545{
546   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
547
548   /* If this is the first vkBeginCommandBuffer, we must *initialize* the
549    * command buffer's state. Otherwise, we must *reset* its state. In both
550    * cases we reset it.
551    *
552    * From the Vulkan 1.0 spec:
553    *
554    *    If a command buffer is in the executable state and the command buffer
555    *    was allocated from a command pool with the
556    *    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
557    *    vkBeginCommandBuffer implicitly resets the command buffer, behaving
558    *    as if vkResetCommandBuffer had been called with
559    *    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
560    *    the command buffer in the recording state.
561    */
562   return dzn_cmd_buffer_reset(cmdbuf);
563}
564
565static void
566dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf)
567{
568   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
569
570   if (cmdbuf->error != VK_SUCCESS)
571      goto out;
572
573   hash_table_foreach(cmdbuf->events.ht, he) {
574      enum dzn_event_state state = (uintptr_t)he->data;
575
576      if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) {
577         struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state  == DZN_EVENT_STATE_SET };
578         struct dzn_cmd_event_signal *entry =
579            util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1);
580
581         if (!entry) {
582            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
583            break;
584         }
585
586         *entry = signal;
587      }
588   }
589
590out:
591   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
592}
593
594static VkResult
595dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
596{
597   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
598
599   if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
600      return VK_SUCCESS;
601
602   unsigned old_sz = array->size;
603   void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS);
604   if (!ptr) {
605      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
606      return cmdbuf->error;
607   }
608
609   memset(ptr, 0, array->size - old_sz);
610   return VK_SUCCESS;
611}
612
613static bool
614dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit)
615{
616   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
617
618   if (bit < nbits)
619      return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit);
620
621   return false;
622}
623
624static VkResult
625dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
626{
627   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit);
628   if (result != VK_SUCCESS)
629      return result;
630
631   BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit);
632   return VK_SUCCESS;
633}
634
635static void
636dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit)
637{
638   if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS)
639      return;
640
641   BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit);
642}
643
644static VkResult
645dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf,
646                                   struct util_dynarray *array,
647                                   uint32_t bit, uint32_t count)
648{
649   VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1);
650   if (result != VK_SUCCESS)
651      return result;
652
653   BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1);
654   return VK_SUCCESS;
655}
656
657static void
658dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf,
659                                     struct util_dynarray *array,
660                                     uint32_t bit, uint32_t count)
661{
662   uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS;
663
664   if (!nbits)
665      return;
666
667   uint32_t end = MIN2(bit + count, nbits) - 1;
668
669   while (bit <= end) {
670      uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32));
671      BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1);
672      bit += subcount;
673   }
674}
675
676static struct dzn_cmd_buffer_query_pool_state *
677dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf)
678{
679   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
680   struct dzn_cmd_buffer_query_pool_state *state =
681      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state),
682               8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
683   if (!state) {
684      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
685      return NULL;
686   }
687
688   util_dynarray_init(&state->reset, NULL);
689   util_dynarray_init(&state->collect, NULL);
690   util_dynarray_init(&state->wait, NULL);
691   util_dynarray_init(&state->signal, NULL);
692   return state;
693}
694
695static void
696dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
697                                        struct dzn_cmd_buffer_query_pool_state *state)
698{
699   util_dynarray_fini(&state->reset);
700   util_dynarray_fini(&state->collect);
701   util_dynarray_fini(&state->wait);
702   util_dynarray_fini(&state->signal);
703   vk_free(&cmdbuf->vk.pool->alloc, state);
704}
705
706static struct dzn_cmd_buffer_query_pool_state *
707dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf,
708                                    struct dzn_query_pool *qpool)
709{
710   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
711   struct dzn_cmd_buffer_query_pool_state *state = NULL;
712   struct hash_entry *he =
713      _mesa_hash_table_search(cmdbuf->queries.ht, qpool);
714
715   if (!he) {
716      state = dzn_cmd_buffer_create_query_pool_state(cmdbuf);
717      if (!state)
718         return NULL;
719
720      he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state);
721      if (!he) {
722         dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state);
723         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
724         return NULL;
725      }
726   } else {
727      state = he->data;
728   }
729
730   return state;
731}
732
733static VkResult
734dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf,
735                               const struct dzn_query_pool *qpool,
736                               struct dzn_cmd_buffer_query_pool_state *state,
737                               uint32_t first_query,
738                               uint32_t query_count)
739{
740   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
741   uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS;
742   uint32_t start, end;
743
744   if (!nbits)
745      return VK_SUCCESS;
746
747   query_count = MIN2(query_count, nbits - first_query);
748   nbits = MIN2(first_query + query_count, nbits);
749
750   VkResult result =
751      dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1);
752   if (result != VK_SUCCESS)
753      return result;
754
755   dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1);
756
757   BITSET_WORD *collect =
758      util_dynarray_element(&state->collect, BITSET_WORD, 0);
759
760   for (start = first_query, end = first_query,
761        __bitset_next_range(&start, &end, collect, nbits);
762        start < nbits;
763        __bitset_next_range(&start, &end, collect, nbits)) {
764      ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist,
765                                                  qpool->heap,
766                                                  qpool->queries[start].type,
767                                                  start, end - start,
768                                                  qpool->resolve_buffer,
769                                                  qpool->query_size * start);
770   }
771
772   uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query);
773   uint32_t size = dzn_query_pool_get_result_size(qpool, query_count);
774
775   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
776                                            0, 1,
777                                            D3D12_RESOURCE_STATE_COPY_DEST,
778                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
779                                            DZN_QUEUE_TRANSITION_FLUSH);
780
781   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
782                                               qpool->collect_buffer, offset,
783                                               qpool->resolve_buffer, offset,
784                                               size);
785
786   for (start = first_query, end = first_query,
787        __bitset_next_range(&start, &end, collect, nbits);
788        start < nbits;
789        __bitset_next_range(&start, &end, collect, nbits)) {
790      uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
791      uint32_t count = end - start;
792
793      for (unsigned i = 0; i < count; i += step) {
794         uint32_t sub_count = MIN2(step, count - i);
795
796         ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist,
797                                                     qpool->collect_buffer,
798                                                     dzn_query_pool_get_availability_offset(qpool, start + i),
799                                                     device->queries.refs,
800                                                     DZN_QUERY_REFS_ALL_ONES_OFFSET,
801                                                     sizeof(uint64_t) * sub_count);
802      }
803
804      dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count);
805      dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count);
806   }
807
808   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer,
809                                            0, 1,
810                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
811                                            D3D12_RESOURCE_STATE_COPY_DEST,
812                                            0);
813   return VK_SUCCESS;
814}
815
816static VkResult
817dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf,
818                                 struct dzn_query_pool *qpool,
819                                 struct util_dynarray *bitset_array,
820                                 struct util_dynarray *ops_array)
821{
822   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
823   BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0);
824   uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS;
825   uint32_t start, end;
826
827   BITSET_FOREACH_RANGE(start, end, bitset, nbits) {
828      struct dzn_cmd_buffer_query_range range = { qpool, start, end - start };
829      struct dzn_cmd_buffer_query_range *entry =
830         util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1);
831
832      if (!entry) {
833         cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
834         return cmdbuf->error;
835      }
836
837      *entry = range;
838   }
839
840   return VK_SUCCESS;
841}
842
843static VkResult
844dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf)
845{
846   hash_table_foreach(cmdbuf->queries.ht, he) {
847      struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key;
848      struct dzn_cmd_buffer_query_pool_state *state = he->data;
849      VkResult result =
850         dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count);
851      if (result != VK_SUCCESS)
852         return result;
853
854      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset);
855      if (result != VK_SUCCESS)
856         return result;
857
858      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait);
859      if (result != VK_SUCCESS)
860         return result;
861
862      result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal);
863      if (result != VK_SUCCESS)
864         return result;
865   }
866
867   return VK_SUCCESS;
868}
869
870VKAPI_ATTR VkResult VKAPI_CALL
871dzn_EndCommandBuffer(VkCommandBuffer commandBuffer)
872{
873   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
874
875   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
876      dzn_cmd_buffer_gather_events(cmdbuf);
877      dzn_cmd_buffer_gather_queries(cmdbuf);
878      HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist);
879      if (FAILED(hres))
880         cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
881   } else {
882      cmdbuf->error = cmdbuf->vk.cmd_queue.error;
883   }
884
885   return cmdbuf->error;
886}
887
888VKAPI_ATTR void VKAPI_CALL
889dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
890                        const VkDependencyInfo *info)
891{
892   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
893
894   bool execution_barrier =
895      !info->memoryBarrierCount &&
896      !info->bufferMemoryBarrierCount &&
897      !info->imageMemoryBarrierCount;
898
899   if (execution_barrier) {
900      /* Execution barrier can be emulated with a NULL UAV barrier (AKA
901       * pipeline flush). That's the best we can do with the standard D3D12
902       * barrier API.
903       */
904      D3D12_RESOURCE_BARRIER barrier = {
905         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
906         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
907         .UAV = { .pResource = NULL },
908      };
909
910      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
911   }
912
913   /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
914    * Scopes are not taken into account, but that's inherent to the current
915    * D3D12 barrier API.
916    */
917   if (info->memoryBarrierCount) {
918      D3D12_RESOURCE_BARRIER barriers[2] = { 0 };
919
920      barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
921      barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
922      barriers[0].UAV.pResource = NULL;
923      barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
924      barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
925      barriers[1].Aliasing.pResourceBefore = NULL;
926      barriers[1].Aliasing.pResourceAfter = NULL;
927      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers);
928   }
929
930   for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
931      VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
932      D3D12_RESOURCE_BARRIER barrier = { 0 };
933
934      /* UAV are used only for storage buffers, skip all other buffers. */
935      if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
936         continue;
937
938      barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
939      barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
940      barrier.UAV.pResource = buf->res;
941      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
942   }
943
944   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
945      const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
946      const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
947      VK_FROM_HANDLE(dzn_image, image, ibarrier->image);
948
949      /* We use placed resource's simple model, in which only one resource
950       * pointing to a given heap is active at a given time. To make the
951       * resource active we need to add an aliasing barrier.
952       */
953      D3D12_RESOURCE_BARRIER aliasing_barrier = {
954         .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING,
955         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
956         .Aliasing = {
957            .pResourceBefore = NULL,
958            .pResourceAfter = image->res,
959         },
960      };
961
962      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &aliasing_barrier);
963
964      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
965                                                         ibarrier->oldLayout,
966                                                         ibarrier->newLayout,
967                                                         DZN_QUEUE_TRANSITION_FLUSH);
968   }
969}
970
971static D3D12_CPU_DESCRIPTOR_HANDLE
972dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf,
973                       const struct dzn_image *image,
974                       const D3D12_DEPTH_STENCIL_VIEW_DESC *desc)
975{
976   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
977   struct dzn_cmd_buffer_dsv_key key = { image, *desc };
978   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key);
979   struct dzn_cmd_buffer_dsv_entry *dsve;
980
981   if (!he) {
982      struct dzn_descriptor_heap *heap;
983      uint32_t slot;
984
985      // TODO: error handling
986      dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8,
987                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
988      dsve->key = key;
989      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot);
990      dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
991      ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle);
992      _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve);
993   } else {
994      dsve = he->data;
995   }
996
997   return dsve->handle;
998}
999
1000static D3D12_CPU_DESCRIPTOR_HANDLE
1001dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf,
1002                       const struct dzn_image *image,
1003                       const D3D12_RENDER_TARGET_VIEW_DESC *desc)
1004{
1005   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1006   struct dzn_cmd_buffer_rtv_key key = { image, *desc };
1007   struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key);
1008   struct dzn_cmd_buffer_rtv_entry *rtve;
1009
1010   if (!he) {
1011      struct dzn_descriptor_heap *heap;
1012      uint32_t slot;
1013
1014      // TODO: error handling
1015      rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8,
1016                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1017      rtve->key = key;
1018      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1019      rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1020      ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle);
1021      he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve);
1022   } else {
1023      rtve = he->data;
1024   }
1025
1026   return rtve->handle;
1027}
1028
1029static D3D12_CPU_DESCRIPTOR_HANDLE
1030dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf)
1031{
1032   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1033
1034   if (!cmdbuf->null_rtv.ptr) {
1035      struct dzn_descriptor_heap *heap;
1036      uint32_t slot;
1037      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot);
1038      cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot);
1039
1040      D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 };
1041      desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
1042      desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
1043      desc.Texture2D.MipSlice = 0;
1044      desc.Texture2D.PlaneSlice = 0;
1045
1046      ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv);
1047   }
1048
1049   return cmdbuf->null_rtv;
1050}
1051
1052static VkResult
1053dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf,
1054                                  uint32_t size,
1055                                  D3D12_HEAP_TYPE heap_type,
1056                                  D3D12_RESOURCE_STATES init_state,
1057                                  ID3D12Resource **out)
1058{
1059   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1060   ID3D12Resource *res;
1061   *out = NULL;
1062
1063   /* Align size on 64k (the default alignment) */
1064   size = ALIGN_POT(size, 64 * 1024);
1065
1066   D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, heap_type);
1067   D3D12_RESOURCE_DESC rdesc = {
1068      .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
1069      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
1070      .Width = size,
1071      .Height = 1,
1072      .DepthOrArraySize = 1,
1073      .MipLevels = 1,
1074      .Format = DXGI_FORMAT_UNKNOWN,
1075      .SampleDesc = { .Count = 1, .Quality = 0 },
1076      .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
1077      .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
1078   };
1079
1080   HRESULT hres =
1081      ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
1082                                            D3D12_HEAP_FLAG_NONE, &rdesc,
1083                                            init_state, NULL,
1084                                            &IID_ID3D12Resource,
1085                                            (void **)&res);
1086   if (FAILED(hres)) {
1087      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1088      return cmdbuf->error;
1089   }
1090
1091   struct dzn_internal_resource *entry =
1092      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
1093               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1094   if (!entry) {
1095      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1096      ID3D12Resource_Release(res);
1097      return cmdbuf->error;
1098   }
1099
1100   entry->res = res;
1101   list_addtail(&entry->link, &cmdbuf->internal_bufs);
1102   *out = entry->res;
1103   return VK_SUCCESS;
1104}
1105
1106static void
1107dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf,
1108                                     const struct dzn_image *image,
1109                                     VkImageLayout layout,
1110                                     const VkClearColorValue *color,
1111                                     const VkImageSubresourceRange *range,
1112                                     uint32_t rect_count, D3D12_RECT *rects)
1113{
1114   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
1115   uint32_t blksize = util_format_get_blocksize(pfmt);
1116   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
1117   uint32_t raw[4] = { 0 };
1118
1119   assert(blksize <= sizeof(raw));
1120   assert(!(sizeof(buf) % blksize));
1121
1122   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);
1123
1124   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1125   while (fill_step % blksize)
1126      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1127
1128   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
1129   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
1130   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
1131   uint32_t res_size = max_h * row_pitch;
1132
1133   assert(fill_step <= sizeof(buf));
1134
1135   for (uint32_t i = 0; i < fill_step; i += blksize)
1136      memcpy(&buf[i], raw, blksize);
1137
1138   ID3D12Resource *src_res;
1139
1140   VkResult result =
1141      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
1142                                        D3D12_HEAP_TYPE_UPLOAD,
1143                                        D3D12_RESOURCE_STATE_GENERIC_READ,
1144                                        &src_res);
1145   if (result != VK_SUCCESS)
1146      return;
1147
1148   assert(!(res_size % fill_step));
1149
1150   uint8_t *cpu_ptr;
1151   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
1152   for (uint32_t i = 0; i < res_size; i += fill_step)
1153      memcpy(&cpu_ptr[i], buf, fill_step);
1154
1155   ID3D12Resource_Unmap(src_res, 0, NULL);
1156
1157   D3D12_TEXTURE_COPY_LOCATION src_loc = {
1158      .pResource = src_res,
1159      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
1160      .PlacedFootprint = {
1161         .Offset = 0,
1162         .Footprint = {
1163            .Width = max_w,
1164            .Height = max_h,
1165            .Depth = 1,
1166            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
1167         },
1168      },
1169   };
1170
1171   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
1172                                            D3D12_RESOURCE_STATE_GENERIC_READ,
1173                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
1174                                            DZN_QUEUE_TRANSITION_FLUSH);
1175
1176   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1177                                                      layout,
1178                                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1179                                                      DZN_QUEUE_TRANSITION_FLUSH);
1180
1181   assert(dzn_get_level_count(image, range) == 1);
1182   uint32_t layer_count = dzn_get_layer_count(image, range);
1183
1184   dzn_foreach_aspect(aspect, range->aspectMask) {
1185      VkImageSubresourceLayers subres = {
1186         .aspectMask = (VkImageAspectFlags)aspect,
1187         .mipLevel = range->baseMipLevel,
1188         .baseArrayLayer = range->baseArrayLayer,
1189         .layerCount = layer_count,
1190      };
1191
1192      for (uint32_t layer = 0; layer < layer_count; layer++) {
1193         D3D12_TEXTURE_COPY_LOCATION dst_loc =
1194            dzn_image_get_copy_loc(image, &subres, aspect, layer);
1195
1196         src_loc.PlacedFootprint.Footprint.Format =
1197            dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
1198            dst_loc.PlacedFootprint.Footprint.Format :
1199            image->desc.Format;
1200
1201         for (uint32_t r = 0; r < rect_count; r++) {
1202            D3D12_BOX src_box = {
1203               .left = 0,
1204               .top = 0,
1205               .front = 0,
1206               .right = (UINT)(rects[r].right - rects[r].left),
1207               .bottom = (UINT)(rects[r].bottom - rects[r].top),
1208               .back = 1,
1209            };
1210
1211            ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist,
1212                                                         &dst_loc,
1213                                                         rects[r].left,
1214                                                         rects[r].top, 0,
1215                                                         &src_loc,
1216                                                         &src_box);
1217         }
1218      }
1219   }
1220
1221   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1222                                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1223                                                      layout,
1224                                                      DZN_QUEUE_TRANSITION_FLUSH);
1225}
1226
1227static VkClearColorValue
1228adjust_clear_color(VkFormat format, const VkClearColorValue *col)
1229{
1230   VkClearColorValue out = *col;
1231
1232   // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things
1233   // manually where it matters, like here, in the clear path.
1234   if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
1235      DZN_SWAP(float, out.float32[0], out.float32[1]);
1236      DZN_SWAP(float, out.float32[2], out.float32[3]);
1237   }
1238
1239   return out;
1240}
1241
1242static void
1243dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf,
1244                                      const struct dzn_image *image,
1245                                      VkImageLayout layout,
1246                                      const VkClearColorValue *color,
1247                                      uint32_t range_count,
1248                                      const VkImageSubresourceRange *ranges)
1249{
1250   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
1251   uint32_t blksize = util_format_get_blocksize(pfmt);
1252   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 };
1253   uint32_t raw[4] = { 0 };
1254
1255   assert(blksize <= sizeof(raw));
1256   assert(!(sizeof(buf) % blksize));
1257
1258   util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1);
1259
1260   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1261   while (fill_step % blksize)
1262      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
1263
1264   uint32_t res_size = 0;
1265   for (uint32_t r = 0; r < range_count; r++) {
1266      uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel);
1267      uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel);
1268      uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel);
1269      uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step);
1270
1271      res_size = MAX2(res_size, h * d * row_pitch);
1272   }
1273
1274   assert(fill_step <= sizeof(buf));
1275
1276   for (uint32_t i = 0; i < fill_step; i += blksize)
1277      memcpy(&buf[i], raw, blksize);
1278
1279   ID3D12Resource *src_res;
1280
1281   VkResult result =
1282      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
1283                                        D3D12_HEAP_TYPE_UPLOAD,
1284                                        D3D12_RESOURCE_STATE_GENERIC_READ,
1285                                        &src_res);
1286   if (result != VK_SUCCESS)
1287      return;
1288
1289   assert(!(res_size % fill_step));
1290
1291   uint8_t *cpu_ptr;
1292   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
1293   for (uint32_t i = 0; i < res_size; i += fill_step)
1294      memcpy(&cpu_ptr[i], buf, fill_step);
1295
1296   ID3D12Resource_Unmap(src_res, 0, NULL);
1297
1298   D3D12_TEXTURE_COPY_LOCATION src_loc = {
1299      .pResource = src_res,
1300      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
1301      .PlacedFootprint = {
1302         .Offset = 0,
1303      },
1304   };
1305
1306   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, src_res, 0, 1,
1307                                            D3D12_RESOURCE_STATE_GENERIC_READ,
1308                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
1309                                            DZN_QUEUE_TRANSITION_FLUSH);
1310
1311   for (uint32_t r = 0; r < range_count; r++) {
1312      uint32_t level_count = dzn_get_level_count(image, &ranges[r]);
1313      uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]);
1314
1315      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
1316                                                         layout,
1317                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1318                                                         DZN_QUEUE_TRANSITION_FLUSH);
1319
1320      dzn_foreach_aspect(aspect, ranges[r].aspectMask) {
1321         for (uint32_t lvl = 0; lvl < level_count; lvl++) {
1322            uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl);
1323            uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl);
1324            uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl);
1325            VkImageSubresourceLayers subres = {
1326               .aspectMask = (VkImageAspectFlags)aspect,
1327               .mipLevel = ranges[r].baseMipLevel + lvl,
1328               .baseArrayLayer = ranges[r].baseArrayLayer,
1329               .layerCount = layer_count,
1330            };
1331
1332            for (uint32_t layer = 0; layer < layer_count; layer++) {
1333               D3D12_TEXTURE_COPY_LOCATION dst_loc =
1334                  dzn_image_get_copy_loc(image, &subres, aspect, layer);
1335
1336               src_loc.PlacedFootprint.Footprint.Format =
1337                  dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ?
1338                  dst_loc.PlacedFootprint.Footprint.Format :
1339                  image->desc.Format;
1340               src_loc.PlacedFootprint.Footprint.Width = w;
1341               src_loc.PlacedFootprint.Footprint.Height = h;
1342               src_loc.PlacedFootprint.Footprint.Depth = d;
1343               src_loc.PlacedFootprint.Footprint.RowPitch =
1344                  ALIGN_NPOT(w * blksize, fill_step);
1345               D3D12_BOX src_box = {
1346                  .left = 0,
1347                  .top = 0,
1348                  .front = 0,
1349                  .right = w,
1350                  .bottom = h,
1351                  .back = d,
1352               };
1353
1354               ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0,
1355                                                  &src_loc, &src_box);
1356
1357            }
1358         }
1359      }
1360
1361      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r],
1362                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1363                                                         layout,
1364                                                         DZN_QUEUE_TRANSITION_FLUSH);
1365   }
1366}
1367
1368static void
1369dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf,
1370                                struct dzn_image_view *view,
1371                                VkImageLayout layout,
1372                                const VkClearValue *value,
1373                                VkImageAspectFlags aspects,
1374                                uint32_t base_layer,
1375                                uint32_t layer_count,
1376                                uint32_t rect_count,
1377                                D3D12_RECT *rects)
1378{
1379   struct dzn_image *image =
1380      container_of(view->vk.image, struct dzn_image, vk);
1381
1382   VkImageSubresourceRange range = {
1383      .aspectMask = aspects,
1384      .baseMipLevel = view->vk.base_mip_level,
1385      .levelCount = 1,
1386      .baseArrayLayer = view->vk.base_array_layer + base_layer,
1387      .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ?
1388                    view->vk.layer_count - base_layer : layer_count,
1389   };
1390
1391   layer_count = vk_image_subresource_layer_count(&image->vk, &range);
1392
1393   if (vk_format_is_depth_or_stencil(view->vk.format)) {
1394      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
1395
1396      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
1397         flags |= D3D12_CLEAR_FLAG_DEPTH;
1398      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
1399         flags |= D3D12_CLEAR_FLAG_STENCIL;
1400
1401      if (flags != 0) {
1402         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1403                                                            layout,
1404                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1405                                                            DZN_QUEUE_TRANSITION_FLUSH);
1406
1407         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0);
1408         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
1409         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags,
1410                                                value->depthStencil.depth,
1411                                                value->depthStencil.stencil,
1412                                                rect_count, rects);
1413
1414         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1415                                                            VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1416                                                            layout,
1417                                                            DZN_QUEUE_TRANSITION_FLUSH);
1418      }
1419   } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1420      VkClearColorValue color = adjust_clear_color(view->vk.format, &value->color);
1421      bool clear_with_cpy = false;
1422      float vals[4];
1423
1424      if (vk_format_is_sint(view->vk.format)) {
1425         for (uint32_t i = 0; i < 4; i++) {
1426            vals[i] = color.int32[i];
1427            if (color.int32[i] != (int32_t)vals[i]) {
1428               clear_with_cpy = true;
1429               break;
1430            }
1431         }
1432      } else if (vk_format_is_uint(view->vk.format)) {
1433         for (uint32_t i = 0; i < 4; i++) {
1434            vals[i] = color.uint32[i];
1435            if (color.uint32[i] != (uint32_t)vals[i]) {
1436               clear_with_cpy = true;
1437               break;
1438            }
1439         }
1440      } else {
1441         for (uint32_t i = 0; i < 4; i++)
1442            vals[i] = color.float32[i];
1443      }
1444
1445      if (clear_with_cpy) {
1446         dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image,
1447                                              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1448                                              &value->color,
1449                                              &range, rect_count, rects);
1450      } else {
1451         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1452                                                            layout,
1453                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1454                                                            DZN_QUEUE_TRANSITION_FLUSH);
1455
1456         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0);
1457         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
1458         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects);
1459
1460         dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
1461                                                            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1462                                                            layout,
1463                                                            DZN_QUEUE_TRANSITION_FLUSH);
1464      }
1465   }
1466}
1467
1468static void
1469dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf,
1470                           const struct dzn_image *image,
1471                           VkImageLayout layout,
1472                           const VkClearColorValue *col,
1473                           uint32_t range_count,
1474                           const VkImageSubresourceRange *ranges)
1475{
1476   if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
1477      dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
1478      return;
1479   }
1480
1481   VkClearColorValue color = adjust_clear_color(image->vk.format, col);
1482   float clear_vals[4];
1483
1484   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
1485
1486   if (util_format_is_pure_sint(pfmt)) {
1487      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
1488         clear_vals[c] = color.int32[c];
1489         if (color.int32[c] != (int32_t)clear_vals[c]) {
1490            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
1491            return;
1492         }
1493      }
1494   } else if (util_format_is_pure_uint(pfmt)) {
1495      for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) {
1496         clear_vals[c] = color.uint32[c];
1497         if (color.uint32[c] != (uint32_t)clear_vals[c]) {
1498            dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges);
1499            return;
1500         }
1501      }
1502   } else {
1503      memcpy(clear_vals, color.float32, sizeof(clear_vals));
1504   }
1505
1506   for (uint32_t r = 0; r < range_count; r++) {
1507      const VkImageSubresourceRange *range = &ranges[r];
1508      uint32_t level_count = dzn_get_level_count(image, range);
1509
1510      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1511                                                         layout,
1512                                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1513                                                         DZN_QUEUE_TRANSITION_FLUSH);
1514      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
1515         VkImageSubresourceRange view_range = *range;
1516
1517         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
1518            view_range.baseArrayLayer = 0;
1519            view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl);
1520         }
1521
1522         D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl);
1523         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc);
1524         ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL);
1525      }
1526
1527      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1528                                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1529                                                         layout,
1530                                                         DZN_QUEUE_TRANSITION_FLUSH);
1531   }
1532}
1533
1534static void
1535dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf,
1536                        const struct dzn_image *image,
1537                        VkImageLayout layout,
1538                        const VkClearDepthStencilValue *zs,
1539                        uint32_t range_count,
1540                        const VkImageSubresourceRange *ranges)
1541{
1542   assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
1543
1544   for (uint32_t r = 0; r < range_count; r++) {
1545      const VkImageSubresourceRange *range = &ranges[r];
1546      uint32_t level_count = dzn_get_level_count(image, range);
1547
1548      D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
1549
1550      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
1551         flags |= D3D12_CLEAR_FLAG_DEPTH;
1552      if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
1553         flags |= D3D12_CLEAR_FLAG_STENCIL;
1554
1555      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1556                                                         layout,
1557                                                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1558                                                         DZN_QUEUE_TRANSITION_FLUSH);
1559
1560      for (uint32_t lvl = 0; lvl < level_count; lvl++) {
1561         D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl);
1562         D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc);
1563         ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist,
1564                                                          handle, flags,
1565                                                          zs->depth,
1566                                                          zs->stencil,
1567                                                          0, NULL);
1568      }
1569
1570      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range,
1571                                                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1572                                                         layout,
1573                                                         DZN_QUEUE_TRANSITION_FLUSH);
1574   }
1575}
1576
1577static void
1578dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf,
1579                                   const VkCopyBufferToImageInfo2 *info,
1580                                   uint32_t r,
1581                                   VkImageAspectFlagBits aspect,
1582                                   uint32_t l)
1583{
1584   VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
1585   VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage);
1586
1587   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
1588
1589   VkBufferImageCopy2 region = info->pRegions[r];
1590   enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format);
1591   uint32_t blkh = util_format_get_blockheight(pfmt);
1592   uint32_t blkd = util_format_get_blockdepth(pfmt);
1593
1594   /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
1595    * to not be block aligned if it's reaching the image boundary, offsets still
1596    * have to be aligned. Align the image extent to make D3D12 happy.
1597    */
1598   dzn_image_align_extent(dst_image, &region.imageExtent);
1599
1600   D3D12_TEXTURE_COPY_LOCATION dst_img_loc =
1601      dzn_image_get_copy_loc(dst_image, &region.imageSubresource, aspect, l);
1602   D3D12_TEXTURE_COPY_LOCATION src_buf_loc =
1603      dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, &region, aspect, l);
1604
1605   if (dzn_buffer_supports_region_copy(&src_buf_loc)) {
1606      /* RowPitch and Offset are properly aligned, we can copy
1607       * the whole thing in one call.
1608       */
1609      D3D12_BOX src_box = {
1610         .left = 0,
1611         .top = 0,
1612         .front = 0,
1613         .right = region.imageExtent.width,
1614         .bottom = region.imageExtent.height,
1615         .back = region.imageExtent.depth,
1616      };
1617
1618      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc,
1619                                                   region.imageOffset.x,
1620                                                   region.imageOffset.y,
1621                                                   region.imageOffset.z,
1622                                                   &src_buf_loc, &src_box);
1623      return;
1624   }
1625
1626   /* Copy line-by-line if things are not properly aligned. */
1627   D3D12_BOX src_box = {
1628      .top = 0,
1629      .front = 0,
1630      .bottom = blkh,
1631      .back = blkd,
1632   };
1633
1634   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
1635      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
1636         uint32_t src_x;
1637
1638         D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc =
1639            dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format,
1640                                         &region, &src_buf_loc,
1641                                         y, z, &src_x);
1642
1643         src_box.left = src_x;
1644         src_box.right = src_x + region.imageExtent.width;
1645         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
1646                                                      &dst_img_loc,
1647                                                      region.imageOffset.x,
1648                                                      region.imageOffset.y + y,
1649                                                      region.imageOffset.z + z,
1650                                                      &src_buf_line_loc,
1651                                                      &src_box);
1652      }
1653   }
1654}
1655
1656static void
1657dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf,
1658                                   const VkCopyImageToBufferInfo2 *info,
1659                                   uint32_t r,
1660                                   VkImageAspectFlagBits aspect,
1661                                   uint32_t l)
1662{
1663   VK_FROM_HANDLE(dzn_image, src_image, info->srcImage);
1664   VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
1665
1666   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
1667
1668   VkBufferImageCopy2 region = info->pRegions[r];
1669   enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format);
1670   uint32_t blkh = util_format_get_blockheight(pfmt);
1671   uint32_t blkd = util_format_get_blockdepth(pfmt);
1672
1673   /* D3D12 wants block aligned offsets/extent, but vulkan allows the extent
1674    * to not be block aligned if it's reaching the image boundary, offsets still
1675    * have to be aligned. Align the image extent to make D3D12 happy.
1676    */
1677   dzn_image_align_extent(src_image, &region.imageExtent);
1678
1679   D3D12_TEXTURE_COPY_LOCATION src_img_loc =
1680      dzn_image_get_copy_loc(src_image, &region.imageSubresource, aspect, l);
1681   D3D12_TEXTURE_COPY_LOCATION dst_buf_loc =
1682      dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, &region, aspect, l);
1683
1684   if (dzn_buffer_supports_region_copy(&dst_buf_loc)) {
1685      /* RowPitch and Offset are properly aligned on 256 bytes, we can copy
1686       * the whole thing in one call.
1687       */
1688      D3D12_BOX src_box = {
1689         .left = (UINT)region.imageOffset.x,
1690         .top = (UINT)region.imageOffset.y,
1691         .front = (UINT)region.imageOffset.z,
1692         .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
1693         .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height),
1694         .back = (UINT)(region.imageOffset.z + region.imageExtent.depth),
1695      };
1696
1697      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc,
1698                                                   0, 0, 0, &src_img_loc,
1699                                                   &src_box);
1700      return;
1701   }
1702
1703   D3D12_BOX src_box = {
1704      .left = (UINT)region.imageOffset.x,
1705      .right = (UINT)(region.imageOffset.x + region.imageExtent.width),
1706   };
1707
1708   /* Copy line-by-line if things are not properly aligned. */
1709   for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) {
1710      src_box.front = region.imageOffset.z + z;
1711      src_box.back = src_box.front + blkd;
1712
1713      for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) {
1714         uint32_t dst_x;
1715
1716         D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc =
1717            dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format,
1718                                         &region, &dst_buf_loc,
1719                                         y, z, &dst_x);
1720
1721         src_box.top = region.imageOffset.y + y;
1722         src_box.bottom = src_box.top + blkh;
1723
1724         ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist,
1725                                                      &dst_buf_line_loc,
1726                                                      dst_x, 0, 0,
1727                                                      &src_img_loc,
1728                                                      &src_box);
1729      }
1730   }
1731}
1732
1733static void
1734dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf,
1735                              const VkCopyImageInfo2 *info,
1736                              D3D12_RESOURCE_DESC *tmp_desc,
1737                              D3D12_TEXTURE_COPY_LOCATION *tmp_loc,
1738                              uint32_t r,
1739                              VkImageAspectFlagBits aspect,
1740                              uint32_t l)
1741{
1742   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1743   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
1744   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
1745
1746   ID3D12Device2 *dev = device->dev;
1747   ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist;
1748
1749   VkImageCopy2 region = info->pRegions[r];
1750   dzn_image_align_extent(src, &region.extent);
1751
1752   const VkImageSubresourceLayers *src_subres = &region.srcSubresource;
1753   const VkImageSubresourceLayers *dst_subres = &region.dstSubresource;
1754   VkFormat src_format =
1755      dzn_image_get_plane_format(src->vk.format, aspect);
1756   VkFormat dst_format =
1757      dzn_image_get_plane_format(dst->vk.format, aspect);
1758
1759   enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format);
1760   uint32_t src_blkw = util_format_get_blockwidth(src_pfmt);
1761   uint32_t src_blkh = util_format_get_blockheight(src_pfmt);
1762   uint32_t src_blkd = util_format_get_blockdepth(src_pfmt);
1763   enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format);
1764   uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt);
1765   uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt);
1766   uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt);
1767   uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z;
1768   uint32_t depth = region.extent.depth;
1769   uint32_t dst_l = l, src_l = l;
1770
1771   assert(src_subres->aspectMask == dst_subres->aspectMask);
1772
1773   if (src->vk.image_type == VK_IMAGE_TYPE_3D &&
1774       dst->vk.image_type == VK_IMAGE_TYPE_2D) {
1775      assert(src_subres->layerCount == 1);
1776      src_l = 0;
1777      src_z += l;
1778      depth = 1;
1779   } else if (src->vk.image_type == VK_IMAGE_TYPE_2D &&
1780              dst->vk.image_type == VK_IMAGE_TYPE_3D) {
1781      assert(dst_subres->layerCount == 1);
1782      dst_l = 0;
1783      dst_z += l;
1784      depth = 1;
1785   } else {
1786      assert(src_subres->layerCount == dst_subres->layerCount);
1787   }
1788
1789   D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l);
1790   D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l);
1791
1792   D3D12_BOX src_box = {
1793      .left = (UINT)MAX2(region.srcOffset.x, 0),
1794      .top = (UINT)MAX2(region.srcOffset.y, 0),
1795      .front = (UINT)MAX2(src_z, 0),
1796      .right = (UINT)region.srcOffset.x + region.extent.width,
1797      .bottom = (UINT)region.srcOffset.y + region.extent.height,
1798      .back = (UINT)src_z + depth,
1799   };
1800
1801   if (!tmp_loc->pResource) {
1802      ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
1803                                                   region.dstOffset.x,
1804                                                   region.dstOffset.y,
1805                                                   dst_z, &src_loc,
1806                                                   &src_box);
1807      return;
1808   }
1809
1810   tmp_desc->Format =
1811      dzn_image_get_placed_footprint_format(src->vk.format, aspect);
1812   tmp_desc->Width = region.extent.width;
1813   tmp_desc->Height = region.extent.height;
1814
1815   ID3D12Device1_GetCopyableFootprints(dev, tmp_desc,
1816                                       0, 1, 0,
1817                                       &tmp_loc->PlacedFootprint,
1818                                       NULL, NULL, NULL);
1819
1820   tmp_loc->PlacedFootprint.Footprint.Depth = depth;
1821
1822   if (r > 0 || l > 0) {
1823      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
1824                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
1825                                               D3D12_RESOURCE_STATE_COPY_DEST,
1826                                               DZN_QUEUE_TRANSITION_FLUSH);
1827   }
1828
1829   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box);
1830
1831   if (r > 0 || l > 0) {
1832      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1,
1833                                               D3D12_RESOURCE_STATE_COPY_DEST,
1834                                               D3D12_RESOURCE_STATE_COPY_SOURCE,
1835                                               DZN_QUEUE_TRANSITION_FLUSH);
1836   }
1837
1838   tmp_desc->Format =
1839      dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
1840   if (src_blkw != dst_blkw)
1841      tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw;
1842   if (src_blkh != dst_blkh)
1843      tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh;
1844
1845   ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc,
1846                                       0, 1, 0,
1847                                       &tmp_loc->PlacedFootprint,
1848                                       NULL, NULL, NULL);
1849
1850   if (src_blkd != dst_blkd) {
1851      tmp_loc->PlacedFootprint.Footprint.Depth =
1852         DIV_ROUND_UP(depth, src_blkd) * dst_blkd;
1853   } else {
1854      tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth;
1855   }
1856
1857   D3D12_BOX tmp_box = {
1858      .left = 0,
1859      .top = 0,
1860      .front = 0,
1861      .right = tmp_loc->PlacedFootprint.Footprint.Width,
1862      .bottom = tmp_loc->PlacedFootprint.Footprint.Height,
1863      .back = tmp_loc->PlacedFootprint.Footprint.Depth,
1864   };
1865
1866   ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc,
1867                                                region.dstOffset.x,
1868                                                region.dstOffset.y,
1869                                                dst_z,
1870                                                tmp_loc, &tmp_box);
1871}
1872
1873static void
1874dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf,
1875                                     VkImage image,
1876                                     VkImageAspectFlagBits aspect,
1877                                     const VkImageSubresourceLayers *subres,
1878                                     struct dzn_descriptor_heap *heap,
1879                                     uint32_t heap_slot)
1880{
1881   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1882   VK_FROM_HANDLE(dzn_image, img, image);
1883   VkImageViewCreateInfo iview_info = {
1884      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1885      .image = image,
1886      .format = img->vk.format,
1887      .subresourceRange = {
1888         .aspectMask = (VkImageAspectFlags)aspect,
1889         .baseMipLevel = subres->mipLevel,
1890         .levelCount = 1,
1891         .baseArrayLayer = subres->baseArrayLayer,
1892         .layerCount = subres->layerCount,
1893      },
1894   };
1895
1896   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
1897      iview_info.components.r = VK_COMPONENT_SWIZZLE_G;
1898      iview_info.components.g = VK_COMPONENT_SWIZZLE_G;
1899      iview_info.components.b = VK_COMPONENT_SWIZZLE_G;
1900      iview_info.components.a = VK_COMPONENT_SWIZZLE_G;
1901   } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
1902      iview_info.components.r = VK_COMPONENT_SWIZZLE_R;
1903      iview_info.components.g = VK_COMPONENT_SWIZZLE_R;
1904      iview_info.components.b = VK_COMPONENT_SWIZZLE_R;
1905      iview_info.components.a = VK_COMPONENT_SWIZZLE_R;
1906   }
1907
1908   switch (img->vk.image_type) {
1909   case VK_IMAGE_TYPE_1D:
1910      iview_info.viewType = img->vk.array_layers > 1 ?
1911                            VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
1912      break;
1913   case VK_IMAGE_TYPE_2D:
1914      iview_info.viewType = img->vk.array_layers > 1 ?
1915                            VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
1916      break;
1917   case VK_IMAGE_TYPE_3D:
1918      iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
1919      break;
1920   default:
1921      unreachable("Invalid type");
1922   }
1923
1924   struct dzn_image_view iview;
1925   dzn_image_view_init(device, &iview, &iview_info);
1926   dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview);
1927   dzn_image_view_finish(&iview);
1928
1929   D3D12_GPU_DESCRIPTOR_HANDLE handle =
1930      dzn_descriptor_heap_get_gpu_handle(heap, heap_slot);
1931   ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle);
1932}
1933
1934static void
1935dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf,
1936                                     struct dzn_image *img,
1937                                     VkImageAspectFlagBits aspect,
1938                                     uint32_t level, uint32_t layer)
1939{
1940   bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
1941   VkImageSubresourceRange range = {
1942      .aspectMask = (VkImageAspectFlags)aspect,
1943      .baseMipLevel = level,
1944      .levelCount = 1,
1945      .baseArrayLayer = layer,
1946      .layerCount = 1,
1947   };
1948
1949   if (ds) {
1950      D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0);
1951      D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc);
1952      ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, TRUE, &handle);
1953   } else {
1954      D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0);
1955      D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc);
1956      ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, FALSE, NULL);
1957   }
1958}
1959
1960static void
1961dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf,
1962                                 const struct dzn_image *src,
1963                                 const struct dzn_image *dst,
1964                                 VkImageAspectFlagBits aspect,
1965                                 VkFilter filter, bool resolve)
1966{
1967   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
1968   enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format);
1969   VkImageUsageFlags usage =
1970      vk_format_is_depth_or_stencil(dst->vk.format) ?
1971      VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT :
1972      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
1973   struct dzn_meta_blit_key ctx_key = {
1974      .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect),
1975      .samples = (uint32_t)src->vk.samples,
1976      .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ?
1977                        FRAG_RESULT_DEPTH :
1978                        aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
1979                        FRAG_RESULT_STENCIL :
1980                        FRAG_RESULT_DATA0),
1981      .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT :
1982                             util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT :
1983                             aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT :
1984                             GLSL_TYPE_FLOAT),
1985      .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D :
1986                                src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D :
1987                                src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS :
1988                                GLSL_SAMPLER_DIM_3D),
1989      .src_is_array = src->vk.array_layers > 1,
1990      .resolve = resolve,
1991      .linear_filter = filter == VK_FILTER_LINEAR,
1992      .padding = 0,
1993   };
1994
1995   const struct dzn_meta_blit *ctx =
1996      dzn_meta_blits_get_context(device, &ctx_key);
1997   assert(ctx);
1998
1999   ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig);
2000   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state);
2001}
2002
2003static void
2004dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf,
2005                                  const struct dzn_image *src,
2006                                  const VkImageSubresourceLayers *src_subres,
2007                                  const VkOffset3D *src_offsets,
2008                                  const struct dzn_image *dst,
2009                                  const VkImageSubresourceLayers *dst_subres,
2010                                  const VkOffset3D *dst_offsets,
2011                                  bool normalize_src_coords)
2012{
2013   uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel);
2014   uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel);
2015   uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel);
2016   uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel);
2017
2018   float dst_pos[4] = {
2019      (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f),
2020      (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f),
2021   };
2022
2023   float src_pos[4] = {
2024      (float)src_offsets[0].x, (float)src_offsets[0].y,
2025      (float)src_offsets[1].x, (float)src_offsets[1].y,
2026   };
2027
2028   if (normalize_src_coords) {
2029      src_pos[0] /= src_w;
2030      src_pos[1] /= src_h;
2031      src_pos[2] /= src_w;
2032      src_pos[3] /= src_h;
2033   }
2034
2035   float coords[] = {
2036      dst_pos[0], dst_pos[1], src_pos[0], src_pos[1],
2037      dst_pos[2], dst_pos[1], src_pos[2], src_pos[1],
2038      dst_pos[0], dst_pos[3], src_pos[0], src_pos[3],
2039      dst_pos[2], dst_pos[3], src_pos[2], src_pos[3],
2040   };
2041
2042   ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0);
2043
2044   D3D12_VIEWPORT vp = {
2045      .TopLeftX = 0,
2046      .TopLeftY = 0,
2047      .Width = (float)dst_w,
2048      .Height = (float)dst_h,
2049      .MinDepth = 0,
2050      .MaxDepth = 1,
2051   };
2052   ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp);
2053
2054   D3D12_RECT scissor = {
2055      .left = MIN2(dst_offsets[0].x, dst_offsets[1].x),
2056      .top = MIN2(dst_offsets[0].y, dst_offsets[1].y),
2057      .right = MAX2(dst_offsets[0].x, dst_offsets[1].x),
2058      .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y),
2059   };
2060   ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor);
2061}
2062
2063static void
2064dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf,
2065                                   struct dzn_image *src, VkImageLayout src_layout,
2066                                   const VkImageSubresourceLayers *src_subres,
2067                                   struct dzn_image *dst, VkImageLayout dst_layout,
2068                                   const VkImageSubresourceLayers *dst_subres,
2069                                   VkImageAspectFlagBits aspect,
2070                                   bool post)
2071{
2072   VkImageSubresourceRange src_range = {
2073      .aspectMask = src_subres->aspectMask,
2074      .baseMipLevel = src_subres->mipLevel,
2075      .levelCount = 1,
2076      .baseArrayLayer = src_subres->baseArrayLayer,
2077      .layerCount = src_subres->layerCount,
2078   };
2079   VkImageSubresourceRange dst_range = {
2080      .aspectMask = dst_subres->aspectMask,
2081      .baseMipLevel = dst_subres->mipLevel,
2082      .levelCount = 1,
2083      .baseArrayLayer = dst_subres->baseArrayLayer,
2084      .layerCount = dst_subres->layerCount,
2085   };
2086
2087   if (!post) {
2088      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2089                                                         src_layout,
2090                                                         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2091                                                         DZN_QUEUE_TRANSITION_FLUSH);
2092      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2093                                                         dst_layout,
2094                                                         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2095                                                         DZN_QUEUE_TRANSITION_FLUSH);
2096   } else {
2097      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range,
2098                                                         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2099                                                         src_layout,
2100                                                         DZN_QUEUE_TRANSITION_FLUSH);
2101      dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range,
2102                                                         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
2103                                                         dst_layout,
2104                                                         DZN_QUEUE_TRANSITION_FLUSH);
2105   }
2106}
2107
2108static void
2109dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf,
2110                           const VkBlitImageInfo2 *info,
2111                           struct dzn_descriptor_heap *heap,
2112                           uint32_t *heap_slot,
2113                           uint32_t r)
2114{
2115   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2116   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2117
2118   const VkImageBlit2 *region = &info->pRegions[r];
2119   bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
2120   bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;
2121
2122   dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
2123      dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false);
2124      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2125                                         src, info->srcImageLayout, &region->srcSubresource,
2126                                         dst, info->dstImageLayout, &region->dstSubresource,
2127                                         aspect, false);
2128      dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage,
2129                                           aspect, &region->srcSubresource,
2130                                           heap, (*heap_slot)++);
2131      dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
2132                                        src, &region->srcSubresource, region->srcOffsets,
2133                                        dst, &region->dstSubresource, region->dstOffsets,
2134                                        src->vk.samples == 1);
2135
2136      uint32_t dst_depth =
2137         region->dstOffsets[1].z > region->dstOffsets[0].z ?
2138         region->dstOffsets[1].z - region->dstOffsets[0].z :
2139         region->dstOffsets[0].z - region->dstOffsets[1].z;
2140      uint32_t src_depth =
2141         region->srcOffsets[1].z > region->srcOffsets[0].z ?
2142         region->srcOffsets[1].z - region->srcOffsets[0].z :
2143         region->srcOffsets[0].z - region->srcOffsets[1].z;
2144
2145      uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
2146      uint32_t dst_level = region->dstSubresource.mipLevel;
2147
2148      float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1;
2149      if (region->srcOffsets[0].z > region->srcOffsets[1].z)
2150         src_slice_step = -src_slice_step;
2151      float src_z_coord =
2152         src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0;
2153      uint32_t slice_count = dst_is_3d ? dst_depth : layer_count;
2154      uint32_t dst_z_coord =
2155         dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer;
2156      if (region->dstOffsets[0].z > region->dstOffsets[1].z)
2157         dst_z_coord--;
2158
2159      uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ?
2160                                1 : -1;
2161
2162      /* Normalize the src coordinates/step */
2163      if (src_is_3d) {
2164         src_z_coord /= src->vk.extent.depth;
2165         src_slice_step /= src->vk.extent.depth;
2166      }
2167
2168      for (uint32_t slice = 0; slice < slice_count; slice++) {
2169         dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord);
2170         ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
2171         ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
2172         src_z_coord += src_slice_step;
2173         dst_z_coord += dst_slice_step;
2174      }
2175
2176      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2177                                         src, info->srcImageLayout, &region->srcSubresource,
2178                                         dst, info->dstImageLayout, &region->dstSubresource,
2179                                         aspect, true);
2180   }
2181}
2182
2183static void
2184dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf,
2185                              const VkResolveImageInfo2 *info,
2186                              struct dzn_descriptor_heap *heap,
2187                              uint32_t *heap_slot,
2188                              uint32_t r)
2189{
2190   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
2191   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
2192
2193   const VkImageResolve2 *region = &info->pRegions[r];
2194
2195   dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
2196      dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true);
2197      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2198                                         src, info->srcImageLayout, &region->srcSubresource,
2199                                         dst, info->dstImageLayout, &region->dstSubresource,
2200                                         aspect, false);
2201      dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect,
2202                                           &region->srcSubresource,
2203                                           heap, (*heap_slot)++);
2204
2205      VkOffset3D src_offset[2] = {
2206         {
2207            .x = region->srcOffset.x,
2208            .y = region->srcOffset.y,
2209         },
2210         {
2211            .x = (int32_t)(region->srcOffset.x + region->extent.width),
2212            .y = (int32_t)(region->srcOffset.y + region->extent.height),
2213         },
2214      };
2215      VkOffset3D dst_offset[2] = {
2216         {
2217            .x = region->dstOffset.x,
2218            .y = region->dstOffset.y,
2219         },
2220         {
2221            .x = (int32_t)(region->dstOffset.x + region->extent.width),
2222            .y = (int32_t)(region->dstOffset.y + region->extent.height),
2223         },
2224      };
2225
2226      dzn_cmd_buffer_blit_set_2d_region(cmdbuf,
2227                                        src, &region->srcSubresource, src_offset,
2228                                        dst, &region->dstSubresource, dst_offset,
2229                                        false);
2230
2231      uint32_t layer_count = dzn_get_layer_count(src, &region->srcSubresource);
2232      for (uint32_t layer = 0; layer < layer_count; layer++) {
2233         float src_z_coord = layer;
2234
2235         dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf,
2236                                              dst, aspect, region->dstSubresource.mipLevel,
2237                                              region->dstSubresource.baseArrayLayer + layer);
2238         ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16);
2239         ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0);
2240      }
2241
2242      dzn_cmd_buffer_blit_issue_barriers(cmdbuf,
2243                                         src, info->srcImageLayout, &region->srcSubresource,
2244                                         dst, info->dstImageLayout, &region->dstSubresource,
2245                                         aspect, true);
2246   }
2247}
2248
2249static void
2250dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2251{
2252   const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
2253
2254   if (!pipeline)
2255      return;
2256
2257   ID3D12PipelineState *old_pipeline_state =
2258      cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL;
2259
2260   if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
2261      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2262         struct dzn_graphics_pipeline *gfx =
2263            (struct dzn_graphics_pipeline *)pipeline;
2264         ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
2265         ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology);
2266         dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant);
2267      } else {
2268         ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig);
2269      }
2270   }
2271
2272   ID3D12PipelineState *new_pipeline_state = pipeline->state;
2273
2274   if (old_pipeline_state != new_pipeline_state) {
2275      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state);
2276      cmdbuf->state.pipeline = pipeline;
2277   }
2278}
2279
2280static void
2281dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2282{
2283   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2284   struct dzn_descriptor_state *desc_state =
2285      &cmdbuf->state.bindpoint[bindpoint].desc_state;
2286   struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
2287      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
2288      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
2289   };
2290   uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 };
2291   bool update_root_desc_table[NUM_POOL_TYPES] = { 0 };
2292   const struct dzn_pipeline *pipeline =
2293      cmdbuf->state.bindpoint[bindpoint].pipeline;
2294
2295   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS))
2296      goto set_heaps;
2297
2298   dzn_foreach_pool_type (type) {
2299      uint32_t desc_count = pipeline->desc_count[type];
2300      if (!desc_count)
2301         continue;
2302
2303      struct dzn_descriptor_heap_pool *pool =
2304         type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
2305         &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool;
2306      struct dzn_descriptor_heap *dst_heap = NULL;
2307      uint32_t dst_heap_offset = 0;
2308
2309      dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count,
2310                                           &dst_heap, &dst_heap_offset);
2311      new_heap_offsets[type] = dst_heap_offset;
2312      update_root_desc_table[type] = true;
2313
2314      for (uint32_t s = 0; s < MAX_SETS; s++) {
2315         const struct dzn_descriptor_set *set = desc_state->sets[s].set;
2316         if (!set) continue;
2317
2318         uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type];
2319         uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type];
2320         if (set_desc_count) {
2321            mtx_lock(&set->pool->defragment_lock);
2322            dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset,
2323                                     &set->pool->heaps[type], set->heap_offsets[type],
2324                                     set_desc_count);
2325            mtx_unlock(&set->pool->defragment_lock);
2326         }
2327
2328         if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) {
2329            uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count;
2330            for (uint32_t o = 0; o < dynamic_buffer_count; o++) {
2331               uint32_t desc_heap_offset =
2332                  pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv;
2333               struct dzn_buffer_desc bdesc = set->dynamic_buffers[o];
2334               bdesc.offset += desc_state->sets[s].dynamic_offsets[o];
2335
2336               dzn_descriptor_heap_write_buffer_desc(dst_heap,
2337                                                     dst_heap_offset + set_heap_offset + desc_heap_offset,
2338                                                     false, &bdesc);
2339
2340               if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) {
2341                  desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav;
2342                  dzn_descriptor_heap_write_buffer_desc(dst_heap,
2343                                                        dst_heap_offset + set_heap_offset + desc_heap_offset,
2344                                                        true, &bdesc);
2345               }
2346            }
2347         }
2348      }
2349
2350      new_heaps[type] = dst_heap;
2351   }
2352
2353set_heaps:
2354   if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] ||
2355       new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) {
2356      ID3D12DescriptorHeap *desc_heaps[2];
2357      uint32_t num_desc_heaps = 0;
2358      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV])
2359         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap;
2360      if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])
2361         desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap;
2362      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps);
2363
2364      for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++)
2365         cmdbuf->state.heaps[h] = new_heaps[h];
2366   }
2367
2368   for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) {
2369      D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r];
2370
2371      if (!update_root_desc_table[type])
2372         continue;
2373
2374      D3D12_GPU_DESCRIPTOR_HANDLE handle =
2375         dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]);
2376
2377      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
2378         ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle);
2379      else
2380         ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle);
2381   }
2382}
2383
2384static void
2385dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2386{
2387   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS))
2388      return;
2389
2390   const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
2391   uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx;
2392
2393   if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2394      ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
2395                                                     sizeof(cmdbuf->state.sysvals.gfx) / 4,
2396                                                     &cmdbuf->state.sysvals.gfx, 0);
2397   } else {
2398      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx,
2399                                                    sizeof(cmdbuf->state.sysvals.compute) / 4,
2400                                                    &cmdbuf->state.sysvals.compute, 0);
2401   }
2402}
2403
2404static void
2405dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf)
2406{
2407   const struct dzn_graphics_pipeline *pipeline =
2408      (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
2409
2410   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
2411       !pipeline->vp.count)
2412      return;
2413
2414   ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports);
2415}
2416
2417static void
2418dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf)
2419{
2420   const struct dzn_graphics_pipeline *pipeline =
2421      (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
2422
2423   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
2424      return;
2425
2426   if (!pipeline->scissor.count) {
2427      /* Apply a scissor delimiting the render area. */
2428      ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area);
2429      return;
2430   }
2431
2432   D3D12_RECT scissors[MAX_SCISSOR];
2433
2434   memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count);
2435   for (uint32_t i = 0; i < pipeline->scissor.count; i++) {
2436      scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left);
2437      scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top);
2438      scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right);
2439      scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom);
2440   }
2441
2442   ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors);
2443}
2444
2445static void
2446dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf)
2447{
2448   unsigned start, end;
2449
2450   BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
2451      ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, cmdbuf->state.vb.views);
2452
2453   BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
2454}
2455
2456static void
2457dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf)
2458{
2459   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
2460      return;
2461
2462   ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view);
2463}
2464
2465static void
2466dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
2467{
2468   struct dzn_cmd_buffer_push_constant_state *state =
2469      bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
2470      &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
2471
2472   uint32_t offset = state->offset / 4;
2473   uint32_t end = ALIGN(state->end, 4) / 4;
2474   uint32_t count = end - offset;
2475
2476   if (!count)
2477      return;
2478
2479   uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
2480   uint32_t *vals = state->values + offset;
2481
2482   if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
2483      ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
2484   else
2485      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset);
2486
2487   state->offset = 0;
2488   state->end = 0;
2489}
2490
2491static void
2492dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf)
2493{
2494   if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
2495      const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)
2496         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2497      uint32_t ref =
2498         gfx->zsa.stencil_test.front.uses_ref ?
2499         cmdbuf->state.zsa.stencil_test.front.ref :
2500         cmdbuf->state.zsa.stencil_test.back.ref;
2501      ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref);
2502   }
2503}
2504
2505static void
2506dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf)
2507{
2508   if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS)
2509      ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist,
2510                                                  cmdbuf->state.blend.constants);
2511}
2512
2513static void
2514dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf)
2515{
2516   if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) {
2517      ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist,
2518                                                  cmdbuf->state.zsa.depth_bounds.min,
2519                                                  cmdbuf->state.zsa.depth_bounds.max);
2520   }
2521}
2522
2523static VkResult
2524dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
2525{
2526   uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
2527   uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
2528
2529   *vertex_count = triangle_count * 3;
2530   if (!*vertex_count)
2531      return VK_SUCCESS;
2532
2533   ID3D12Resource *index_buf;
2534   VkResult result =
2535      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
2536                                        D3D12_HEAP_TYPE_UPLOAD,
2537                                        D3D12_RESOURCE_STATE_GENERIC_READ,
2538                                        &index_buf);
2539   if (result != VK_SUCCESS)
2540      return result;
2541
2542   void *cpu_ptr;
2543   ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr);
2544
2545   /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
2546   if (index_size == 2) {
2547      uint16_t *indices = (uint16_t *)cpu_ptr;
2548      for (uint32_t t = 0; t < triangle_count; t++) {
2549         indices[t * 3] = t + 1;
2550         indices[(t * 3) + 1] = t + 2;
2551         indices[(t * 3) + 2] = 0;
2552      }
2553      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
2554   } else {
2555      uint32_t *indices = (uint32_t *)cpu_ptr;
2556      for (uint32_t t = 0; t < triangle_count; t++) {
2557         indices[t * 3] = t + 1;
2558         indices[(t * 3) + 1] = t + 2;
2559         indices[(t * 3) + 2] = 0;
2560      }
2561      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
2562   }
2563
2564   cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
2565   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf);
2566   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2567   return VK_SUCCESS;
2568}
2569
2570static VkResult
2571dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf,
2572                                          uint32_t *index_count,
2573                                          uint32_t *first_index)
2574{
2575   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2576   uint32_t triangle_count = MAX2(*index_count, 2) - 2;
2577
2578   *index_count = triangle_count * 3;
2579   if (!*index_count)
2580      return VK_SUCCESS;
2581
2582   /* New index is always 32bit to make the compute shader rewriting the
2583    * index simpler */
2584   ID3D12Resource *new_index_buf;
2585   VkResult result =
2586      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
2587                                        D3D12_HEAP_TYPE_DEFAULT,
2588                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2589                                        &new_index_buf);
2590   if (result != VK_SUCCESS)
2591      return result;
2592
2593   D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
2594      cmdbuf->state.ib.view.BufferLocation;
2595
2596   ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *)
2597      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2598   ASSERTED bool prim_restart =
2599      dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL;
2600
2601   assert(!prim_restart);
2602
2603   enum dzn_index_type index_type =
2604      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false);
2605   const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
2606      &device->triangle_fan[index_type];
2607
2608   struct dzn_triangle_fan_rewrite_index_params params = {
2609      .first_index = *first_index,
2610   };
2611
2612   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
2613   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
2614   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf));
2615   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4,
2616                                                 &params, 0);
2617   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu);
2618   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1);
2619
2620   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1,
2621                                            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2622                                            D3D12_RESOURCE_STATE_INDEX_BUFFER,
2623                                            DZN_QUEUE_TRANSITION_FLUSH);
2624
2625   /* We don't mess up with the driver state when executing our internal
2626    * compute shader, but we still change the D3D12 state, so let's mark
2627    * things dirty if needed.
2628    */
2629   cmdbuf->state.pipeline = NULL;
2630   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
2631      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
2632         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2633   }
2634
2635   cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
2636   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf);
2637   cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
2638   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2639   *first_index = 0;
2640   return VK_SUCCESS;
2641}
2642
2643static void
2644dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed)
2645{
2646   if (indexed)
2647      dzn_cmd_buffer_update_ibview(cmdbuf);
2648
2649   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2650   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2651   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2652   dzn_cmd_buffer_update_viewports(cmdbuf);
2653   dzn_cmd_buffer_update_scissors(cmdbuf);
2654   dzn_cmd_buffer_update_vbviews(cmdbuf);
2655   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
2656   dzn_cmd_buffer_update_zsa(cmdbuf);
2657   dzn_cmd_buffer_update_blend_constants(cmdbuf);
2658   dzn_cmd_buffer_update_depth_bounds(cmdbuf);
2659
2660   /* Reset the dirty states */
2661   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0;
2662   cmdbuf->state.dirty = 0;
2663}
2664
2665static uint32_t
2666dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed)
2667{
2668   struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
2669      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2670
2671   if (!pipeline->ia.triangle_fan)
2672      return 0;
2673
2674   uint32_t max_triangles;
2675
2676   if (indexed) {
2677      uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
2678      uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
2679
2680      max_triangles = MAX2(max_indices, 2) - 2;
2681   } else {
2682      uint32_t max_vertex = 0;
2683      for (uint32_t i = 0; i < pipeline->vb.count; i++) {
2684         max_vertex =
2685            MAX2(max_vertex,
2686                 cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
2687      }
2688
2689      max_triangles = MAX2(max_vertex, 2) - 2;
2690   }
2691
2692   return max_triangles * 3;
2693}
2694
2695static void
2696dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf,
2697                             ID3D12Resource *draw_buf,
2698                             size_t draw_buf_offset,
2699                             ID3D12Resource *count_buf,
2700                             size_t count_buf_offset,
2701                             uint32_t max_draw_count,
2702                             uint32_t draw_buf_stride,
2703                             bool indexed)
2704{
2705   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
2706   struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *)
2707      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
2708   uint32_t min_draw_buf_stride =
2709      indexed ?
2710      sizeof(struct dzn_indirect_indexed_draw_params) :
2711      sizeof(struct dzn_indirect_draw_params);
2712   bool prim_restart =
2713      dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL;
2714
2715   draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride;
2716   assert(draw_buf_stride >= min_draw_buf_stride);
2717   assert((draw_buf_stride & 3) == 0);
2718
2719   uint32_t triangle_fan_index_buf_stride =
2720      dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
2721      sizeof(uint32_t);
2722   uint32_t exec_buf_stride =
2723      triangle_fan_index_buf_stride > 0 ?
2724      sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
2725      sizeof(struct dzn_indirect_draw_exec_params);
2726   uint32_t triangle_fan_exec_buf_stride =
2727      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
2728   uint32_t exec_buf_size = max_draw_count * exec_buf_stride;
2729   uint32_t exec_buf_draw_offset = 0;
2730
2731   // We reserve the first slot for the draw_count value when indirect count is
2732   // involved.
2733   if (count_buf != NULL) {
2734      exec_buf_size += exec_buf_stride;
2735      exec_buf_draw_offset = exec_buf_stride;
2736   }
2737
2738   ID3D12Resource *exec_buf;
2739   VkResult result =
2740      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size,
2741                                        D3D12_HEAP_TYPE_DEFAULT,
2742                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2743                                        &exec_buf);
2744   if (result != VK_SUCCESS)
2745      return;
2746
2747   D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
2748      ID3D12Resource_GetGPUVirtualAddress(draw_buf) + draw_buf_offset;
2749   ID3D12Resource *triangle_fan_index_buf = NULL;
2750   ID3D12Resource *triangle_fan_exec_buf = NULL;
2751
2752   if (triangle_fan_index_buf_stride) {
2753      result =
2754         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
2755                                           max_draw_count * triangle_fan_index_buf_stride,
2756                                           D3D12_HEAP_TYPE_DEFAULT,
2757                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2758                                           &triangle_fan_index_buf);
2759      if (result != VK_SUCCESS)
2760         return;
2761
2762      result =
2763         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
2764                                           max_draw_count * triangle_fan_exec_buf_stride,
2765                                           D3D12_HEAP_TYPE_DEFAULT,
2766                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2767                                           &triangle_fan_exec_buf);
2768      if (result != VK_SUCCESS)
2769         return;
2770   }
2771
2772   struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = {
2773      .draw_buf_stride = draw_buf_stride,
2774      .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
2775      .triangle_fan_index_buf_start =
2776         triangle_fan_index_buf ?
2777         ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0,
2778      .exec_buf_start =
2779         prim_restart ?
2780         ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0,
2781   };
2782   uint32_t params_size;
2783   if (triangle_fan_index_buf_stride > 0 && prim_restart)
2784      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params);
2785   else if (triangle_fan_index_buf_stride > 0)
2786      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
2787   else
2788      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
2789
2790   enum dzn_indirect_draw_type draw_type;
2791
2792   if (indexed && triangle_fan_index_buf_stride > 0) {
2793      if (prim_restart && count_buf)
2794         draw_type =  DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
2795      else if (prim_restart && !count_buf)
2796         draw_type =  DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART;
2797      else if (!prim_restart && count_buf)
2798         draw_type = DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN;
2799      else
2800         draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
2801   } else if (!indexed && triangle_fan_index_buf_stride > 0) {
2802      draw_type = count_buf ?
2803                  DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN :
2804                  DZN_INDIRECT_DRAW_TRIANGLE_FAN;
2805   } else if (indexed) {
2806      draw_type = count_buf ?
2807                  DZN_INDIRECT_INDEXED_DRAW_COUNT :
2808                  DZN_INDIRECT_INDEXED_DRAW;
2809   } else {
2810      draw_type = count_buf ? DZN_INDIRECT_DRAW_COUNT : DZN_INDIRECT_DRAW;
2811   }
2812
2813   struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
2814   uint32_t root_param_idx = 0;
2815
2816   ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig);
2817   ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state);
2818   ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
2819                                                           params_size / 4, (const void *)&params, 0);
2820   ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++,
2821                                                               draw_buf_gpu);
2822   ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
2823                                                                ID3D12Resource_GetGPUVirtualAddress(exec_buf));
2824   if (count_buf) {
2825      ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
2826                                                                  root_param_idx++,
2827                                                                  ID3D12Resource_GetGPUVirtualAddress(count_buf) +
2828                                                                  count_buf_offset);
2829   }
2830
2831   if (triangle_fan_exec_buf) {
2832      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist,
2833                                                                   root_param_idx++,
2834                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf));
2835   }
2836
2837   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1);
2838
2839   D3D12_INDEX_BUFFER_VIEW ib_view = { 0 };
2840
2841   if (triangle_fan_exec_buf) {
2842      enum dzn_index_type index_type =
2843         indexed ?
2844         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, prim_restart) :
2845         DZN_NO_INDEX;
2846      struct dzn_meta_triangle_fan_rewrite_index *rewrite_index =
2847         &device->triangle_fan[index_type];
2848
2849      struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 };
2850
2851      assert(rewrite_index->root_sig);
2852      assert(rewrite_index->pipeline_state);
2853      assert(rewrite_index->cmd_sig);
2854
2855      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1,
2856                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2857                                               D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
2858                                               DZN_QUEUE_TRANSITION_FLUSH);
2859
2860      ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig);
2861      ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state);
2862      root_param_idx = 0;
2863      ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++,
2864                                                                   ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf));
2865      ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++,
2866                                                              sizeof(rewrite_index_params) / 4,
2867                                                              (const void *)&rewrite_index_params, 0);
2868
2869      if (indexed) {
2870         ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist,
2871                                                                     root_param_idx++,
2872                                                                     cmdbuf->state.ib.view.BufferLocation);
2873      }
2874
2875      ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig,
2876                                                 max_draw_count, triangle_fan_exec_buf, 0,
2877                                                 count_buf ? exec_buf : NULL, 0);
2878
2879      dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1,
2880                                               D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2881                                               D3D12_RESOURCE_STATE_INDEX_BUFFER,
2882                                               DZN_QUEUE_TRANSITION_FLUSH);
2883
2884      /* After our triangle-fan lowering the draw is indexed */
2885      indexed = true;
2886      ib_view = cmdbuf->state.ib.view;
2887      cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf);
2888      cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
2889      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
2890      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2891   }
2892
2893   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
2894                                            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
2895                                            D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
2896                                            DZN_QUEUE_TRANSITION_FLUSH);
2897
2898   /* We don't mess up with the driver state when executing our internal
2899    * compute shader, but we still change the D3D12 state, so let's mark
2900    * things dirty if needed.
2901    */
2902   cmdbuf->state.pipeline = NULL;
2903   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
2904      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
2905         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
2906   }
2907
2908   cmdbuf->state.sysvals.gfx.first_vertex = 0;
2909   cmdbuf->state.sysvals.gfx.base_instance = 0;
2910   cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed;
2911   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
2912      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
2913
2914   dzn_cmd_buffer_prepare_draw(cmdbuf, indexed);
2915
2916   /* Restore the old IB view if we modified it during the triangle fan lowering */
2917   if (ib_view.SizeInBytes) {
2918      cmdbuf->state.ib.view = ib_view;
2919      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
2920   }
2921
2922   enum dzn_indirect_draw_cmd_sig_type cmd_sig_type =
2923      triangle_fan_index_buf_stride > 0 ?
2924      DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG :
2925      indexed ?
2926      DZN_INDIRECT_INDEXED_DRAW_CMD_SIG :
2927      DZN_INDIRECT_DRAW_CMD_SIG;
2928   ID3D12CommandSignature *cmdsig =
2929      dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type);
2930
2931   if (!cmdsig) {
2932      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2933      return;
2934   }
2935
2936   ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig,
2937                                              max_draw_count,
2938                                              exec_buf, exec_buf_draw_offset,
2939                                              count_buf ? exec_buf : NULL, 0);
2940}
2941
2942static void
2943dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf)
2944{
2945   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2946   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2947   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2948   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE);
2949
2950   /* Reset the dirty states */
2951   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0;
2952}
2953
2954VKAPI_ATTR void VKAPI_CALL
2955dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
2956                   const VkCopyBufferInfo2 *info)
2957{
2958   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2959   VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer);
2960   VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer);
2961
2962   for (int i = 0; i < info->regionCount; i++) {
2963      const VkBufferCopy2 *region = info->pRegions + i;
2964
2965      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset,
2966                                        src_buffer->res, region->srcOffset,
2967                                        region->size);
2968   }
2969}
2970
2971VKAPI_ATTR void VKAPI_CALL
2972dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
2973                          const VkCopyBufferToImageInfo2 *info)
2974{
2975   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2976
2977   for (int i = 0; i < info->regionCount; i++) {
2978      const VkBufferImageCopy2 *region = info->pRegions + i;
2979
2980      dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
2981         for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
2982            dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l);
2983      }
2984   }
2985}
2986
2987VKAPI_ATTR void VKAPI_CALL
2988dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
2989                          const VkCopyImageToBufferInfo2 *info)
2990{
2991   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
2992
2993   for (int i = 0; i < info->regionCount; i++) {
2994      const VkBufferImageCopy2 *region = info->pRegions + i;
2995
2996      dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) {
2997         for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++)
2998            dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l);
2999      }
3000   }
3001}
3002
3003VKAPI_ATTR void VKAPI_CALL
3004dzn_CmdCopyImage2(VkCommandBuffer commandBuffer,
3005                  const VkCopyImageInfo2 *info)
3006{
3007   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3008   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3009   VK_FROM_HANDLE(dzn_image, src, info->srcImage);
3010   VK_FROM_HANDLE(dzn_image, dst, info->dstImage);
3011
3012   assert(src->vk.samples == dst->vk.samples);
3013
3014   bool requires_temp_res = false;
3015
3016   for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) {
3017      const VkImageCopy2 *region = &info->pRegions[i];
3018
3019      dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3020         assert(aspect & region->dstSubresource.aspectMask);
3021
3022         if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format,
3023                                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) &&
3024             src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
3025             dst->vk.tiling != VK_IMAGE_TILING_LINEAR) {
3026            requires_temp_res = true;
3027            break;
3028         }
3029      }
3030   }
3031
3032   bool use_blit = false;
3033   if (src->vk.samples > 1) {
3034      use_blit = requires_temp_res;
3035
3036      for (int i = 0; i < info->regionCount; i++) {
3037         const VkImageCopy2 *region = info->pRegions + i;
3038         if (region->srcOffset.x != 0 || region->srcOffset.y != 0 ||
3039             region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) ||
3040             region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) ||
3041             region->dstOffset.x != 0 || region->dstOffset.y != 0 ||
3042             region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) ||
3043             region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel))
3044            use_blit = true;
3045      }
3046   }
3047
3048   if (use_blit) {
3049      /* This copy -> blit lowering doesn't work if the vkCmdCopyImage[2]() is
3050       * is issued on a transfer queue, but we don't have any better option
3051       * right now...
3052       */
3053      STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount);
3054
3055      VkBlitImageInfo2 blit_info = {
3056         .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2,
3057         .srcImage = info->srcImage,
3058         .srcImageLayout = info->srcImageLayout,
3059         .dstImage = info->dstImage,
3060         .dstImageLayout = info->dstImageLayout,
3061         .regionCount = info->regionCount,
3062         .pRegions = blit_regions,
3063         .filter = VK_FILTER_NEAREST,
3064      };
3065
3066      for (uint32_t r = 0; r < info->regionCount; r++) {
3067         blit_regions[r] = (VkImageBlit2) {
3068            .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
3069            .srcSubresource = info->pRegions[r].srcSubresource,
3070            .srcOffsets = {
3071                info->pRegions[r].srcOffset,
3072                info->pRegions[r].srcOffset,
3073            },
3074            .dstSubresource = info->pRegions[r].dstSubresource,
3075            .dstOffsets = {
3076                info->pRegions[r].dstOffset,
3077                info->pRegions[r].dstOffset,
3078            },
3079         };
3080
3081         blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width;
3082         blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height;
3083         blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth;
3084         blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width;
3085         blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height;
3086         blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth;
3087      }
3088
3089      dzn_CmdBlitImage2(commandBuffer, &blit_info);
3090
3091      STACK_ARRAY_FINISH(blit_regions);
3092      return;
3093   }
3094
3095   D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 };
3096   D3D12_RESOURCE_DESC tmp_desc = {
3097      .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
3098      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
3099      .DepthOrArraySize = 1,
3100      .MipLevels = 1,
3101      .Format = src->desc.Format,
3102      .SampleDesc = { .Count = 1, .Quality = 0 },
3103      .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
3104      .Flags = D3D12_RESOURCE_FLAG_NONE,
3105   };
3106
3107   if (requires_temp_res) {
3108      ID3D12Device2 *dev = device->dev;
3109      VkImageAspectFlags aspect = 0;
3110      uint64_t max_size = 0;
3111
3112      if (vk_format_has_depth(src->vk.format))
3113         aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
3114      else if (vk_format_has_stencil(src->vk.format))
3115         aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
3116      else
3117         aspect = VK_IMAGE_ASPECT_COLOR_BIT;
3118
3119      for (uint32_t i = 0; i < info->regionCount; i++) {
3120         const VkImageCopy2 *region = &info->pRegions[i];
3121         uint64_t region_size = 0;
3122
3123         tmp_desc.Format =
3124            dzn_image_get_dxgi_format(src->vk.format,
3125                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
3126                                      aspect);
3127         tmp_desc.Width = region->extent.width;
3128         tmp_desc.Height = region->extent.height;
3129
3130         ID3D12Device1_GetCopyableFootprints(dev, &src->desc,
3131                                             0, 1, 0,
3132                                             NULL, NULL, NULL,
3133                                             &region_size);
3134         max_size = MAX2(max_size, region_size * region->extent.depth);
3135      }
3136
3137      VkResult result =
3138         dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
3139                                           D3D12_HEAP_TYPE_DEFAULT,
3140                                           D3D12_RESOURCE_STATE_COPY_DEST,
3141                                           &tmp_loc.pResource);
3142      if (result != VK_SUCCESS)
3143         return;
3144
3145      tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
3146   }
3147
3148   for (int i = 0; i < info->regionCount; i++) {
3149      const VkImageCopy2 *region = &info->pRegions[i];
3150
3151      dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) {
3152         for (uint32_t l = 0; l < region->srcSubresource.layerCount; l++)
3153            dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l);
3154      }
3155   }
3156}
3157
3158VKAPI_ATTR void VKAPI_CALL
3159dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
3160                  const VkBlitImageInfo2 *info)
3161{
3162   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3163   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3164
3165   if (info->regionCount == 0)
3166      return;
3167
3168   uint32_t desc_count = 0;
3169   for (uint32_t r = 0; r < info->regionCount; r++)
3170      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
3171
3172   struct dzn_descriptor_heap *heap;
3173   uint32_t heap_slot;
3174   VkResult result =
3175      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
3176                                           desc_count, &heap, &heap_slot);
3177
3178   if (result != VK_SUCCESS) {
3179      cmdbuf->error = result;
3180      return;
3181   }
3182
3183   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
3184      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
3185      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
3186      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
3187   }
3188
3189   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
3190
3191   for (uint32_t r = 0; r < info->regionCount; r++)
3192      dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);
3193
3194   cmdbuf->state.pipeline = NULL;
3195   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
3196   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
3197      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3198         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3199   }
3200}
3201
3202VKAPI_ATTR void VKAPI_CALL
3203dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
3204                     const VkResolveImageInfo2 *info)
3205{
3206   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3207   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
3208
3209   if (info->regionCount == 0)
3210      return;
3211
3212   uint32_t desc_count = 0;
3213   for (uint32_t r = 0; r < info->regionCount; r++)
3214      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
3215
3216   struct dzn_descriptor_heap *heap;
3217   uint32_t heap_slot;
3218   VkResult result =
3219      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
3220                                           desc_count, &heap, &heap_slot);
3221   if (result != VK_SUCCESS) {
3222      cmdbuf->error = result;
3223      return;
3224   }
3225
3226   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
3227      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
3228      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
3229      ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, ARRAY_SIZE(heaps), heaps);
3230   }
3231
3232   ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
3233
3234   uint32_t heap_offset = 0;
3235   for (uint32_t r = 0; r < info->regionCount; r++)
3236      dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_offset, r);
3237
3238   cmdbuf->state.pipeline = NULL;
3239   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
3240   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
3241      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3242         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3243   }
3244}
3245
3246VKAPI_ATTR void VKAPI_CALL
3247dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
3248                       VkImage image,
3249                       VkImageLayout imageLayout,
3250                       const VkClearColorValue *pColor,
3251                       uint32_t rangeCount,
3252                       const VkImageSubresourceRange *pRanges)
3253{
3254   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3255   VK_FROM_HANDLE(dzn_image, img, image);
3256
3257   dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
3258}
3259
3260VKAPI_ATTR void VKAPI_CALL
3261dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
3262                              VkImage image,
3263                              VkImageLayout imageLayout,
3264                              const VkClearDepthStencilValue *pDepthStencil,
3265                              uint32_t rangeCount,
3266                              const VkImageSubresourceRange *pRanges)
3267{
3268   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3269   VK_FROM_HANDLE(dzn_image, img, image);
3270
3271   dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
3272}
3273
3274VKAPI_ATTR void VKAPI_CALL
3275dzn_CmdDispatch(VkCommandBuffer commandBuffer,
3276                uint32_t groupCountX,
3277                uint32_t groupCountY,
3278                uint32_t groupCountZ)
3279{
3280   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3281
3282   cmdbuf->state.sysvals.compute.group_count_x = groupCountX;
3283   cmdbuf->state.sysvals.compute.group_count_y = groupCountY;
3284   cmdbuf->state.sysvals.compute.group_count_z = groupCountZ;
3285   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
3286      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3287
3288   dzn_cmd_buffer_prepare_dispatch(cmdbuf);
3289   ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ);
3290}
3291
3292VKAPI_ATTR void VKAPI_CALL
3293dzn_CmdFillBuffer(VkCommandBuffer commandBuffer,
3294                  VkBuffer dstBuffer,
3295                  VkDeviceSize dstOffset,
3296                  VkDeviceSize size,
3297                  uint32_t data)
3298{
3299   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3300   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
3301
3302   if (size == VK_WHOLE_SIZE)
3303      size = buf->size - dstOffset;
3304
3305   size &= ~3ULL;
3306
3307   ID3D12Resource *src_res;
3308   VkResult result =
3309      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
3310                                        D3D12_HEAP_TYPE_UPLOAD,
3311                                        D3D12_RESOURCE_STATE_GENERIC_READ,
3312                                        &src_res);
3313   if (result != VK_SUCCESS)
3314      return;
3315
3316   uint32_t *cpu_ptr;
3317   ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr);
3318   for (uint32_t i = 0; i < size / 4; i++)
3319      cpu_ptr[i] = data;
3320
3321   ID3D12Resource_Unmap(src_res, 0, NULL);
3322
3323   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
3324}
3325
3326VKAPI_ATTR void VKAPI_CALL
3327dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
3328                    VkBuffer dstBuffer,
3329                    VkDeviceSize dstOffset,
3330                    VkDeviceSize size,
3331                    const void *data)
3332{
3333   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3334   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
3335
3336   if (size == VK_WHOLE_SIZE)
3337      size = buf->size - dstOffset;
3338
3339   /*
3340    * The spec says:
3341    *   4, or VK_WHOLE_SIZE to fill the range from offset to the end of the
3342    *   buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer
3343    *   is not a multiple of 4, then the nearest smaller multiple is used."
3344    */
3345   size &= ~3ULL;
3346
3347   ID3D12Resource *src_res;
3348   VkResult result =
3349      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
3350                                        D3D12_HEAP_TYPE_UPLOAD,
3351                                        D3D12_RESOURCE_STATE_GENERIC_READ,
3352                                        &src_res);
3353   if (result != VK_SUCCESS)
3354      return;
3355
3356   void *cpu_ptr;
3357   ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr);
3358   memcpy(cpu_ptr, data, size),
3359   ID3D12Resource_Unmap(src_res, 0, NULL);
3360
3361   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, 0, size);
3362}
3363
3364VKAPI_ATTR void VKAPI_CALL
3365dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
3366                        uint32_t attachmentCount,
3367                        const VkClearAttachment *pAttachments,
3368                        uint32_t rectCount,
3369                        const VkClearRect *pRects)
3370{
3371   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3372
3373   for (unsigned i = 0; i < attachmentCount; i++) {
3374      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
3375      struct dzn_image_view *view = NULL;
3376
3377      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
3378         assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count);
3379         view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview;
3380         layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout;
3381      } else {
3382         if (cmdbuf->state.render.attachments.depth.iview &&
3383             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
3384            view = cmdbuf->state.render.attachments.depth.iview;
3385            layout = cmdbuf->state.render.attachments.depth.layout;
3386         }
3387
3388         if (cmdbuf->state.render.attachments.stencil.iview &&
3389             (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) {
3390            assert(!view || view == cmdbuf->state.render.attachments.depth.iview);
3391            view = cmdbuf->state.render.attachments.stencil.iview;
3392            layout = cmdbuf->state.render.attachments.stencil.layout;
3393         }
3394      }
3395
3396      if (!view)
3397         continue;
3398
3399      for (uint32_t j = 0; j < rectCount; j++) {
3400         D3D12_RECT rect;
3401
3402         dzn_translate_rect(&rect, &pRects[j].rect);
3403         dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout,
3404                                         &pAttachments[i].clearValue,
3405                                         pAttachments[i].aspectMask,
3406                                         pRects[j].baseArrayLayer,
3407                                         pRects[j].layerCount,
3408                                         1, &rect);
3409      }
3410   }
3411}
3412
3413static void
3414dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf,
3415                                            const struct dzn_rendering_attachment *att,
3416                                            VkImageAspectFlagBits aspect)
3417{
3418   struct dzn_image_view *src = att->iview;
3419   struct dzn_image_view *dst = att->resolve.iview;
3420
3421   if (!src || !dst)
3422      return;
3423
3424   VkImageLayout src_layout = att->layout;
3425   VkImageLayout dst_layout = att->resolve.layout;
3426   struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk);
3427   D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect);
3428   struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk);
3429   D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect);
3430
3431   VkImageSubresourceRange src_range = {
3432      .aspectMask = (VkImageAspectFlags)aspect,
3433      .baseMipLevel = src->vk.base_mip_level,
3434      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
3435      .baseArrayLayer = src->vk.base_array_layer,
3436      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
3437   };
3438
3439   VkImageSubresourceRange dst_range = {
3440      .aspectMask = (VkImageAspectFlags)aspect,
3441      .baseMipLevel = dst->vk.base_mip_level,
3442      .levelCount = MIN2(src->vk.level_count, dst->vk.level_count),
3443      .baseArrayLayer = dst->vk.base_array_layer,
3444      .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count),
3445   };
3446
3447   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
3448                                                     src_state,
3449                                                     D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
3450                                                     DZN_QUEUE_TRANSITION_FLUSH);
3451   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
3452                                                     dst_state,
3453                                                     D3D12_RESOURCE_STATE_RESOLVE_DEST,
3454                                                     DZN_QUEUE_TRANSITION_FLUSH);
3455
3456   for (uint32_t level = 0; level < src_range.levelCount; level++) {
3457      for (uint32_t layer = 0; layer < src_range.layerCount; layer++) {
3458         uint32_t src_subres =
3459            dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer);
3460         uint32_t dst_subres =
3461            dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer);
3462
3463         ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist,
3464                                                       dst_img->res, dst_subres,
3465                                                       src_img->res, src_subres,
3466                                                       dst->srv_desc.Format);
3467      }
3468   }
3469
3470   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range,
3471                                                     D3D12_RESOURCE_STATE_RESOLVE_SOURCE,
3472                                                     src_state,
3473                                                     DZN_QUEUE_TRANSITION_FLUSH);
3474   dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range,
3475                                                     D3D12_RESOURCE_STATE_RESOLVE_DEST,
3476                                                     dst_state,
3477                                                     DZN_QUEUE_TRANSITION_FLUSH);
3478}
3479
3480static void
3481dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf,
3482                                            const VkRenderingAttachmentInfo *att,
3483                                            VkImageAspectFlagBits aspect)
3484{
3485   const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout =
3486      vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA);
3487   VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3488
3489   if (!initial_layout || !iview)
3490      return;
3491
3492   struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk);
3493   const VkImageSubresourceRange range = {
3494      .aspectMask = aspect,
3495      .baseMipLevel = iview->vk.base_mip_level,
3496      .levelCount = iview->vk.level_count,
3497      .baseArrayLayer = iview->vk.base_array_layer,
3498      .layerCount = iview->vk.layer_count,
3499   };
3500
3501   dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range,
3502                                                      initial_layout->initialLayout,
3503                                                      att->imageLayout,
3504                                                      DZN_QUEUE_TRANSITION_FLUSH);
3505}
3506
3507VKAPI_ATTR void VKAPI_CALL
3508dzn_CmdBeginRendering(VkCommandBuffer commandBuffer,
3509                      const VkRenderingInfo *pRenderingInfo)
3510{
3511   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3512
3513   D3D12_RECT new_render_area = {
3514      .left = pRenderingInfo->renderArea.offset.x,
3515      .top = pRenderingInfo->renderArea.offset.y,
3516      .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width),
3517      .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height),
3518   };
3519
3520   // The render area has an impact on the scissor state.
3521   if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) {
3522      cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3523      cmdbuf->state.render.area = new_render_area;
3524   }
3525
3526   cmdbuf->state.render.flags = pRenderingInfo->flags;
3527   cmdbuf->state.render.layer_count = pRenderingInfo->layerCount;
3528   cmdbuf->state.render.view_mask = pRenderingInfo->viewMask;
3529
3530   D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 };
3531   D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };
3532
3533   cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount;
3534   for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
3535      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i];
3536      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3537
3538      cmdbuf->state.render.attachments.colors[i].iview = iview;
3539      cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout;
3540      cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode;
3541      cmdbuf->state.render.attachments.colors[i].resolve.iview =
3542         dzn_image_view_from_handle(att->resolveImageView);
3543      cmdbuf->state.render.attachments.colors[i].resolve.layout =
3544         att->resolveImageLayout;
3545      cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp;
3546
3547      if (!iview) {
3548         rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf);
3549         continue;
3550      }
3551
3552      struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
3553      rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
3554      dzn_rendering_attachment_initial_transition(cmdbuf, att,
3555                                                  VK_IMAGE_ASPECT_COLOR_BIT);
3556   }
3557
3558   if (pRenderingInfo->pDepthAttachment) {
3559      const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
3560
3561      cmdbuf->state.render.attachments.depth.iview =
3562         dzn_image_view_from_handle(att->imageView);
3563      cmdbuf->state.render.attachments.depth.layout = att->imageLayout;
3564      cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode;
3565      cmdbuf->state.render.attachments.depth.resolve.iview =
3566         dzn_image_view_from_handle(att->resolveImageView);
3567      cmdbuf->state.render.attachments.depth.resolve.layout =
3568         att->resolveImageLayout;
3569      cmdbuf->state.render.attachments.depth.store_op = att->storeOp;
3570      dzn_rendering_attachment_initial_transition(cmdbuf, att,
3571                                                  VK_IMAGE_ASPECT_DEPTH_BIT);
3572   }
3573
3574   if (pRenderingInfo->pStencilAttachment) {
3575      const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
3576
3577      cmdbuf->state.render.attachments.stencil.iview =
3578         dzn_image_view_from_handle(att->imageView);
3579      cmdbuf->state.render.attachments.stencil.layout = att->imageLayout;
3580      cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode;
3581      cmdbuf->state.render.attachments.stencil.resolve.iview =
3582         dzn_image_view_from_handle(att->resolveImageView);
3583      cmdbuf->state.render.attachments.stencil.resolve.layout =
3584         att->resolveImageLayout;
3585      cmdbuf->state.render.attachments.stencil.store_op = att->storeOp;
3586      dzn_rendering_attachment_initial_transition(cmdbuf, att,
3587                                                  VK_IMAGE_ASPECT_STENCIL_BIT);
3588   }
3589
3590   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
3591      struct dzn_image_view *z_iview =
3592         pRenderingInfo->pDepthAttachment ?
3593         dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) :
3594         NULL;
3595      struct dzn_image_view *s_iview =
3596         pRenderingInfo->pStencilAttachment ?
3597         dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) :
3598         NULL;
3599      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
3600      assert(!z_iview || !s_iview || z_iview == s_iview);
3601
3602      if (iview) {
3603         struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk);
3604
3605         zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
3606      }
3607   }
3608
3609   ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist,
3610                                                 pRenderingInfo->colorAttachmentCount,
3611                                                 pRenderingInfo->colorAttachmentCount ? rt_handles : NULL,
3612                                                 FALSE, zs_handle.ptr ? &zs_handle : NULL);
3613
3614   for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) {
3615      const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a];
3616      VK_FROM_HANDLE(dzn_image_view, iview, att->imageView);
3617
3618      if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3619         dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout,
3620                                         &att->clearValue,
3621                                         VK_IMAGE_ASPECT_COLOR_BIT, 0,
3622                                         VK_REMAINING_ARRAY_LAYERS, 1,
3623                                         &cmdbuf->state.render.area);
3624      }
3625   }
3626
3627   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
3628      const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment;
3629      const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment;
3630      struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL;
3631      struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL;
3632      struct dzn_image_view *iview = z_iview ? z_iview : s_iview;
3633      VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED;
3634
3635      assert(!z_iview || !s_iview || z_iview == s_iview);
3636
3637      VkImageAspectFlags aspects = 0;
3638      VkClearValue clear_val;
3639
3640      if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3641         aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
3642         clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth;
3643         layout = z_att->imageLayout;
3644      }
3645
3646      if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
3647         aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
3648         clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil;
3649         layout = s_att->imageLayout;
3650      }
3651
3652      if (aspects != 0) {
3653         dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout,
3654                                         &clear_val, aspects, 0,
3655                                         VK_REMAINING_ARRAY_LAYERS, 1,
3656                                         &cmdbuf->state.render.area);
3657      }
3658   }
3659}
3660
3661VKAPI_ATTR void VKAPI_CALL
3662dzn_CmdEndRendering(VkCommandBuffer commandBuffer)
3663{
3664   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3665
3666   for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) {
3667      dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3668                                                  &cmdbuf->state.render.attachments.colors[i],
3669                                                  VK_IMAGE_ASPECT_COLOR_BIT);
3670   }
3671
3672   dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3673                                               &cmdbuf->state.render.attachments.depth,
3674                                               VK_IMAGE_ASPECT_DEPTH_BIT);
3675   dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf,
3676                                               &cmdbuf->state.render.attachments.stencil,
3677                                               VK_IMAGE_ASPECT_STENCIL_BIT);
3678
3679   memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render));
3680}
3681
3682VKAPI_ATTR void VKAPI_CALL
3683dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
3684                    VkPipelineBindPoint pipelineBindPoint,
3685                    VkPipeline pipe)
3686{
3687   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3688   VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe);
3689
3690   cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline;
3691   cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
3692   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3693      const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline;
3694
3695      if (!gfx->vp.dynamic) {
3696         memcpy(cmdbuf->state.viewports, gfx->vp.desc,
3697                gfx->vp.count * sizeof(cmdbuf->state.viewports[0]));
3698         cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
3699      }
3700
3701      if (!gfx->scissor.dynamic) {
3702         memcpy(cmdbuf->state.scissors, gfx->scissor.desc,
3703                gfx->scissor.count * sizeof(cmdbuf->state.scissors[0]));
3704         cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3705      }
3706
3707      if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) {
3708         cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref;
3709         cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref;
3710         cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
3711      }
3712
3713      if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) {
3714         cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min;
3715         cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max;
3716         cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
3717      }
3718
3719      if (!gfx->blend.dynamic_constants) {
3720         memcpy(cmdbuf->state.blend.constants, gfx->blend.constants,
3721                sizeof(cmdbuf->state.blend.constants));
3722         cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
3723      }
3724
3725      for (uint32_t vb = 0; vb < gfx->vb.count; vb++)
3726         cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb];
3727
3728      if (gfx->vb.count > 0)
3729         BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1);
3730   }
3731}
3732
3733VKAPI_ATTR void VKAPI_CALL
3734dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
3735                          VkPipelineBindPoint pipelineBindPoint,
3736                          VkPipelineLayout layout,
3737                          uint32_t firstSet,
3738                          uint32_t descriptorSetCount,
3739                          const VkDescriptorSet *pDescriptorSets,
3740                          uint32_t dynamicOffsetCount,
3741                          const uint32_t *pDynamicOffsets)
3742{
3743   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3744   VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
3745
3746   struct dzn_descriptor_state *desc_state =
3747      &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state;
3748   uint32_t dirty = 0;
3749
3750   for (uint32_t i = 0; i < descriptorSetCount; i++) {
3751      uint32_t idx = firstSet + i;
3752      VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]);
3753
3754      if (desc_state->sets[idx].set != set) {
3755         desc_state->sets[idx].set = set;
3756         dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3757      }
3758
3759      uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count;
3760      if (dynamic_buffer_count) {
3761         assert(dynamicOffsetCount >= dynamic_buffer_count);
3762
3763         for (uint32_t j = 0; j < dynamic_buffer_count; j++)
3764            desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j];
3765
3766         dynamicOffsetCount -= dynamic_buffer_count;
3767         pDynamicOffsets += dynamic_buffer_count;
3768         dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS;
3769      }
3770   }
3771
3772   cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty;
3773}
3774
3775VKAPI_ATTR void VKAPI_CALL
3776dzn_CmdSetViewport(VkCommandBuffer commandBuffer,
3777                   uint32_t firstViewport,
3778                   uint32_t viewportCount,
3779                   const VkViewport *pViewports)
3780{
3781   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3782
3783   STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT);
3784
3785   for (uint32_t i = 0; i < viewportCount; i++) {
3786      uint32_t vp = i + firstViewport;
3787
3788      dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]);
3789
3790      if (pViewports[i].minDepth > pViewports[i].maxDepth)
3791         cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
3792      else
3793         cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT);
3794
3795      if (pViewports[i].height > 0)
3796         cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp);
3797      else
3798         cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp);
3799   }
3800
3801   if (viewportCount) {
3802      cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
3803      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3804         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3805   }
3806}
3807
3808VKAPI_ATTR void VKAPI_CALL
3809dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
3810                  uint32_t firstScissor,
3811                  uint32_t scissorCount,
3812                  const VkRect2D *pScissors)
3813{
3814   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3815
3816   for (uint32_t i = 0; i < scissorCount; i++)
3817      dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
3818
3819   if (scissorCount)
3820      cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
3821}
3822
3823VKAPI_ATTR void VKAPI_CALL
3824dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
3825                     VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
3826                     const void *pValues)
3827{
3828   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3829   struct dzn_cmd_buffer_push_constant_state *states[2];
3830   uint32_t num_states = 0;
3831
3832   if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
3833      states[num_states++] = &cmdbuf->state.push_constant.gfx;
3834
3835   if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
3836      states[num_states++] = &cmdbuf->state.push_constant.compute;
3837
3838   for (uint32_t i = 0; i < num_states; i++) {
3839      memcpy(((char *)states[i]->values) + offset, pValues, size);
3840      states[i]->offset =
3841         states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset;
3842      states[i]->end = MAX2(states[i]->end, offset + size);
3843   }
3844}
3845
3846VKAPI_ATTR void VKAPI_CALL
3847dzn_CmdDraw(VkCommandBuffer commandBuffer,
3848            uint32_t vertexCount,
3849            uint32_t instanceCount,
3850            uint32_t firstVertex,
3851            uint32_t firstInstance)
3852{
3853   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3854
3855   const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
3856      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3857
3858   cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
3859   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
3860   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3861      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3862
3863   if (pipeline->ia.triangle_fan) {
3864      D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
3865
3866      VkResult result =
3867         dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
3868      if (result != VK_SUCCESS || !vertexCount)
3869         return;
3870
3871      cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
3872      dzn_cmd_buffer_prepare_draw(cmdbuf, true);
3873      ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0,
3874                                            firstVertex, firstInstance);
3875
3876      /* Restore the IB view if we modified it when lowering triangle fans. */
3877      if (ib_view.SizeInBytes > 0) {
3878         cmdbuf->state.ib.view = ib_view;
3879         cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3880      }
3881   } else {
3882      cmdbuf->state.sysvals.gfx.is_indexed_draw = false;
3883      dzn_cmd_buffer_prepare_draw(cmdbuf, false);
3884      ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount,
3885                                     firstVertex, firstInstance);
3886   }
3887}
3888
3889VKAPI_ATTR void VKAPI_CALL
3890dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer,
3891                   uint32_t indexCount,
3892                   uint32_t instanceCount,
3893                   uint32_t firstIndex,
3894                   int32_t vertexOffset,
3895                   uint32_t firstInstance)
3896{
3897   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3898
3899   const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *)
3900      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
3901
3902   if (pipeline->ia.triangle_fan &&
3903       dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) {
3904      /* The indexed+primitive-restart+triangle-fan combination is a mess,
3905       * since we have to walk the index buffer, skip entries with the
3906       * special 0xffff/0xffffffff values, and push triangle list indices
3907       * for the remaining values. All of this has an impact on the index
3908       * count passed to the draw call, which forces us to use the indirect
3909       * path.
3910       */
3911      struct dzn_indirect_indexed_draw_params params = {
3912         .index_count = indexCount,
3913         .instance_count = instanceCount,
3914         .first_index = firstIndex,
3915         .vertex_offset = vertexOffset,
3916         .first_instance = firstInstance,
3917      };
3918
3919      ID3D12Resource *draw_buf;
3920      VkResult result =
3921         dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params),
3922                                           D3D12_HEAP_TYPE_UPLOAD,
3923                                           D3D12_RESOURCE_STATE_GENERIC_READ,
3924                                           &draw_buf);
3925      if (result != VK_SUCCESS)
3926         return;
3927
3928      void *cpu_ptr;
3929      ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr);
3930      memcpy(cpu_ptr, &params, sizeof(params));
3931
3932      ID3D12Resource_Unmap(draw_buf, 0, NULL);
3933
3934      dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, 0, NULL, 0, 1, sizeof(params), true);
3935      return;
3936   }
3937
3938   cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset;
3939   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
3940   cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
3941   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
3942      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
3943
3944   D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
3945
3946   if (pipeline->ia.triangle_fan) {
3947      VkResult result =
3948         dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex);
3949      if (result != VK_SUCCESS || !indexCount)
3950         return;
3951   }
3952
3953   dzn_cmd_buffer_prepare_draw(cmdbuf, true);
3954   ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex,
3955                                         vertexOffset, firstInstance);
3956
3957   /* Restore the IB view if we modified it when lowering triangle fans. */
3958   if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) {
3959      cmdbuf->state.ib.view = ib_view;
3960      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
3961   }
3962}
3963
3964VKAPI_ATTR void VKAPI_CALL
3965dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer,
3966                    VkBuffer buffer,
3967                    VkDeviceSize offset,
3968                    uint32_t drawCount,
3969                    uint32_t stride)
3970{
3971   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3972   VK_FROM_HANDLE(dzn_buffer, buf, buffer);
3973
3974   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false);
3975}
3976
3977VKAPI_ATTR void VKAPI_CALL
3978dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
3979                           VkBuffer buffer,
3980                           VkDeviceSize offset,
3981                           uint32_t drawCount,
3982                           uint32_t stride)
3983{
3984   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
3985   VK_FROM_HANDLE(dzn_buffer, buf, buffer);
3986
3987   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true);
3988}
3989
3990VKAPI_ATTR void VKAPI_CALL
3991dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
3992                         VkBuffer buffer,
3993                         VkDeviceSize offset,
3994                         VkBuffer countBuffer,
3995                         VkDeviceSize countBufferOffset,
3996                         uint32_t maxDrawCount,
3997                         uint32_t stride)
3998{
3999   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4000   VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4001   VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
4002
4003   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
4004                                count_buf->res, countBufferOffset,
4005                                maxDrawCount, stride, false);
4006}
4007
4008VKAPI_ATTR void VKAPI_CALL
4009dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
4010                                VkBuffer buffer,
4011                                VkDeviceSize offset,
4012                                VkBuffer countBuffer,
4013                                VkDeviceSize countBufferOffset,
4014                                uint32_t maxDrawCount,
4015                                uint32_t stride)
4016{
4017   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4018   VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4019   VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer);
4020
4021   dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset,
4022                                count_buf->res, countBufferOffset,
4023                                maxDrawCount, stride, true);
4024}
4025
4026VKAPI_ATTR void VKAPI_CALL
4027dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
4028                         uint32_t firstBinding,
4029                         uint32_t bindingCount,
4030                         const VkBuffer *pBuffers,
4031                         const VkDeviceSize *pOffsets)
4032{
4033   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4034
4035   if (!bindingCount)
4036      return;
4037
4038   D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views;
4039
4040   for (uint32_t i = 0; i < bindingCount; i++) {
4041      VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]);
4042
4043      vbviews[firstBinding + i].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + pOffsets[i];
4044      vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i];
4045   }
4046
4047   BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding,
4048                    firstBinding + bindingCount - 1);
4049}
4050
4051VKAPI_ATTR void VKAPI_CALL
4052dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
4053                       VkBuffer buffer,
4054                       VkDeviceSize offset,
4055                       VkIndexType indexType)
4056{
4057   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4058   VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4059
4060   cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(buf->res) + offset;
4061   cmdbuf->state.ib.view.SizeInBytes = buf->size - offset;
4062   switch (indexType) {
4063   case VK_INDEX_TYPE_UINT16:
4064      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
4065      cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
4066      break;
4067   case VK_INDEX_TYPE_UINT32:
4068      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
4069      cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
4070      break;
4071   default: unreachable("Invalid index type");
4072   }
4073
4074   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
4075
4076   const struct dzn_graphics_pipeline *pipeline =
4077      (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline;
4078
4079   if (pipeline && dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
4080      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4081}
4082
4083VKAPI_ATTR void VKAPI_CALL
4084dzn_CmdResetEvent(VkCommandBuffer commandBuffer,
4085                  VkEvent event,
4086                  VkPipelineStageFlags stageMask)
4087{
4088   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4089   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4090   VK_FROM_HANDLE(dzn_event, evt, event);
4091
4092   if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
4093      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4094}
4095
4096VKAPI_ATTR void VKAPI_CALL
4097dzn_CmdSetEvent(VkCommandBuffer commandBuffer,
4098                VkEvent event,
4099                VkPipelineStageFlags stageMask)
4100{
4101   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4102   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4103   VK_FROM_HANDLE(dzn_event, evt, event);
4104
4105   if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
4106      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4107}
4108
4109VKAPI_ATTR void VKAPI_CALL
4110dzn_CmdWaitEvents(VkCommandBuffer commandBuffer,
4111                  uint32_t eventCount,
4112                  const VkEvent *pEvents,
4113                  VkPipelineStageFlags srcStageMask,
4114                  VkPipelineStageFlags dstStageMask,
4115                  uint32_t memoryBarrierCount,
4116                  const VkMemoryBarrier *pMemoryBarriers,
4117                  uint32_t bufferMemoryBarrierCount,
4118                  const VkBufferMemoryBarrier *pBufferMemoryBarriers,
4119                  uint32_t imageMemoryBarrierCount,
4120                  const VkImageMemoryBarrier *pImageMemoryBarriers)
4121{
4122   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4123   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4124
4125   /* Intra-command list wait is handle by this pipeline flush, which is
4126    * overkill, but that's the best we can do with the standard D3D12 barrier
4127    * API.
4128    *
4129    * Inter-command list is taken care of by the serialization done at the
4130    * ExecuteCommandList() level:
4131    * "Calling ExecuteCommandLists twice in succession (from the same thread,
4132    *  or different threads) guarantees that the first workload (A) finishes
4133    *  before the second workload (B)"
4134    *
4135    * HOST -> DEVICE signaling is ignored and we assume events are always
4136    * signaled when we reach the vkCmdWaitEvents() point.:
4137    * "Command buffers in the submission can include vkCmdWaitEvents commands
4138    *  that wait on events that will not be signaled by earlier commands in the
4139    *  queue. Such events must be signaled by the application using vkSetEvent,
4140    *  and the vkCmdWaitEvents commands that wait upon them must not be inside
4141    *  a render pass instance.
4142    *  The event must be set before the vkCmdWaitEvents command is executed."
4143    */
4144   bool flush_pipeline = false;
4145
4146   for (uint32_t i = 0; i < eventCount; i++) {
4147      VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
4148
4149      struct hash_entry *he =
4150         _mesa_hash_table_search(cmdbuf->events.ht, event);
4151      if (he) {
4152         enum dzn_event_state state = (uintptr_t)he->data;
4153         assert(state != DZN_EVENT_STATE_RESET);
4154         flush_pipeline = state == DZN_EVENT_STATE_SET;
4155      } else {
4156         if (!_mesa_hash_table_insert(cmdbuf->events.ht, event,
4157                                      (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) {
4158            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4159            return;
4160         }
4161
4162         struct dzn_event **entry =
4163            util_dynarray_grow(&cmdbuf->events.wait, struct dzn_event *, 1);
4164
4165         if (!entry) {
4166            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4167            return;
4168         }
4169
4170         *entry = event;
4171      }
4172   }
4173
4174   if (flush_pipeline) {
4175      D3D12_RESOURCE_BARRIER barrier = {
4176         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
4177         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
4178         .UAV = { .pResource = NULL },
4179      };
4180
4181      ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
4182   }
4183}
4184
4185VKAPI_ATTR void VKAPI_CALL
4186dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
4187                  VkQueryPool queryPool,
4188                  uint32_t query,
4189                  VkQueryControlFlags flags)
4190{
4191   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4192   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4193
4194   struct dzn_cmd_buffer_query_pool_state *state =
4195      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4196   if (!state)
4197      return;
4198
4199   qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags);
4200   dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query);
4201   ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4202}
4203
4204VKAPI_ATTR void VKAPI_CALL
4205dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
4206                VkQueryPool queryPool,
4207                uint32_t query)
4208{
4209   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4210   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4211
4212   struct dzn_cmd_buffer_query_pool_state *state =
4213      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4214   if (!state)
4215      return;
4216
4217   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
4218   ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4219}
4220
4221VKAPI_ATTR void VKAPI_CALL
4222dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
4223                       VkPipelineStageFlags2 stage,
4224                       VkQueryPool queryPool,
4225                       uint32_t query)
4226{
4227   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4228   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4229
4230   struct dzn_cmd_buffer_query_pool_state *state =
4231      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4232   if (!state)
4233      return;
4234
4235   /* Execution barrier so the timestamp gets written after the pipeline flush. */
4236   D3D12_RESOURCE_BARRIER barrier = {
4237      .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
4238      .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
4239      .UAV = { .pResource = NULL },
4240   };
4241
4242   ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier);
4243
4244   qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP;
4245   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
4246   ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query);
4247}
4248
4249
4250VKAPI_ATTR void VKAPI_CALL
4251dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
4252                      VkQueryPool queryPool,
4253                      uint32_t firstQuery,
4254                      uint32_t queryCount)
4255{
4256   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4257   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4258   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4259
4260   struct dzn_cmd_buffer_query_pool_state *state =
4261      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4262
4263   if (!state)
4264      return;
4265
4266   uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
4267
4268   for (uint32_t q = 0; q < queryCount; q += q_step) {
4269      uint32_t q_count = MIN2(queryCount - q, q_step);
4270
4271      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
4272                                        dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
4273                                        device->queries.refs,
4274                                        DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
4275                                        q_count * sizeof(uint64_t));
4276   }
4277
4278   q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
4279
4280   for (uint32_t q = 0; q < queryCount; q += q_step) {
4281      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer,
4282                                        dzn_query_pool_get_result_offset(qpool, firstQuery + q),
4283                                        device->queries.refs,
4284                                        DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
4285                                        qpool->query_size);
4286   }
4287
4288   dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
4289   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
4290}
4291
4292VKAPI_ATTR void VKAPI_CALL
4293dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
4294                            VkQueryPool queryPool,
4295                            uint32_t firstQuery,
4296                            uint32_t queryCount,
4297                            VkBuffer dstBuffer,
4298                            VkDeviceSize dstOffset,
4299                            VkDeviceSize stride,
4300                            VkQueryResultFlags flags)
4301{
4302   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4303   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
4304   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
4305
4306   struct dzn_cmd_buffer_query_pool_state *qpstate =
4307      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
4308   if (!qpstate)
4309      return;
4310
4311   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
4312      for (uint32_t i = 0; i < queryCount; i++) {
4313         if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) &&
4314             !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i))
4315            dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i);
4316      }
4317   }
4318
4319   VkResult result =
4320      dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
4321   if (result != VK_SUCCESS)
4322      return;
4323
4324   bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
4325                   stride == qpool->query_size &&
4326                   !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
4327#define ALL_STATS \
4328        (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
4329         VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
4330         VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
4331         VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
4332         VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
4333         VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
4334         VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
4335         VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
4336         VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
4337         VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \
4338         VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT)
4339   if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS &&
4340       qpool->pipeline_statistics != ALL_STATS)
4341      raw_copy = false;
4342#undef ALL_STATS
4343
4344   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
4345                                            D3D12_RESOURCE_STATE_COPY_DEST,
4346                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
4347                                            DZN_QUEUE_TRANSITION_FLUSH);
4348
4349   if (raw_copy) {
4350      ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
4351                                        qpool->collect_buffer,
4352                                        dzn_query_pool_get_result_offset(qpool, firstQuery),
4353                                        dzn_query_pool_get_result_size(qpool, queryCount));
4354   } else {
4355      uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t);
4356
4357      for (uint32_t q = 0; q < queryCount; q++) {
4358         uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q);
4359         uint32_t dst_counter_offset = 0;
4360
4361         if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
4362            for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
4363               if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
4364                  continue;
4365
4366               ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
4367                                                 qpool->collect_buffer,
4368                                                 res_offset + (c * sizeof(uint64_t)),
4369                                                 step);
4370               dst_counter_offset += step;
4371            }
4372         } else {
4373            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset,
4374                                              qpool->collect_buffer,
4375                                              res_offset, step);
4376            dst_counter_offset += step;
4377         }
4378
4379         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
4380            ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset,
4381                                              qpool->collect_buffer,
4382                                              dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
4383                                              step);
4384         }
4385
4386         dstOffset += stride;
4387      }
4388   }
4389
4390   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1,
4391                                            D3D12_RESOURCE_STATE_COPY_SOURCE,
4392                                            D3D12_RESOURCE_STATE_COPY_DEST,
4393                                            0);
4394}
4395
4396VKAPI_ATTR void VKAPI_CALL
4397dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
4398                        VkBuffer buffer,
4399                        VkDeviceSize offset)
4400{
4401   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4402   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4403   VK_FROM_HANDLE(dzn_buffer, buf, buffer);
4404
4405   cmdbuf->state.sysvals.compute.group_count_x = 0;
4406   cmdbuf->state.sysvals.compute.group_count_y = 0;
4407   cmdbuf->state.sysvals.compute.group_count_z = 0;
4408   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
4409      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
4410
4411   dzn_cmd_buffer_prepare_dispatch(cmdbuf);
4412
4413   struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *)
4414      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
4415   ID3D12CommandSignature *cmdsig =
4416      dzn_compute_pipeline_get_indirect_cmd_sig(pipeline);
4417
4418   if (!cmdsig) {
4419      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4420      return;
4421   }
4422
4423   ID3D12Resource *exec_buf;
4424   VkResult result =
4425      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
4426                                        D3D12_HEAP_TYPE_DEFAULT,
4427                                        D3D12_RESOURCE_STATE_COPY_DEST,
4428                                        &exec_buf);
4429   if (result != VK_SUCCESS)
4430      return;
4431
4432   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0,
4433                                     buf->res,
4434                                     offset,
4435                                     sizeof(D3D12_DISPATCH_ARGUMENTS));
4436   ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS),
4437                                     buf->res,
4438                                     offset,
4439                                     sizeof(D3D12_DISPATCH_ARGUMENTS));
4440
4441   dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1,
4442                                            D3D12_RESOURCE_STATE_COPY_DEST,
4443                                            D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
4444                                            DZN_QUEUE_TRANSITION_FLUSH);
4445
4446   ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0);
4447}
4448
4449VKAPI_ATTR void VKAPI_CALL
4450dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer,
4451                    float lineWidth)
4452{
4453   assert(lineWidth == 1.0f);
4454}
4455
4456VKAPI_ATTR void VKAPI_CALL
4457dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer,
4458                    float depthBiasConstantFactor,
4459                    float depthBiasClamp,
4460                    float depthBiasSlopeFactor)
4461{
4462   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4463
4464   cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor;
4465   cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp;
4466   cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor;
4467   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4468}
4469
4470VKAPI_ATTR void VKAPI_CALL
4471dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
4472                         const float blendConstants[4])
4473{
4474   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4475
4476   memcpy(cmdbuf->state.blend.constants, blendConstants,
4477          sizeof(cmdbuf->state.blend.constants));
4478   cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS;
4479}
4480
4481VKAPI_ATTR void VKAPI_CALL
4482dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
4483                      float minDepthBounds,
4484                      float maxDepthBounds)
4485{
4486   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4487   struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk);
4488   struct dzn_physical_device *pdev =
4489      container_of(device->vk.physical, struct dzn_physical_device, vk);
4490
4491   if (pdev->options2.DepthBoundsTestSupported) {
4492      cmdbuf->state.zsa.depth_bounds.min = minDepthBounds;
4493      cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds;
4494      cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS;
4495   }
4496}
4497
4498VKAPI_ATTR void VKAPI_CALL
4499dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
4500                             VkStencilFaceFlags faceMask,
4501                             uint32_t compareMask)
4502{
4503   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4504
4505   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
4506      cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask;
4507      cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask;
4508   }
4509
4510   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
4511      cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask;
4512      cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask;
4513   }
4514
4515   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK;
4516   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4517}
4518
4519VKAPI_ATTR void VKAPI_CALL
4520dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
4521                           VkStencilFaceFlags faceMask,
4522                           uint32_t writeMask)
4523{
4524   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4525
4526   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
4527      cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask;
4528      cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask;
4529   }
4530
4531   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
4532      cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask;
4533      cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask;
4534   }
4535
4536   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK;
4537   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
4538}
4539
4540VKAPI_ATTR void VKAPI_CALL
4541dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer,
4542                           VkStencilFaceFlags faceMask,
4543                           uint32_t reference)
4544{
4545   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
4546
4547   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
4548      cmdbuf->state.zsa.stencil_test.front.ref = reference;
4549
4550   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
4551      cmdbuf->state.zsa.stencil_test.back.ref = reference;
4552
4553   cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF;
4554}
4555