/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_vk.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <vulkan/vulkan_core.h>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/containers/string_view.h>
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#include <core/plugin/intf_class_register.h>
#include <render/datastore/render_data_store_render_pods.h>
#include <render/device/pipeline_state_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_backend_node.h>
#include <render/vulkan/intf_device_vk.h>

#if (RENDER_PERF_ENABLED == 1)
#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#include "vulkan/gpu_query_vk.h"
#endif

#include "device/gpu_buffer.h"
#include "device/gpu_image.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "device/gpu_sampler.h"
#include "device/pipeline_state_object.h"
#include "device/render_frame_sync.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pool_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "render_backend.h"
#include "render_graph.h"
#include "util/log.h"
#include "util/render_frame_util.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/gpu_semaphore_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

using CORE_NS::GetInstance;
using CORE_NS::IParallelTaskQueue;
using CORE_NS::IPerformanceDataManager;
using CORE_NS::IPerformanceDataManagerFactory;
using CORE_NS::ITaskQueueFactory;
using CORE_NS::IThreadPool;
using CORE_NS::UID_TASK_QUEUE_FACTORY;

RENDER_BEGIN_NAMESPACE()
namespace {
#if (RENDER_VULKAN_RT_ENABLED == 1)
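// Returns the GPU device address of a buffer via vkGetBufferDeviceAddress.
// Only compiled in when ray tracing is enabled (requires the bufferDeviceAddress
// feature); used by the ray tracing paths of this backend.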
inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
{
    const VkBufferDeviceAddressInfo addressInfo {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
        nullptr,                                      // pNext
        buffer,                                       // buffer
    };
    return vkGetBufferDeviceAddress(device, &addressInfo);
}
#endif
#if (RENDER_PERF_ENABLED == 1)
void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
{
    dst.drawCount += src.drawCount;
    dst.drawIndirectCount += src.drawIndirectCount;
    dst.dispatchCount += src.dispatchCount;
    dst.dispatchIndirectCount += src.dispatchIndirectCount;
    dst.bindPipelineCount += src.bindPipelineCount;
    dst.renderPassCount += src.renderPassCount;
    dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
    dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
    dst.triangleCount += src.triangleCount;
    dst.instanceCount += src.instanceCount;
}
#endif
} // namespace

// Helper class for running std::function as a ThreadPool task.
class FunctionTask final : public IThreadPool::ITask {
public:
    static Ptr Create(std::function<void()> func)
    {
        return Ptr { new FunctionTask(BASE_NS::move(func)) };
    }

    explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {}

    void operator()() override
    {
        func_();
    }

protected:
    void Destroy() override
    {
        delete this;
    }

private:
    std::function<void()> func_;
};
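// Usage sketch (matching the task queue calls in RenderProcessCommandLists below):
//   queue_->Submit(taskId, FunctionTask::Create([this]() { /* record work */ }));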

#if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
namespace {
static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
}
#endif

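// The constructor preallocates a host-visible, host-coherent readback buffer for GPU
// timestamp queries: two timestamps (begin/end) per query object, with one frame-sized
// slice per buffered frame so results can be read back without stalling the GPU.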
RenderBackendVk::RenderBackendVk(
    Device& dev, GpuResourceManager& gpuResourceManager, const CORE_NS::IParallelTaskQueue::Ptr& queue)
    : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
      queue_(queue.get())
{
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();

    constexpr uint32_t maxQueryObjectCount { 512u };
    constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
    const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
    const GpuBufferDesc desc {
        BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT,                        // usageFlags
        CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
        0,                                                                              // engineCreationFlags
        fullByteSize,                                                                   // byteSize
    };
    perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
    perfGpuTimerData_.currentOffset = 0;
    perfGpuTimerData_.frameByteSize = byteSize;
    perfGpuTimerData_.fullByteSize = fullByteSize;
    { // zero initialize
        uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
        memset_s(bufferData, fullByteSize, 0, fullByteSize);
        perfGpuTimerData_.gpuBuffer->Unmap();
    }
#endif
#endif
}

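// Acquires the next image from every active swapchain (vkAcquireNextImageKHR) and, on a
// valid acquire, remaps the engine's remappable backbuffer handle to the acquired image.
// Also records whether a final layout transition to PRESENT_SRC is still needed.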
void RenderBackendVk::AcquirePresentationInfo(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (device_.HasSwapchain()) {
        presentationData_.present = true;
        // resized to the same size for convenience
        presentationData_.infos.resize(backBufferConfig.swapchainData.size());
        for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
            const auto& swapData = backBufferConfig.swapchainData[swapIdx];
            PresentationInfo pi;
            const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;

            if (const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
                swapchain) {
                const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
                const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
                const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
                PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
                pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
                pi.swapchain = platSwapchain.swapchain;
                pi.useSwapchain = true;
                // NOTE: for legacy default backbuffer reasons the same swapchain might appear multiple times ATM
                for (const auto& piRef : presentationData_.infos) {
                    if (piRef.swapchain == pi.swapchain) {
                        pi.useSwapchain = false;
                    }
                }
                // NOTE: do not re-acquire the default backbuffer swapchain if it's in use with a different handle
                if (pi.useSwapchain) {
                    const VkResult result = vkAcquireNextImageKHR(device, // device
                        vkSwapchain,                                      // swapchain
                        UINT64_MAX,                                       // timeout
                        pi.swapchainSemaphore,                            // semaphore
                        (VkFence) nullptr,                                // fence
                        &pi.swapchainImageIndex);                         // pImageIndex

                    switch (result) {
                        // Success
                        case VK_SUCCESS:
                        case VK_TIMEOUT:
                        case VK_NOT_READY:
                        case VK_SUBOPTIMAL_KHR:
                            pi.validAcquire = true;
                            break;

                        // Failure
                        case VK_ERROR_OUT_OF_HOST_MEMORY:
                        case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
                            return;
                        case VK_ERROR_DEVICE_LOST:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
                            return;
                        case VK_ERROR_OUT_OF_DATE_KHR:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
                            return;
                        case VK_ERROR_SURFACE_LOST_KHR:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
                            return;

                        case VK_EVENT_SET:
                        case VK_EVENT_RESET:
                        case VK_INCOMPLETE:
                        case VK_ERROR_INITIALIZATION_FAILED:
                        case VK_ERROR_MEMORY_MAP_FAILED:
                        case VK_ERROR_LAYER_NOT_PRESENT:
                        case VK_ERROR_EXTENSION_NOT_PRESENT:
                        case VK_ERROR_FEATURE_NOT_PRESENT:
                        case VK_ERROR_INCOMPATIBLE_DRIVER:
                        case VK_ERROR_TOO_MANY_OBJECTS:
                        case VK_ERROR_FORMAT_NOT_SUPPORTED:
                        case VK_ERROR_FRAGMENTED_POOL:
                        case VK_ERROR_OUT_OF_POOL_MEMORY:
                        case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                        case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                        case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                        case VK_ERROR_VALIDATION_FAILED_EXT:
                        case VK_ERROR_INVALID_SHADER_NV:
                        // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
                        case VK_ERROR_FRAGMENTATION_EXT:
                        case VK_ERROR_NOT_PERMITTED_EXT:
                        // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
                        case VK_RESULT_MAX_ENUM:
                        default:
                            PLUGIN_LOG_E("vkAcquireNextImageKHR unexpected result. Device invalidated");
                            PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
                            device_.SetDeviceStatus(false);
                            break;
                    }

                    if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
                        PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
                            static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
                    }

                    const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
                    const RenderHandle handle = swapchainData.remappableSwapchainImage;
                    if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
                        // remap image to backbuffer
                        const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
                        // special swapchain remapping
                        gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
                    }
                    pi.renderGraphProcessedState = swapData.backBufferState;
                    pi.imageLayout = swapData.layout;
                    if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
                        pi.presentationLayoutChangeNeeded = true;
                        pi.renderNodeCommandListIndex =
                            static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);

                        const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
                        PLUGIN_ASSERT(swapImage);
                        pi.swapchainImage = swapImage->GetPlatformData().image;
                    }
                }
            }
            presentationData_.infos[swapIdx] = pi;
        }
    }
}

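// Presents all uniquely acquired swapchain images with a single vkQueuePresentKHR call,
// waiting on the semaphore signaled by the last valid command buffer submission.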
void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!backBufferConfig.swapchainData.empty()) {
        if (device_.HasSwapchain() && presentationData_.present) {
            PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
            uint32_t swapchainCount = 0U;
            VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
                VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
            uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
            for (const auto& presRef : presentationData_.infos) {
                // NOTE: the default backbuffer might be present multiple times;
                // the flag useSwapchain should be false in those cases
                if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
                    PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
                    vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
                    vkSwapchains[swapchainCount++] = presRef.swapchain;
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.Begin();
#endif

            // NOTE: currently waits for the last valid submission semaphore
            // (found by backtracking in RenderProcessSubmitCommandLists)
            if (swapchainCount > 0U) {
                VkSemaphore waitSemaphore = VK_NULL_HANDLE;
                uint32_t waitSemaphoreCount = 0;
                if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
                    waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
                    waitSemaphoreCount = 1;
                }

                const VkPresentInfoKHR presentInfo {
                    VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
                    nullptr,                            // pNext
                    waitSemaphoreCount,                 // waitSemaphoreCount
                    &waitSemaphore,                     // pWaitSemaphores
                    swapchainCount,                     // swapchainCount
                    vkSwapchains,                       // pSwapchains
                    vkSwapImageIndices,                 // pImageIndices
                    nullptr                             // pResults
                };

                const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
                const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
                    &presentInfo);                                             // pPresentInfo

                switch (result) {
                        // Success
                    case VK_SUCCESS:
                        break;
                    case VK_SUBOPTIMAL_KHR:
#if (RENDER_VALIDATION_ENABLED == 1)
                        PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
#endif
                        break;

                        // Failure
                    case VK_ERROR_OUT_OF_HOST_MEMORY:
                    case VK_ERROR_OUT_OF_DEVICE_MEMORY:
                        PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
                        return;
                    case VK_ERROR_DEVICE_LOST:
                        PLUGIN_LOG_E("vkQueuePresentKHR device lost");
                        return;
                    case VK_ERROR_OUT_OF_DATE_KHR:
                        PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
                        return;
                    case VK_ERROR_SURFACE_LOST_KHR:
                        PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
                        return;

                    case VK_NOT_READY:
                    case VK_TIMEOUT:
                    case VK_EVENT_SET:
                    case VK_EVENT_RESET:
                    case VK_INCOMPLETE:
                    case VK_ERROR_INITIALIZATION_FAILED:
                    case VK_ERROR_MEMORY_MAP_FAILED:
                    case VK_ERROR_LAYER_NOT_PRESENT:
                    case VK_ERROR_EXTENSION_NOT_PRESENT:
                    case VK_ERROR_FEATURE_NOT_PRESENT:
                    case VK_ERROR_INCOMPATIBLE_DRIVER:
                    case VK_ERROR_TOO_MANY_OBJECTS:
                    case VK_ERROR_FORMAT_NOT_SUPPORTED:
                    case VK_ERROR_FRAGMENTED_POOL:
                    case VK_ERROR_OUT_OF_POOL_MEMORY:
                    case VK_ERROR_INVALID_EXTERNAL_HANDLE:
                    case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
                    case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
                    case VK_ERROR_VALIDATION_FAILED_EXT:
                    case VK_ERROR_INVALID_SHADER_NV:
                    case VK_ERROR_FRAGMENTATION_EXT:
                    case VK_ERROR_NOT_PERMITTED_EXT:
                    case VK_RESULT_MAX_ENUM:
                    default:
                        PLUGIN_LOG_E("vkQueuePresentKHR unexpected result");
                        PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
                        break;
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.End();
#endif
        } else {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(
                "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
#endif
        }
    }
}

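// Frame entry point for the backend: records all command lists (optionally in parallel),
// then submits them to the GPU queues. The swapchain acquire runs as part of command
// list processing so recording does not block on it.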
void RenderBackendVk::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: all command lists are validated before entering here
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif

    commandBufferSubmitter_ = {};
    commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());

    presentationData_.present = false;
    presentationData_.infos.clear();

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif

    // command list process loop/execute
    // first submits a task to acquire the swapchain image if needed
    RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
    commonCpuTimers_.submit.Begin();
#endif

    PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
    // submit vulkan command buffers
    // checks that presentation info has a valid acquire
    RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);

#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.submit.End();
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

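// Submits the recorded command buffers in order. The last valid submission (found by
// backtracking from the end) additionally signals the frame fence, any external GPU
// signals, and the presentation wait semaphore; the first submission that depends on
// the swapchain waits on the swapchain acquire semaphores.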
void RenderBackendVk::RenderProcessSubmitCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: currently backtracks to the final valid command buffer semaphore
    uint32_t finalCommandBufferSubmissionIndex = ~0u;
    commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
    bool swapchainSemaphoreWaited = false;
    for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
         --cmdBufferIdx) {
        if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
            (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
                VK_NULL_HANDLE)) {
            finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
            break;
        }
    }

    for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
        const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
        if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
            continue;
        }

        const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];

        uint32_t waitSemaphoreCount = 0u;
        constexpr uint32_t maxWaitSemaphoreCount =
            PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
        VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
        VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
        for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
            const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
            PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());

            VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
            if (waitSemaphore != VK_NULL_HANDLE) {
                waitSemaphores[waitSemaphoreCount] = waitSemaphore;
                waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                waitSemaphoreCount++;
            }
        }

        if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
            (!presentationData_.infos.empty())) {
            swapchainSemaphoreWaited = true;
            // go through all swapchain semaphores
            for (const auto& presRef : presentationData_.infos) {
                if (presRef.swapchainSemaphore) {
                    waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
                    waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
                    waitSemaphoreCount++;
                }
            }
        }

        uint32_t signalSemaphoreCount = 0u;
        PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
        constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
        VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
            VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
        VkFence fence = VK_NULL_HANDLE;
        if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
            // add fence signaling to last submission for frame sync
            if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
                fence = frameSync->GetFrameFence().fence;
                frameSync->FrameFenceIsSignalled();
            }
            // signal external semaphores
            if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
                auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
                const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
                PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
                if (externalSignals.size() == externalSemaphores.size()) {
                    for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
                        // only signal semaphores that have not been signaled yet
                        if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
                            if (const GpuSemaphoreVk* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get();
                                gs) {
                                semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
                                externalSignals[sigIdx].signaled = true;
                            }
                        }
                    }
                }
            }

            if (presentationData_.present) {
                commandBufferSubmitter_.presentationWaitSemaphore =
                    commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
                semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
            }
            // add additional semaphores
            for (const auto& swapRef : backBufferConfig.swapchainData) {
                // should have been checked in render graph already
                if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
                    semaphores[signalSemaphoreCount++] =
                        VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
                }
            }
        } else if (renderContextRef.submitDepencies.signalSemaphore) {
            semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
        }
        PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);

        const VkSubmitInfo submitInfo {
            VK_STRUCTURE_TYPE_SUBMIT_INFO,                        // sType
            nullptr,                                              // pNext
            waitSemaphoreCount,                                   // waitSemaphoreCount
            (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
            waitSemaphorePipelineStageFlags,                      // pWaitDstStageMask
            1,                                                    // commandBufferCount
            &cmdSubmitterRef.commandBuffer,                       // pCommandBuffers
            signalSemaphoreCount,                                 // signalSemaphoreCount
            (signalSemaphoreCount == 0) ? nullptr : semaphores,   // pSignalSemaphores
        };

        const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
        if (queue) {
            VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
                1,                                  // submitCount
                &submitInfo,                        // pSubmits
                fence));                            // fence
        }
    }
}

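// Records all command lists of the frame. With a parallel task queue each command list
// is recorded as its own task; the swapchain acquire runs as a separate task that
// recording tasks from firstSwapchainNodeIdx onwards wait for. Multi-render-pass
// command lists are either recorded as secondary command buffers and executed from a
// primary one, or recorded back-to-back into a shared primary command buffer.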
void RenderBackendVk::RenderProcessCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    const uint32_t cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
    if (queue_) {
        constexpr uint64_t acquireTaskIdentifier { ~0U };
        vector<uint64_t> afterIdentifiers;
        afterIdentifiers.reserve(1u); // needed for swapchain acquire wait
        // submit acquire task if needed
        if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
            queue_->Submit(
                acquireTaskIdentifier, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
                    AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
                }));
        }
        uint64_t secondaryIdx = cmdBufferCount;
        for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
            afterIdentifiers.clear();
            // add wait for acquire if needed
            if (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx) {
                afterIdentifiers.push_back(acquireTaskIdentifier);
            }
            // NOTE: cmdBufferIdx is advanced by rcCount at the end of the loop body
            const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
            const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
            PLUGIN_ASSERT(mrpData.subpassCount > 0);
            const uint32_t rcCount = mrpData.subpassCount;
            if (mrpData.secondaryCmdLists) {
                afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
                for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
                    const uint64_t submitId = secondaryIdx++;
                    afterIdentifiers.push_back(submitId);
                    PLUGIN_ASSERT((cmdBufferIdx + secondIdx) < cmdBufferCount);
                    queue_->SubmitAfter(afterIdentifiers, submitId,
                        FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
                            const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
                            MultiRenderCommandListDesc mrcDesc;
                            mrcDesc.multiRenderCommandListCount = 1u;
                            mrcDesc.baseContext = nullptr;
                            mrcDesc.secondaryCommandBuffer = true;
                            RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
                            const DebugNames debugNames { ref2.debugName,
                                renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
                            RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
                        }));
                }
                queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
                    cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                        MultiRenderCommandListDesc mrcDesc;
                        mrcDesc.multiRenderCommandListCount = rcCount;
                        RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
                        const DebugNames debugNames { ref2.debugName,
                            renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                        RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
                    }));
            } else {
                queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
                    cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
                        MultiRenderCommandListDesc mrcDesc;
                        mrcDesc.multiRenderCommandListCount = rcCount;
                        if (rcCount > 1) {
                            mrcDesc.multiRenderNodeCmdList = true;
                            mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
                        }
                        for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                            const uint32_t currIdx = cmdBufferIdx + rcIdx;
                            mrcDesc.multiRenderCommandListIndex = rcIdx;
                            RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                            const DebugNames debugNames { ref2.debugName,
                                renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                            RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
                        }
                    }));
            }
            // advance past all command lists handled above
            cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
        }

        // execute and wait for completion.
        queue_->Execute();
        queue_->Clear();
    } else {
        AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
        for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < (uint32_t)renderCommandFrameData.renderCommandContexts.size();) {
            // NOTE: cmdBufferIdx is advanced by rcCount at the end of the loop body
            const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
            const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
            PLUGIN_ASSERT(mrpData.subpassCount > 0);
            const uint32_t rcCount = mrpData.subpassCount;

            MultiRenderCommandListDesc mrcDesc;
            mrcDesc.multiRenderCommandListCount = rcCount;
            mrcDesc.baseContext = (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;

            for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
                const uint32_t currIdx = cmdBufferIdx + rcIdx;
                mrcDesc.multiRenderCommandListIndex = rcIdx;
                RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
                const DebugNames debugNames { ref2.debugName,
                    renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
                RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
            }
            cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
        }
    }
}

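// Records the primary command buffer for a render pass whose subpass contents live in
// secondary command buffers: begins the render pass (and subsequent subpasses), executes
// the per-subpass secondary command buffers with vkCmdExecuteCommands, and ends the pass.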
void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
    RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
{
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;

    const ContextCommandPoolVk& ptrCmdPool =
        (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;

    // begin cmd buffer
    const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
    constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
    const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
    if (valid) {
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool.commandPool,                   // commandPool
            commandPoolResetFlags));                  // flags
    }

    constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
        VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
    };
    const VkCommandBufferBeginInfo commandBufferBeginInfo {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
        nullptr,                                     // pNext
        commandBufferUsageFlags,                     // flags
        nullptr,                                     // pInheritanceInfo
    };
    if (valid) {
        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo));                                   // pBeginInfo
    }

    StateCache stateCache;

    const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
    const uint32_t commandCount = static_cast<uint32_t>(rcRef.size());
    const RenderCommandBeginRenderPass* rcBeginRenderPass =
        (mrpcld.rpBeginCmdIndex < commandCount)
            ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
            : nullptr;
    const RenderCommandEndRenderPass* rcEndRenderPass =
        (mrpcld.rpEndCmdIndex < commandCount)
            ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
            : nullptr;

    if (rcBeginRenderPass && rcEndRenderPass) {
        if (mrpcld.rpBarrierCmdIndex < commandCount) {
            const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
            PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
            const RenderCommandBarrierPoint& barrierPoint =
                *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
            // handle all barriers before render command that needs resource syncing
            RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
        }

        // begin render pass
        stateCache.primaryRenderPass = true;
        RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
        stateCache.primaryRenderPass = false;

        // get secondary command buffers from correct indices and execute
        for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
            const uint32_t currCmdBufIdx = cmdBufIdx + idx;
            PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
            const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
            NodeContextPoolManagerVk& contextPoolVk =
                *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);

            const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
            const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
            const uint32_t mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
            // the next subpass is only begun from the second command list onwards
            if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
                RenderCommandBeginRenderPass renderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
                renderPass.renderPassDesc.subpassContents =
                    SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
                stateCache.renderCommandBeginRenderPass = nullptr; // reset
                RenderCommand(
                    renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
            }
            RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
        }

        // end render pass (restore the primary render pass as current)
        stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
        // NOTE: the render graph has batched the subpasses to END_SUBPASS; here we need END_RENDER_PASS
        constexpr RenderCommandEndRenderPass rcerp = {};
        RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
    }

    // end cmd buffer
    if (valid) {
        VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
    }

    commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
}

void RenderBackendVk::RenderExecuteSecondaryCommandLists(
    const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
{
    if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
        vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
            1u,                                       // commandBufferCount
            &executeCmdBuffer.commandBuffer);         // pCommandBuffers
    }
}

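// Builds the VkCommandBufferInheritanceInfo required when recording a secondary command
// buffer with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT: the render pass and
// subpass index must match the pass begun in the primary command buffer.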
VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
    const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
{
    NodeContextPoolManagerVk& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
    const uint32_t cmdCount = static_cast<uint32_t>(rcRef.size());

    const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
    PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
    PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
    if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
        const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
        PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
        const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
        LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);

        const uint32_t subpass = renderCmd.subpassStartIndex;
        return VkCommandBufferInheritanceInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
            nullptr,                                           // pNext
            lowLevelRenderPassData.renderPass,                 // renderPass
            subpass,                                           // subpass
            VK_NULL_HANDLE,                                    // framebuffer
            VK_FALSE,                                          // occlusionQueryEnable
            0,                                                 // queryFlags
            0,                                                 // pipelineStatistics
        };
    } else {
        return VkCommandBufferInheritanceInfo {};
    }
}

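// Records one render command list into a Vulkan command buffer: resets and begins the
// command buffer (unless it continues a multi-render-node command buffer), updates the
// descriptor sets written by the list, then translates each engine render command into
// the corresponding Vulkan commands.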
void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
    const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
{
    // these are validated in render graph
    const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
    const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
    NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
    NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;

    contextPoolMgr.BeginBackendFrame();
    ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
    nodeContextPsoMgr.BeginBackendFrame();

    const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();

    StateCache stateCache = {}; // state cache for this render command list
    stateCache.backendNode = renderCommandCtx.renderBackendNode;
    stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;

    // the command buffers have been waited on with a single frame fence
    const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
    const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
    const bool endCommandBuffer =
        (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
    const ContextCommandPoolVk* ptrCmdPool = nullptr;
    if (mrclDesc.multiRenderNodeCmdList) {
        PLUGIN_ASSERT(mrclDesc.baseContext);
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
                          ->GetContextCommandPool();
    } else if (mrclDesc.secondaryCommandBuffer) {
        PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
    } else {
        ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
    }

    // update cmd list context descriptor sets
    UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);

    PLUGIN_ASSERT(ptrCmdPool);
    const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;

#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
    const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
#endif
    PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
    PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
#endif

    if (beginCommandBuffer) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
        VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
            ptrCmdPool->commandPool,                  // commandPool
            commandPoolResetFlags));                  // flags

        VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
        VkCommandBufferInheritanceInfo inheritanceInfo {};
        if (stateCache.secondaryCommandBuffer) {
            commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
            inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
        }
        const VkCommandBufferBeginInfo commandBufferBeginInfo {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,                    // sType
            nullptr,                                                        // pNext
            commandBufferUsageFlags,                                        // flags
            mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
        };

        VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
            &commandBufferBeginInfo));                                   // pBeginInfo

#if (RENDER_PERF_ENABLED == 1)
        if (perfDataSet) {
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            if (validGpuQueries) {
                GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
                PLUGIN_ASSERT(gpuQuery);

                gpuQuery->NextQueryIndex();

                WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
                    VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
            }
#endif
            perfDataSet->cpuTimer.Begin();
        }
#endif
    }

#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
        const VkDebugUtilsLabelEXT label {
            VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
            nullptr,                                 // pNext
            debugNames.renderCommandListName.data(), // pLabelName
            { 1.f, 1.f, 1.f, 1.f }                   // color[4]
        };
        deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
    }
#endif

    for (const auto& ref : rcRef) {
        if (!stateCache.validCommandList) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
                "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
                debugNames.renderCommandListName.data());
#endif
            break;
        }

        PLUGIN_ASSERT(ref.rc);
#if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
        if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
            const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
            const VkDebugUtilsLabelEXT label {
                VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
                nullptr,                                 // pNext
                COMMAND_NAMES[index],                    // pLabelName
                { 0.87f, 0.83f, 0.29f, 1.f }             // color[4]
            };
            deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
        }
#endif

        switch (ref.type) {
            case RenderCommandType::BARRIER_POINT: {
                if (!stateCache.secondaryCommandBuffer) {
                    const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
                    // handle all barriers before render command that needs resource syncing
                    RenderCommand(
                        barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
                }
                break;
            }
            case RenderCommandType::DRAW: {
                RenderCommand(
                    *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DRAW_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH: {
                RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DISPATCH_INDIRECT: {
                RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_PIPELINE: {
                RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BEGIN_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::NEXT_SUBPASS: {
                RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::END_RENDER_PASS: {
                RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_VERTEX_BUFFERS: {
                RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_INDEX_BUFFER: {
                RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER: {
                RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_BUFFER_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::COPY_IMAGE: {
                RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BIND_DESCRIPTOR_SETS: {
                RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
                break;
            }
            case RenderCommandType::PUSH_CONSTANT: {
                RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BLIT_IMAGE: {
                RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
                RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::CLEAR_COLOR_IMAGE: {
                RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            // dynamic states
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
                RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
                RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
                RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
                    nodeContextPsoMgr, contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
                RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
                    contextPoolMgr, stateCache);
                break;
            }
            case RenderCommandType::DYNAMIC_STATE_STENCIL: {
                RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1090                     contextPoolMgr, stateCache);
1091                 break;
1092             }
1093             case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
1094                 RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
1095                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1096                 break;
1097             }
1098             case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1099                 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1100                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1101                 break;
1102             }
            // timestamp queries
1104             case RenderCommandType::WRITE_TIMESTAMP: {
1105                 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1106                     contextPoolMgr, stateCache);
1107                 break;
1108             }
1109             case RenderCommandType::UNDEFINED:
1110             case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1111             case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1112             default: {
                PLUGIN_ASSERT(false && "invalid render command");
1114                 break;
1115             }
1116         }
1117 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
1118         if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1119             deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1120         }
1121 #endif
1122     }
1123 
    if (!presentationData_.infos.empty()) {
1125         RenderPresentationLayout(cmdBuffer, cmdBufIdx);
1126     }
1127 
1128 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1129     if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1130         deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1131     }
1132 #endif
1133 
1134 #if (RENDER_PERF_ENABLED == 1)
1135     // copy counters
1136     if (perfDataSet) {
1137         CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
1138     }
1139 #endif
1140 
1141     if (endCommandBuffer) {
1142 #if (RENDER_PERF_ENABLED == 1)
1143         if (perfDataSet) {
1144             perfDataSet->cpuTimer.End();
1145         }
1146 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
1147         if (validGpuQueries) {
1148             WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
1149                 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
1150         }
1151 #endif
1152         CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
1153 #endif
1154 
1155         VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
1156 
1157         if (mrclDesc.secondaryCommandBuffer) {
1158             commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
1159         } else {
1160             commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
1161         }
1162     }
1163 }
1164 
1165 void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1166     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1167 {
1168     const RenderHandle psoHandle = renderCmd.psoHandle;
1169     const VkPipelineBindPoint pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;
1170 
1171     stateCache.psoHandle = psoHandle;
1172 
1173     VkPipeline pipeline { VK_NULL_HANDLE };
1174     VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
1175     if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
1176         const ComputePipelineStateObjectVk* pso = static_cast<const ComputePipelineStateObjectVk*>(
1177             psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
1178         if (pso) {
1179             const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1180             pipeline = plat.pipeline;
1181             pipelineLayout = plat.pipelineLayout;
1182         }
1183     } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
1184         PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1185         if (stateCache.renderCommandBeginRenderPass) {
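            // graphics PSO lookup: the hash combines the render pass compatibility hash
            // with the pipeline descriptor set hash (when present)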
1186             uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
1187             if (stateCache.pipelineDescSetHash != 0) {
1188                 HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
1189             }
1190             const GraphicsPipelineStateObjectVk* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
1191                 psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
1192                     stateCache.renderCommandBeginRenderPass->subpasses,
1193                     stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
1194                     &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
1195             if (pso) {
1196                 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1197                 pipeline = plat.pipeline;
1198                 pipelineLayout = plat.pipelineLayout;
1199             }
1200         }
1201     }
1202 
    // NOTE: the render front-end expects the PSO to be bound after beginning the render pass.
    // The render pass may change, which changes the pipeline; if the render pass
    // stays the same, rebinding is not needed.
    const bool newPipeline = (pipeline != stateCache.pipeline);
    const bool valid = (pipeline != VK_NULL_HANDLE);
1208     if (valid && newPipeline) {
1209         stateCache.pipeline = pipeline;
1210         stateCache.pipelineLayout = pipelineLayout;
1211         stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
1212         vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
1213             pipelineBindPoint,                  // pipelineBindPoint
1214             pipeline);                          // pipeline
1215 #if (RENDER_PERF_ENABLED == 1)
1216         stateCache.perfCounters.bindPipelineCount++;
1217 #endif
1218     }
1219 }
1220 
1221 void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1222     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1223 {
1224     if (stateCache.validBindings) {
1225         if (renderCmd.indexCount) {
1226             vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
1227                 renderCmd.indexCount,              // indexCount
1228                 renderCmd.instanceCount,           // instanceCount
1229                 renderCmd.firstIndex,              // firstIndex
1230                 renderCmd.vertexOffset,            // vertexOffset
1231                 renderCmd.firstInstance);          // firstInstance
1232 #if (RENDER_PERF_ENABLED == 1)
1233             stateCache.perfCounters.drawCount++;
1234             stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
            // triangle count assumes triangle lists (3 vertices per triangle)
            stateCache.perfCounters.triangleCount += (renderCmd.indexCount / 3u) * renderCmd.instanceCount;
1236 #endif
1237         } else {
1238             vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
1239                 renderCmd.vertexCount,      // vertexCount
1240                 renderCmd.instanceCount,    // instanceCount
1241                 renderCmd.firstVertex,      // firstVertex
1242                 renderCmd.firstInstance);   // firstInstance
1243 #if (RENDER_PERF_ENABLED == 1)
1244             stateCache.perfCounters.drawCount++;
1245             stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
            // triangle count assumes triangle lists (3 vertices per triangle)
            stateCache.perfCounters.triangleCount += (renderCmd.vertexCount / 3u) * renderCmd.instanceCount;
1248 #endif
1249         }
1250     }
1251 }
1252 
1253 void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1254     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1255 {
1256     if (stateCache.validBindings) {
1257         if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1258             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1259             const VkBuffer buffer = plat.buffer;
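            // mapped currentByteOffset (dynamic ring buffer offset) taken into account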
1260             const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1261             if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1262                 vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
1263                     buffer,                                    // buffer
1264                     offset,                                    // offset
1265                     renderCmd.drawCount,                       // drawCount
1266                     renderCmd.stride);                         // stride
1267             } else {
1268                 vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
1269                     buffer,                             // buffer
                    offset,                         // offset (includes mapped currentByteOffset)
1271                     renderCmd.drawCount,                // drawCount
1272                     renderCmd.stride);                  // stride
1273             }
1274 #if (RENDER_PERF_ENABLED == 1)
1275             stateCache.perfCounters.drawIndirectCount++;
1276 #endif
1277         }
1278     }
1279 }
1280 
1281 void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1282     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1283 {
1284     if (stateCache.validBindings) {
1285         vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
1286             renderCmd.groupCountX,          // groupCountX
1287             renderCmd.groupCountY,          // groupCountY
1288             renderCmd.groupCountZ);         // groupCountZ
1289 #if (RENDER_PERF_ENABLED == 1)
1290         stateCache.perfCounters.dispatchCount++;
1291 #endif
1292     }
1293 }
1294 
1295 void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
1296     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1297     const StateCache& stateCache)
1298 {
1299     if (stateCache.validBindings) {
1300         if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1301             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1302             const VkBuffer buffer = plat.buffer;
1303             const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1304             vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
1305                 buffer,                                 // buffer
1306                 offset);                                // offset
1307 #if (RENDER_PERF_ENABLED == 1)
1308             stateCache.perfCounters.dispatchIndirectCount++;
1309 #endif
1310         }
1311     }
1312 }
1313 
1314 void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
1315     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1316     StateCache& stateCache)
1317 {
1318     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
1319     stateCache.renderCommandBeginRenderPass = &renderCmd;
1320 
1321     NodeContextPoolManagerVk& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
1322     // NOTE: state cache could be optimized to store lowLevelRenderPassData in multi-rendercommandlist-case
1323     stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
1324 
1325     // early out for multi render command list render pass
1326     if (stateCache.secondaryCommandBuffer) {
1327         return; // early out
1328     }
1329     const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
1330                             (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
1331     // invalidate the whole command list
1332     if (!validRpFbo) {
1333         stateCache.validCommandList = false;
1334         return; // early out
1335     }
1336 
1337     if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
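        // this render command list continues a render pass begun by an earlier list;
        // only the subpass is advanced here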
1338 #if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
        // fix for e.g. MoltenVK MSAA resolve not working on macOS (we do not execute subpasses)
1340         if ((!stateCache.renderCommandBeginRenderPass->subpasses.empty()) &&
1341             stateCache.renderCommandBeginRenderPass->subpasses[0].resolveAttachmentCount == 0) {
1342             const VkSubpassContents subpassContents =
1343                 static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1344             vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1345                 subpassContents);                  // contents
1346         }
1347 #else
1348         const VkSubpassContents subpassContents =
1349             static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1350         vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1351             subpassContents);                  // contents
1352 #endif
1353         return; // early out
1354     }
1355 
1356     const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;
1357 
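    // gather clear values; only attachments whose load op (or stencil load op) is CLEAR need valid entries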
1358     VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1359     bool hasClearValues = false;
1360     for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
1361         const auto& ref = renderPassDesc.attachments[idx];
1362         if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
1363             ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1364             const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
1365             VkClearValue clearValue;
1366             if (RenderHandleUtil::IsDepthImage(handle)) {
1367                 PLUGIN_STATIC_ASSERT(sizeof(clearValue.depthStencil) == sizeof(ref.clearValue.depthStencil));
1368                 clearValue.depthStencil.depth = ref.clearValue.depthStencil.depth;
1369                 clearValue.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
1370             } else {
1371                 PLUGIN_STATIC_ASSERT(sizeof(clearValue.color) == sizeof(ref.clearValue.color));
1372                 if (!CloneData(&clearValue.color, sizeof(clearValue.color), &ref.clearValue.color,
1373                         sizeof(ref.clearValue.color))) {
1374                     PLUGIN_LOG_E("Copying of clearValue.color failed.");
1375                 }
1376             }
1377             clearValues[idx] = clearValue;
1378             hasClearValues = true;
1379         }
1380     }
1381 
1382     // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
1383     // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
1384     const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;
1385 
1386     VkRect2D renderArea {
1387         { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
1388         { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
1389     };
1390     // render area needs to be inside frame buffer
1391     const auto& lowLevelData = stateCache.lowLevelRenderPassData;
1392     renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
1393     renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
1394     renderArea.extent.width = Math::min(renderArea.extent.width,
1395         static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
1396     renderArea.extent.height = Math::min(renderArea.extent.height,
1397         static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
1398 
1399     const VkRenderPassBeginInfo renderPassBeginInfo {
1400         VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,      // sType
1401         nullptr,                                       // pNext
1402         stateCache.lowLevelRenderPassData.renderPass,  // renderPass
1403         stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
1404         renderArea,                                    // renderArea
1405         clearValueCount,                               // clearValueCount
1406         clearValues,                                   // pClearValues
1407     };
1408 
1409     // NOTE: could be patched in render graph
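    // when the render pass contents are recorded in secondary command buffers the primary
    // command buffer must begin the pass with VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
    // otherwise the commands are recorded inline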
1410     const VkSubpassContents subpassContents =
1411         stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
1412     vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
1413         &renderPassBeginInfo,                  // pRenderPassBegin
1414         subpassContents);                      // contents
1415 #if (RENDER_PERF_ENABLED == 1)
1416     stateCache.perfCounters.renderPassCount++;
1417 #endif
1418 }
1419 
1420 void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1421     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1422 {
1423     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1424 
1425     const VkSubpassContents subpassContents = (VkSubpassContents)renderCmd.subpassContents;
1426     vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1427         subpassContents);                  // contents
1428 }
1429 
1430 void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1431     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1432 {
1433     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1434 
1435     // early out for multi render command list render pass
    if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
        return; // the render pass continues in a following render command list
    }
1439 
1440     stateCache.renderCommandBeginRenderPass = nullptr;
1441     stateCache.lowLevelRenderPassData = {};
1442 
1443     if (!stateCache.secondaryCommandBuffer) {
1444         vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
1445     }
1446 }
1447 
1448 void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
1449     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1450     const StateCache& stateCache)
1451 {
1452     PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1453     PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1454 
1455     const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;
1456 
    // zero-initialized so that a failed buffer fetch leaves null handles instead of garbage
    VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT] {};
    VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT] {};
1459     const GpuBufferVk* gpuBuffer = nullptr;
1460     RenderHandle currBufferHandle;
1461     for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
1462         const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
        // the importer usually uses the same GPU buffer for all vertex buffers of a single primitive;
        // do not re-fetch the buffer if the handle has not changed
1465         if (currBufferHandle.id != currVb.bufferHandle.id) {
1466             currBufferHandle = currVb.bufferHandle;
1467             gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
1468         }
1469         if (gpuBuffer) {
1470             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1471             const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
1472             vertexBuffers[idx] = plat.buffer;
1473             offsets[idx] = offset;
1474         }
1475     }
1476 
1477     vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
1478         0,                                       // firstBinding
1479         vertexBufferCount,                       // bindingCount
1480         vertexBuffers,                           // pBuffers
1481         offsets);                                // pOffsets
1482 }
1483 
1484 void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
1485     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1486     const StateCache& stateCache)
1487 {
1488     const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);
1489 
1490     PLUGIN_ASSERT(gpuBuffer);
1491     if (gpuBuffer) {
1492         const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1493         const VkBuffer buffer = plat.buffer;
1494         const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
1495         const VkIndexType indexType = (VkIndexType)renderCmd.indexBuffer.indexType;
1496 
1497         vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
1498             buffer,                                // buffer
1499             offset,                                // offset
1500             indexType);                            // indexType
1501     }
1502 }
1503 
1504 void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1505     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1506 {
1507     const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1508     const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1509     if (srcImagePtr && dstImagePtr) {
1510         const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
        const GpuImagePlatformDataVk& dstPlatImage = dstImagePtr->GetPlatformData();
1512 
1513         const ImageBlit& ib = renderCmd.imageBlit;
1514         const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1515                                            ? srcPlatImage.arrayLayers
1516                                            : ib.srcSubresource.layerCount;
1517         const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1518                                            ? dstPlatImage.arrayLayers
1519                                            : ib.dstSubresource.layerCount;
1520 
1521         const VkImageSubresourceLayers srcSubresourceLayers {
1522             (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
1523             ib.srcSubresource.mipLevel,                             // mipLevel
1524             ib.srcSubresource.baseArrayLayer,                       // baseArrayLayer
1525             srcLayerCount,                                          // layerCount
1526         };
1527         const VkImageSubresourceLayers dstSubresourceLayers {
1528             (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
1529             ib.dstSubresource.mipLevel,                             // mipLevel
1530             ib.dstSubresource.baseArrayLayer,                       // baseArrayLayer
1531             dstLayerCount,                                          // layerCount
1532         };
1533 
1534         const VkImageBlit imageBlit {
1535             srcSubresourceLayers, // srcSubresource
1536             { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
1537                 { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
1538                     (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
1539             dstSubresourceLayers,                        // dstSubresource
1540             { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
1541                 { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
1542                     (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
1543         };
1544 
1545         vkCmdBlitImage(cmdBuf.commandBuffer,         // commandBuffer
1546             srcPlatImage.image,                      // srcImage
            (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout
1548             dstPlatImage.image,                      // dstImage
1549             (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
1550             1,                                       // regionCount
1551             &imageBlit,                              // pRegions
1552             (VkFilter)renderCmd.filter);             // filter
1553     }
1554 }
1555 
1556 void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1557     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1558 {
1559     const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1560     const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1561 
1562     PLUGIN_ASSERT(srcGpuBuffer);
1563     PLUGIN_ASSERT(dstGpuBuffer);
1564 
1565     if (srcGpuBuffer && dstGpuBuffer) {
1566         const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
1567         const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
1568         const VkBufferCopy bufferCopy {
1569             renderCmd.bufferCopy.srcOffset,
1570             renderCmd.bufferCopy.dstOffset,
1571             renderCmd.bufferCopy.size,
1572         };
1573 
1574         if (bufferCopy.size > 0) {
1575             vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
1576                 srcBuffer,                        // srcBuffer
1577                 dstBuffer,                        // dstBuffer
1578                 1,                                // regionCount
1579                 &bufferCopy);                     // pRegions
1580         }
1581     }
1582 }
1583 
1584 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1585     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1586     const StateCache& stateCache)
1587 {
    if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
        PLUGIN_ASSERT(false && "undefined copy type");
        return;
    }
1592 
1593     const GpuBufferVk* gpuBuffer = nullptr;
1594     const GpuImageVk* gpuImage = nullptr;
1595     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1596         gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1597         gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1598     } else {
1599         gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1600         gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1601     }
1602 
1603     if (gpuBuffer && gpuImage) {
1604         const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1605         const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1606         const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1607         const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1608                                         ? platImage.arrayLayers
1609                                         : subresourceLayer.layerCount;
1610         const VkImageSubresourceLayers imageSubresourceLayer {
1611             (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1612             subresourceLayer.mipLevel,
1613             subresourceLayer.baseArrayLayer,
1614             layerCount,
1615         };
1616         const GpuImageDesc& imageDesc = gpuImage->GetDesc();
        // Math::min/Math::max to force staying inside the image; mip chain dimensions clamp to at least 1
        const uint32_t mip = subresourceLayer.mipLevel;
        const VkExtent3D imageSize { Math::max(imageDesc.width >> mip, 1u), Math::max(imageDesc.height >> mip, 1u),
            imageDesc.depth };
1620         const Size3D& imageOffset = bufferImageCopy.imageOffset;
1621         const VkExtent3D imageExtent = {
1622             Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1623             Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1624             Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1625         };
1626         const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1627                            (imageOffset.depth < imageSize.depth);
1628         const VkBufferImageCopy bufferImageCopyVk {
1629             bufferImageCopy.bufferOffset,
1630             bufferImageCopy.bufferRowLength,
1631             bufferImageCopy.bufferImageHeight,
1632             imageSubresourceLayer,
1633             { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1634                 static_cast<int32_t>(imageOffset.depth) },
1635             imageExtent,
1636         };
1637 
1638         const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1639         const VkImage image = (gpuImage->GetPlatformData()).image;
1640 
1641         if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1642             vkCmdCopyBufferToImage(cmdBuf.commandBuffer,             // commandBuffer
1643                 buffer,                                              // srcBuffer
1644                 image,                                               // dstImage
1645                 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1646                 1,                                                   // regionCount
1647                 &bufferImageCopyVk);                                 // pRegions
1648         } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1649             vkCmdCopyImageToBuffer(cmdBuf.commandBuffer,             // commandBuffer
1650                 image,                                               // srcImage
1651                 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1652                 buffer,                                              // dstBuffer
1653                 1,                                                   // regionCount
1654                 &bufferImageCopyVk);                                 // pRegions
1655         }
1656     }
1657 }
1658 
1659 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1660     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1661 {
1662     const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1663     const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1664     if (srcGpuImage && dstGpuImage) {
1665         const ImageCopy& copy = renderCmd.imageCopy;
1666         const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1667         const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1668 
1669         const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1670         const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1671         const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1672                                            ? srcPlatImage.arrayLayers
1673                                            : srcSubresourceLayer.layerCount;
1674         const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1675                                            ? dstPlatImage.arrayLayers
1676                                            : dstSubresourceLayer.layerCount;
1677 
1678         const VkImageSubresourceLayers srcImageSubresourceLayer {
1679             (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1680             srcSubresourceLayer.mipLevel,
1681             srcSubresourceLayer.baseArrayLayer,
1682             srcLayerCount,
1683         };
1684         const VkImageSubresourceLayers dstImageSubresourceLayer {
1685             (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1686             dstSubresourceLayer.mipLevel,
1687             dstSubresourceLayer.baseArrayLayer,
1688             dstLayerCount,
1689         };
1690 
1691         const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1692         const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1693 
1694         VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1695         ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1696         ext.height =
1697             Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1698         ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1699 
1700         const VkImageCopy imageCopyVk {
1701             srcImageSubresourceLayer,                                 // srcSubresource
1702             { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1703             dstImageSubresourceLayer,                                 // dstSubresource
1704             { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1705             ext,                                                      // extent
1706         };
1707         vkCmdCopyImage(cmdBuf.commandBuffer,                     // commandBuffer
1708             srcPlatImage.image,                                  // srcImage
1709             VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1710             dstPlatImage.image,                                  // dstImage
1711             VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1712             1,                                                   // regionCount
1713             &imageCopyVk);                                       // pRegions
1714     }
1715 }
1716 
1717 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1718     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1719     const RenderBarrierList& rbl)
1720 {
1721     if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1722         return;
1723     }
1724 
1725     const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1726         rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1727     PLUGIN_ASSERT(barrierPointBarriers);
1728     if (!barrierPointBarriers) {
1729         return;
1730     }
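    // barriers are batched into fixed-size local arrays and flushed with
    // vkCmdPipelineBarrier whenever one of the arrays fills up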
1731     constexpr uint32_t maxBarrierCount { 8 };
1732     VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1733     VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1734     VkMemoryBarrier memoryBarriers[maxBarrierCount];
1735 
    // generally there is only a single barrier list per barrier point;
    // with batched render passes there can be many
    // NOTE: all barrier lists could be patched into a single vk command if needed
    // NOTE: memory and pipeline barriers should be allowed on the front-end side
1740     const uint32_t barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
1741     const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1742 #if (RENDER_VALIDATION_ENABLED == 1)
1743     uint32_t fullBarrierCount = 0u;
1744 #endif
1745     for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
        if (nextBarrierList == nullptr) { // should never be null; checked for safety
1747             PLUGIN_ASSERT(false);
1748             return;
1749         }
1750         const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1751         nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1752         const uint32_t barrierCount = (uint32_t)barrierListRef.count;
1753 
1754         uint32_t bufferBarrierIdx = 0;
1755         uint32_t imageBarrierIdx = 0;
1756         uint32_t memoryBarrierIdx = 0;
1757 
1758         VkPipelineStageFlags srcPipelineStageMask { 0 };
1759         VkPipelineStageFlags dstPipelineStageMask { 0 };
1760         constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1761 
1762         for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1763             const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1764 
1765             uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1766             uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
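            // explicit queue family indices are only needed for queue ownership transfers between different queues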
1767             if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1768                 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1769                 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1770             }
1771 
1772             const RenderHandle resourceHandle = ref.resourceHandle;
1773             const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1774 
1775             PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1776                           (handleType == RenderHandleType::GPU_IMAGE));
1777 
1778             const VkAccessFlags srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1779             const VkAccessFlags dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1780 
1781             srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1782             dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1783 
1784             // NOTE: zero size buffer barriers allowed ATM
1785             if (handleType == RenderHandleType::GPU_BUFFER) {
1786                 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
1787                     const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1788                     // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1789                     const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1790                     const VkDeviceSize size =
1791                         Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1792                             (VkDeviceSize)ref.dst.optionalByteSize);
1793                     if (platBuffer.buffer) {
1794                         bufferMemoryBarriers[bufferBarrierIdx++] = {
1795                             VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1796                             nullptr,                                 // pNext
1797                             srcAccessMask,                           // srcAccessMask
1798                             dstAccessMask,                           // dstAccessMask
1799                             srcQueueFamilyIndex,                     // srcQueueFamilyIndex
1800                             dstQueueFamilyIndex,                     // dstQueueFamilyIndex
1801                             platBuffer.buffer,                       // buffer
1802                             offset,                                  // offset
1803                             size,                                    // size
1804                         };
1805                     }
1806                 }
1807             } else if (handleType == RenderHandleType::GPU_IMAGE) {
1808                 if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
1809                     const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1810 
1811                     const VkImageLayout srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1812                     const VkImageLayout dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1813 
1814                     const VkImageAspectFlags imageAspectFlags =
1815                         (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1816                             ? platImage.aspectFlags
1817                             : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1818 
1819                     const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1820                                                     PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1821                                                     ? VK_REMAINING_MIP_LEVELS
1822                                                     : ref.src.optionalImageSubresourceRange.levelCount;
1823 
1824                     const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1825                                                     PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1826                                                     ? VK_REMAINING_ARRAY_LAYERS
1827                                                     : ref.src.optionalImageSubresourceRange.layerCount;
1828 
1829                     const VkImageSubresourceRange imageSubresourceRange {
1830                         imageAspectFlags,                                     // aspectMask
1831                         ref.src.optionalImageSubresourceRange.baseMipLevel,   // baseMipLevel
1832                         levelCount,                                           // levelCount
1833                         ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1834                         layerCount,                                           // layerCount
1835                     };
1836 
1837                     if (platImage.image) {
1838                         imageMemoryBarriers[imageBarrierIdx++] = {
1839                             VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1840                             nullptr,                                // pNext
1841                             srcAccessMask,                          // srcAccessMask
1842                             dstAccessMask,                          // dstAccessMask
1843                             srcImageLayout,                         // oldLayout
1844                             dstImageLayout,                         // newLayout
1845                             srcQueueFamilyIndex,                    // srcQueueFamilyIndex
1846                             dstQueueFamilyIndex,                    // dstQueueFamilyIndex
1847                             platImage.image,                        // image
1848                             imageSubresourceRange,                  // subresourceRange
1849                         };
1850                     }
1851                 }
1852             } else {
1853                 memoryBarriers[memoryBarrierIdx++] = {
1854                     VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1855                     nullptr,                          // pNext
1856                     srcAccessMask,                    // srcAccessMask
1857                     dstAccessMask,                    // dstAccessMask
1858                 };
1859             }
1860 
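            // flush when any of the local barrier arrays is full or when this was the last barrier of the list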
1861             const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
            const bool resetBarriers = (bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
                                       (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1));
1866 
1867             if (hasBarriers && resetBarriers) {
1868 #if (RENDER_VALIDATION_ENABLED == 1)
1869                 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1870 #endif
1871                 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1872                     srcPipelineStageMask,                  // srcStageMask
1873                     dstPipelineStageMask,                  // dstStageMask
1874                     dependencyFlags,                       // dependencyFlags
1875                     memoryBarrierIdx,                      // memoryBarrierCount
1876                     memoryBarriers,                        // pMemoryBarriers
1877                     bufferBarrierIdx,                      // bufferMemoryBarrierCount
1878                     bufferMemoryBarriers,                  // pBufferMemoryBarriers
1879                     imageBarrierIdx,                       // imageMemoryBarrierCount
1880                     imageMemoryBarriers);                  // pImageMemoryBarriers
1881 
1882                 bufferBarrierIdx = 0;
1883                 imageBarrierIdx = 0;
1884                 memoryBarrierIdx = 0;
1885             }
1886         }
1887     }
1888 #if (RENDER_VALIDATION_ENABLED == 1)
1889     if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1890         PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1891             "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1892             barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1893     }
1894 #endif
1895 }
1896 
1897 namespace {
1898 struct DescriptorSetUpdateDataStruct {
1899     uint32_t accelIndex { 0U };
1900     uint32_t bufferIndex { 0U };
1901     uint32_t imageIndex { 0U };
1902     uint32_t samplerIndex { 0U };
1903     uint32_t writeBindIdx { 0U };
1904 };
1905 } // namespace
1906 
1907 void RenderBackendVk::UpdateCommandListDescriptorSets(
1908     const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
1909 {
1910     NodeContextDescriptorSetManagerVk& ctxDescMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;
1911 
1912     const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
1913     const uint32_t upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
1914     LowLevelContextDescriptorWriteDataVk& wd = ctxDescMgr.GetLowLevelDescriptorWriteData();
1915     DescriptorSetUpdateDataStruct dsud;
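    // for each valid handle: update the GPU descriptor set indices first, then gather
    // VkWriteDescriptorSet entries into the preallocated low-level write data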
1916     for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
1917         if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
1918             (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
1919             continue;
1920         }
1921         const RenderHandle descHandle = allDescSets[descIdx];
1922         // first update gpu descriptor indices
1923         ncdsm.UpdateDescriptorSetGpuHandle(descHandle);
1924 
1925         // actual vulkan descriptor set update
1926         const LowLevelDescriptorSetVk* descriptorSet = ctxDescMgr.GetDescriptorSet(descHandle);
1927         if (descriptorSet && descriptorSet->descriptorSet) {
1928             const DescriptorSetLayoutBindingResources bindingResources = ncdsm.GetCpuDescriptorSetData(descHandle);
1929 #if (RENDER_VALIDATION_ENABLED == 1)
1930             // get descriptor counts
1931             const LowLevelDescriptorCountsVk& descriptorCounts = ctxDescMgr.GetLowLevelDescriptorCounts(descHandle);
1932             if ((uint32_t)bindingResources.bindings.size() > descriptorCounts.writeDescriptorCount) {
                PLUGIN_LOG_E("RENDER_VALIDATION: descriptor set update binding count exceeds the descriptor set's binding count");
1934             }
1935 #endif
1936             if ((uint32_t)bindingResources.bindings.size() >
1937                 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
1938                 PLUGIN_ASSERT(false);
1939                 continue;
1940             }
1941             const auto& buffers = bindingResources.buffers;
1942             const auto& images = bindingResources.images;
1943             const auto& samplers = bindingResources.samplers;
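            // acceleration structures are carried in the buffer bindings with a dedicated descriptor type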
1944             for (const auto& ref : buffers) {
1945                 const uint32_t descriptorCount = ref.binding.descriptorCount;
                // skip extra array entries; they are referenced via the first binding's arrayOffset and have descriptorCount 0
1947                 if (descriptorCount == 0) {
1948                     continue;
1949                 }
1950                 const uint32_t arrayOffset = ref.arrayOffset;
1951                 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1952                 if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1953 #if (RENDER_VULKAN_RT_ENABLED == 1)
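                    // NOTE: pAccelerationStructures points to the single handle stored inside the resource;
                    // this is valid for descriptorCount == 1, larger arrays would need contiguous handles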
1954                     for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                        // the first resource comes from the binding itself; indices >= 1 use the array offset
1956                         const BindableBuffer& bRes =
1957                             (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1958                         if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1959                             const GpuAccelerationStructurePlatformDataVk& platAccel =
1960                                 resPtr->GetPlatformDataAccelerationStructure();
1961                             wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
1962                                 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1963                                 nullptr,                                                           // pNext
1964                                 descriptorCount,                  // accelerationStructureCount
1965                                 &platAccel.accelerationStructure, // pAccelerationStructures
1966                             };
1967                         }
1968                     }
1969                     wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1970                         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
1971                         &wd.descriptorAccelInfos[dsud.accelIndex],    // pNext
1972                         descriptorSet->descriptorSet,                 // dstSet
1973                         ref.binding.binding,                          // dstBinding
1974                         0,                                            // dstArrayElement
1975                         descriptorCount,                              // descriptorCount
1976                         (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1977                         nullptr,                                      // pImageInfo
1978                         nullptr,                                      // pBufferInfo
1979                         nullptr,                                      // pTexelBufferView
1980                     };
1981                     dsud.accelIndex += descriptorCount;
1982 #endif
                } else {
                    for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                        // index 0 is the ref itself; from index 1 onwards the resources come from the array offsets
                        const BindableBuffer& bRes =
                            (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
                        const VkDeviceSize optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
                        if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
                            const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
                            // takes into account dynamic ring buffers with mapping
                            const VkDeviceSize bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
                            const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
                            const VkDeviceSize bufferRange =
                                Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
                                    (VkDeviceSize)bRes.byteSize);
                            wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
                                platBuffer.buffer, // buffer
                                byteOffset,        // offset
                                bufferRange,       // range
                            };
                        }
                    }
                    wd.writeDescriptorSets[dsud.writeBindIdx++] = {
                        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
                        nullptr,                                      // pNext
                        descriptorSet->descriptorSet,                 // dstSet
                        ref.binding.binding,                          // dstBinding
                        0,                                            // dstArrayElement
                        descriptorCount,                              // descriptorCount
                        (VkDescriptorType)ref.binding.descriptorType, // descriptorType
                        nullptr,                                      // pImageInfo
                        &wd.descriptorBufferInfos[dsud.bufferIndex],  // pBufferInfo
                        nullptr,                                      // pTexelBufferView
                    };
                    dsud.bufferIndex += descriptorCount;
                }
            }
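            // For images the bound view defaults to the full image view; a
            // single-layer or single-mip view is selected below when the bindable
            // resource requests a specific layer and/or mip level.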
            for (const auto& ref : images) {
                const uint32_t descriptorCount = ref.binding.descriptorCount;
                // skip array bindings which are bound from the first index; they also have descriptorCount 0
                if (descriptorCount == 0) {
                    continue;
                }
                const VkDescriptorType descriptorType = (VkDescriptorType)ref.binding.descriptorType;
                const uint32_t arrayOffset = ref.arrayOffset;
                PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
                for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                    // index 0 is the ref itself; from index 1 onwards the resources come from the array offsets
                    const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].resource;
                    if (const GpuImageVk* resPtr = gpuResourceMgr_.GetImage<GpuImageVk>(bRes.handle); resPtr) {
                        VkSampler sampler = VK_NULL_HANDLE;
                        if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
                            const GpuSamplerVk* samplerPtr =
                                gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
                            if (samplerPtr) {
                                sampler = samplerPtr->GetPlatformData().sampler;
                            }
                        }
                        const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
                        const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
                        VkImageView imageView = platImage.imageView;
                        if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
                            (bRes.layer < platImageViews.layerImageViews.size())) {
                            imageView = platImageViews.layerImageViews[bRes.layer];
                        } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
                            if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
                                (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
                                imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
                            } else if (bRes.mip < platImageViews.mipImageViews.size()) {
                                imageView = platImageViews.mipImageViews[bRes.mip];
                            }
                        }
                        wd.descriptorImageInfos[dsud.imageIndex + idx] = {
                            sampler,                         // sampler
                            imageView,                       // imageView
                            (VkImageLayout)bRes.imageLayout, // imageLayout
                        };
                    }
                }
                wd.writeDescriptorSets[dsud.writeBindIdx++] = {
                    VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,    // sType
                    nullptr,                                   // pNext
                    descriptorSet->descriptorSet,              // dstSet
                    ref.binding.binding,                       // dstBinding
                    0,                                         // dstArrayElement
                    descriptorCount,                           // descriptorCount
                    descriptorType,                            // descriptorType
                    &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
                    nullptr,                                   // pBufferInfo
                    nullptr,                                   // pTexelBufferView
                };
                dsud.imageIndex += descriptorCount;
            }
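            // Plain samplers reuse VkDescriptorImageInfo with only the sampler
            // field populated.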
            for (const auto& ref : samplers) {
                const uint32_t descriptorCount = ref.binding.descriptorCount;
                // skip array bindings which are bound from the first index; they also have descriptorCount 0
                if (descriptorCount == 0) {
                    continue;
                }
                const uint32_t arrayOffset = ref.arrayOffset;
                PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
                for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                    // index 0 is the ref itself; from index 1 onwards the resources come from the array offsets
                    const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].resource;
                    if (const GpuSamplerVk* resPtr = gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
                        const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
                        wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
                            platSampler.sampler,      // sampler
                            VK_NULL_HANDLE,           // imageView
                            VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
                        };
                    }
                }
                wd.writeDescriptorSets[dsud.writeBindIdx++] = {
                    VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,        // sType
                    nullptr,                                       // pNext
                    descriptorSet->descriptorSet,                  // dstSet
                    ref.binding.binding,                           // dstBinding
                    0,                                             // dstArrayElement
                    descriptorCount,                               // descriptorCount
                    (VkDescriptorType)ref.binding.descriptorType,  // descriptorType
                    &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
                    nullptr,                                       // pBufferInfo
                    nullptr,                                       // pTexelBufferView
                };
                dsud.samplerIndex += descriptorCount;
            }

#if (RENDER_PERF_ENABLED == 1)
            // count the actual updated descriptor sets, not the API calls
            stateCache.perfCounters.updateDescriptorSetCount++;
#endif
        }
    }
    // update if the batch ended or we are the last descriptor set
    if (dsud.writeBindIdx > 0U) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        vkUpdateDescriptorSets(device,     // device
            dsud.writeBindIdx,             // descriptorWriteCount
            wd.writeDescriptorSets.data(), // pDescriptorWrites
            0,                             // descriptorCopyCount
            nullptr);                      // pDescriptorCopies
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
{
    const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;

    PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
    const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
                                                      ? VK_PIPELINE_BIND_POINT_COMPUTE
                                                      : VK_PIPELINE_BIND_POINT_GRAPHICS;
    const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;

    bool valid = (pipelineLayout != VK_NULL_HANDLE);
    const uint32_t firstSet = renderCmd.firstSet;
    const uint32_t setCount = renderCmd.setCount;
    if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
        uint32_t combinedDynamicOffsetCount = 0;
        uint32_t dynamicOffsetDescriptorSetIndices = 0;
        uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;

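        // gather the VkDescriptorSet handles and track which sets carry dynamic
        // offset descriptors; the immutable sampler bits are folded into
        // pipelineDescSetHash so that a mismatch triggers a pipeline re-bind below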
        VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
        const uint32_t firstPlusCount = firstSet + setCount;
        for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
            const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
            if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
                const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
                dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
                combinedDynamicOffsetCount += dynamicDescriptorCount;

                const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
                if (descriptorSet && descriptorSet->descriptorSet) {
                    descriptorSets[idx] = descriptorSet->descriptorSet;
                    // update, copy to state cache
                    PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
                    stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
                    const uint32_t currShift = (idx * 16u);
                    const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
                    const uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
                    // place this set's immutable sampler bits into its own 16-bit slot
                    stateCache.pipelineDescSetHash =
                        currHash | (static_cast<uint64_t>(descriptorSet->immutableSamplerBitmask) << currShift);
                } else {
                    valid = false;
                }
            }
        }

        uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
                                PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
        uint32_t dynamicOffsetIdx = 0;
        // NOTE: optimize
        // this code has some safety checks that the offset is not updated for non-dynamic sets
        // it could be left on only for validation
        for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
            if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
                const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
                const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
                const uint32_t dodResCount = static_cast<uint32_t>(dod.resources.size());
                const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
                for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
                    uint32_t byteOffset = 0U;
                    if (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount) {
                        // index with the set-local dodIdx; dynamicOffsetIdx runs over all sets
                        byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dodIdx];
                    }
                    dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
                }
            }
        }

        stateCache.validBindings = valid;
        if (stateCache.validBindings) {
            if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
                vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
                    pipelineBindPoint,                        // pipelineBindPoint
                    pipelineLayout,                           // layout
                    firstSet,                                 // firstSet
                    setCount,                                 // descriptorSetCount
                    &descriptorSets[firstSet],                // pDescriptorSets
                    dynamicOffsetIdx,                         // dynamicOffsetCount
                    dynamicOffsets);                          // pDynamicOffsets
#if (RENDER_PERF_ENABLED == 1)
                stateCache.perfCounters.bindDescriptorSetCount++;
#endif
            } else {
                // possible pso re-creation and bind of these sets to the new pso
                const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
                    (PipelineBindPoint)pipelineBindPoint };
                RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
                RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
            }
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
    PLUGIN_ASSERT(renderCmd.data);

    PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
    const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;

    const bool valid = (pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0);
    PLUGIN_ASSERT(valid);

    if (valid) {
        const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
        vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
            pipelineLayout,                      // layout
            shaderStageFlags,                    // stageFlags
            0,                                   // offset
            renderCmd.pushConstant.byteSize,     // size
            static_cast<void*>(renderCmd.data)); // pValues
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_RT_ENABLED == 1)
    // NOTE: missing
    const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.dstAccelerationStructure);
    const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
    if (dst && scratchBuffer) {
        const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
        const VkDevice device = devicePlat.device;

        const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
        const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;

        // scratch data with user offset
        const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
                                            VkDeviceSize(renderCmd.scratchOffset) };

        const size_t arraySize =
            renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
        vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
        vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);

        size_t arrayIndex = 0;
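        // triangles: one geometry entry per view; primitiveCount is indexCount / 3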
        for (const auto& trianglesRef : renderCmd.trianglesView) {
            geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
                nullptr,                                               // pNext
                VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
                {},                                                    // geometry
                0,                                                     // flags
            };
            uint32_t primitiveCount = 0;
            const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
            const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
            if (vb && ib) {
                const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
                    device, vb->GetPlatformData().buffer) };
                const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
                    device, ib->GetPlatformData().buffer) };
                VkDeviceOrHostAddressConstKHR transformData {};
                if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
                    if (const GpuBufferVk* tr =
                            gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
                        tr) {
                        // use the transform buffer (not the index buffer) for the transform address
                        transformData.deviceAddress = GetBufferDeviceAddress(device, tr->GetPlatformData().buffer);
                    }
                }
                primitiveCount = trianglesRef.info.indexCount / 3u; // triangles

                geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
                geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
                    VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
                    nullptr,                                                              // pNext
                    VkFormat(trianglesRef.info.vertexFormat),                             // vertexFormat
                    vertexData,                                                           // vertexData
                    VkDeviceSize(trianglesRef.info.vertexStride),                         // vertexStride
                    trianglesRef.info.maxVertex,                                          // maxVertex
                    VkIndexType(trianglesRef.info.indexType),                             // indexType
                    indexData,                                                            // indexData
                    transformData,                                                        // transformData
                };
            }
            buildRangeInfos[arrayIndex] = {
                primitiveCount, // primitiveCount
                0u,             // primitiveOffset
                0u,             // firstVertex
                0u,             // transformOffset
            };
            arrayIndex++;
        }
        for (const auto& aabbsRef : renderCmd.aabbsView) {
            geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
                nullptr,                                               // pNext
                VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
                {},                                                    // geometry
                0,                                                     // flags
            };
            VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
            if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle); iPtr) {
                deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
            }
            geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
            geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
                nullptr,                                                          // pNext
                deviceAddress,                                                    // data
                aabbsRef.info.stride,                                             // stride
            };
            buildRangeInfos[arrayIndex] = {
                1u, // primitiveCount
                0u, // primitiveOffset
                0u, // firstVertex
                0u, // transformOffset
            };
            arrayIndex++;
        }
        for (const auto& instancesRef : renderCmd.instancesView) {
            geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
                nullptr,                                               // pNext
                VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
                {},                                                    // geometry
                0,                                                     // flags
            };
            VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
            if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle);
                iPtr) {
                deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
            }
            geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
            geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
                nullptr,                                                              // pNext
                instancesRef.info.arrayOfPointers,                                    // arrayOfPointers
                deviceAddress,                                                        // data
            };
            buildRangeInfos[arrayIndex] = {
                1u, // primitiveCount
                0u, // primitiveOffset
                0u, // firstVertex
                0u, // transformOffset
            };
            arrayIndex++;
        }

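        // a single build info covers all geometries gathered above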
        const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
            nullptr,                                                          // pNext
            VkAccelerationStructureTypeKHR(renderCmd.type),                   // type
            VkBuildAccelerationStructureFlagsKHR(renderCmd.flags),            // flags
            VkBuildAccelerationStructureModeKHR(renderCmd.mode),              // mode
            VK_NULL_HANDLE,                                                   // srcAccelerationStructure
            dstAs,                                                            // dstAccelerationStructure
            uint32_t(arrayIndex),                                             // geometryCount
            geometryData.data(),                                              // pGeometries
            nullptr,                                                          // ppGeometries
            scratchData,                                                      // scratchData
        };

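        // ppBuildRangeInfos expects one pointer per build info; with infoCount == 1
        // only the first pointer is consumed and it must reference geometryCount
        // contiguous range infos, which the buildRangeInfos vector provides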
        vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
        for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
            buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
        }
        const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
        if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
            extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
                1u,                                                                // infoCount
                &buildGeometryInfo,                                                // pInfos
                buildRangeInfosPtr.data());                                        // ppBuildRangeInfos
        }
    }
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
    // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but we don't support it at the moment
    const VkImageLayout imageLayout = (VkImageLayout)renderCmd.imageLayout;
    PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
    if (imagePtr) {
        const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
        if (platImage.image) {
            VkClearColorValue clearColor;
            PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
            CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));

            // NOTE: a temporary vector is allocated because there is no fixed upper limit for the range count
            vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
            for (size_t idx = 0; idx < ranges.size(); ++idx) {
                const auto& inputRef = renderCmd.ranges[idx];
                ranges[idx] = {
                    (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
                    inputRef.baseMipLevel,                         // baseMipLevel
                    inputRef.levelCount,                           // levelCount
                    inputRef.baseArrayLayer,                       // baseArrayLayer
                    inputRef.layerCount,                           // layerCount
                };
            }

            vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
                platImage.image,                       // image
                imageLayout,                           // imageLayout
                &clearColor,                           // pColor
                static_cast<uint32_t>(ranges.size()),  // rangeCount
                ranges.data());                        // pRanges
        }
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ViewportDesc& vd = renderCmd.viewportDesc;

    const VkViewport viewport {
        vd.x,        // x
        vd.y,        // y
        vd.width,    // width
        vd.height,   // height
        vd.minDepth, // minDepth
        vd.maxDepth, // maxDepth
    };

    vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
        0,                                 // firstViewport
        1,                                 // viewportCount
        &viewport);                        // pViewports
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ScissorDesc& sd = renderCmd.scissorDesc;

    const VkRect2D scissor {
        { sd.offsetX, sd.offsetY },          // offset
        { sd.extentWidth, sd.extentHeight }, // extent
    };

    vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
        0,                                // firstScissor
        1,                                // scissorCount
        &scissor);                        // pScissors
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.lineWidth);               // lineWidth
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.depthBiasConstantFactor,  // depthBiasConstantFactor
        renderCmd.depthBiasClamp,           // depthBiasClamp
        renderCmd.depthBiasSlopeFactor);    // depthBiasSlopeFactor
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.blendConstants);               // blendConstants[4]
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.minDepthBounds,             // minDepthBounds
        renderCmd.maxDepthBounds);            // maxDepthBounds
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const VkStencilFaceFlags stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;

    if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
        vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask,                             // faceMask
            renderCmd.mask);                             // compareMask
    } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
        vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask,                           // faceMask
            renderCmd.mask);                           // writeMask
    } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
        vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask,                           // faceMask
            renderCmd.mask);                           // reference
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
    if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
        const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
        const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
        };

        extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
            &fragmentSize,                                                // pFragmentSize
            combinerOps);                                                 // combinerOps
    }
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (stateCache.backendNode) {
        const RenderBackendRecordingStateVk recordingState = {
            {},
            cmdBuf.commandBuffer,                              // commandBuffer
            stateCache.lowLevelRenderPassData.renderPass,      // renderPass
            stateCache.lowLevelRenderPassData.framebuffer,     // framebuffer
            stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
            stateCache.lowLevelRenderPassData.subpassIndex,    // subpassIndex
            stateCache.pipelineLayout,                         // pipelineLayout
        };
        const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
        stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT_MSG(false, "not implemented");

    const VkPipelineStageFlagBits pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
    const uint32_t queryIndex = renderCmd.queryIndex;
    VkQueryPool queryPool = VK_NULL_HANDLE;

    vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
        queryPool,                            // queryPool
        queryIndex,                           // firstQuery
        1);                                   // queryCount

    vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
        pipelineStageFlagBits,                // pipelineStage
        queryPool,                            // queryPool
        queryIndex);                          // query
}

void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
{
    for (auto& presRef : presentationData_.infos) {
        if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
            continue;
        }

        PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
        PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);

        const GpuResourceState& state = presRef.renderGraphProcessedState;
        const VkAccessFlags srcAccessMask = (VkAccessFlags)state.accessFlags;
        const VkAccessFlags dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
        const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
                                                  (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
        const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
        const VkImageLayout oldLayout = (VkImageLayout)presRef.imageLayout;
        const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
        // NOTE: queue is not currently checked (should be in the same queue as last time used)
        constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
        constexpr VkImageSubresourceRange imageSubresourceRange {
            VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
            0,                                                // baseMipLevel
            1,                                                // levelCount
            0,                                                // baseArrayLayer
            1,                                                // layerCount
        };

        const VkImageMemoryBarrier imageMemoryBarrier {
            VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
            nullptr,                                // pNext
            srcAccessMask,                          // srcAccessMask
            dstAccessMask,                          // dstAccessMask
            oldLayout,                              // oldLayout
            newLayout,                              // newLayout
            srcQueueFamilyIndex,                    // srcQueueFamilyIndex
            dstQueueFamilyIndex,                    // dstQueueFamilyIndex
            presRef.swapchainImage,                 // image
            imageSubresourceRange,                  // subresourceRange
        };

        vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
            srcStageMask,                          // srcStageMask
            dstStageMask,                          // dstStageMask
            dependencyFlags,                       // dependencyFlags
            0,                                     // memoryBarrierCount
            nullptr,                               // pMemoryBarriers
            0,                                     // bufferMemoryBarrierCount
            nullptr,                               // pBufferMemoryBarriers
            1,                                     // imageMemoryBarrierCount
            &imageMemoryBarrier);                  // pImageMemoryBarriers

        presRef.presentationLayoutChangeNeeded = false;
        presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
    }
}

#if (RENDER_PERF_ENABLED == 1)

void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
{
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
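            // each timer reserves TIME_STAMP_PER_GPU_QUERY consecutive uint64_t
            // slots in the query readback buffer; the offset derives from the timer count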
            constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
            perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
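    // advance the ring-buffer window so this frame's query copies land in a
    // fresh region instead of overwriting results still pending readback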
    perfGpuTimerData_.currentOffset =
        (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
#endif
}

void RenderBackendVk::EndFrameTimers()
{
    int64_t fullGpuTime = 0;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // already in micros
    fullGpuTime = perfGpuTimerData_.fullGpuCounter;
    perfGpuTimerData_.fullGpuCounter = 0;

    perfGpuTimerData_.gpuBuffer->Unmap();
#endif
    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
        perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
    }
}

void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
    const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (stateCache.secondaryCommandBuffer) {
        return; // cannot be called inside render pass (e.g. with secondary command buffers)
    }
    PLUGIN_ASSERT(timers_.count(name) == 1);
    const PerfDataSet* perfDataSet = &timers_[name];
    if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
        const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
        if (platData.queryPool) {
            vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
                platData.queryPool,                   // queryPool
                queryIndex,                           // firstQuery
                1);                                   // queryCount

            vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
                stageFlagBits,                        // pipelineStage
                platData.queryPool,                   // queryPool
                queryIndex);                          // query
        }
    }
#endif
}

namespace {
void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
{
    perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
    perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
    perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
    perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
    perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
    perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
    perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
    perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
    perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
    perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
}
} // namespace

void RenderBackendVk::CopyPerfTimeStamp(
    const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
{
    PLUGIN_ASSERT(timers_.count(name) == 1);
    PerfDataSet* const perfDataSet = &timers_[name];

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // read back the results of earlier queries on the CPU and record a copy of
    // the new query results from the query pool into the GPU buffer
    const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
    int64_t gpuMicroSeconds = 0;
    {
        auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
        auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);

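        // two consecutive timestamps per timer: the begin and end stamps written
        // around the render node's command buffer recording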
        const uint64_t startStamp = *currentData;
        const uint64_t endStamp = *(currentData + 1);

        const double timestampPeriod =
            static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                                    .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
        constexpr int64_t nanosToMicrosDivisor { 1000 };
        gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
        constexpr int64_t maxValidMicroSecondValue { 4294967295 };
        if (gpuMicroSeconds > maxValidMicroSecondValue) {
            gpuMicroSeconds = 0;
        }
        perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();

    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");

        perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);

        // cannot be called inside render pass (e.g. with secondary command buffers)
        if (!stateCache.secondaryCommandBuffer) {
            if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
                const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());

                const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
                PLUGIN_ASSERT(gpuBuffer);
                const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();

                constexpr uint32_t queryCount = 2;
                constexpr VkDeviceSize queryStride = sizeof(uint64_t);
                constexpr VkQueryResultFlags queryResultFlags =
                    VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;

                if (platData.queryPool) {
                    vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
                        platData.queryPool,                         // queryPool
                        0,                                          // firstQuery
                        queryCount,                                 // queryCount
                        platBuffer.buffer,                          // dstBuffer
                        currentFrameByteOffset,                     // dstOffset
                        queryStride,                                // stride
                        queryResultFlags);                          // flags
                }
            }
        }
#endif
        UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
        perfDataSet->perfCounters = {}; // reset perf counters
    }
}

#endif
RENDER_END_NAMESPACE()