1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_backend_vk.h"
17
18 #include <algorithm>
19 #include <cstdint>
20 #include <functional>
21 #include <vulkan/vulkan_core.h>
22
23 #include <base/containers/array_view.h>
24 #include <base/containers/fixed_string.h>
25 #include <base/containers/string_view.h>
26 #include <core/implementation_uids.h>
27 #include <core/perf/intf_performance_data_manager.h>
28 #include <core/plugin/intf_class_register.h>
29 #include <render/datastore/render_data_store_render_pods.h>
30 #include <render/device/pipeline_state_desc.h>
31 #include <render/namespace.h>
32 #include <render/nodecontext/intf_render_backend_node.h>
33 #include <render/vulkan/intf_device_vk.h>
34
35 #if (RENDER_PERF_ENABLED == 1)
36 #include "perf/gpu_query.h"
37 #include "perf/gpu_query_manager.h"
38 #include "vulkan/gpu_query_vk.h"
39 #endif
40
41 #include "device/gpu_buffer.h"
42 #include "device/gpu_image.h"
43 #include "device/gpu_resource_handle_util.h"
44 #include "device/gpu_resource_manager.h"
45 #include "device/gpu_sampler.h"
46 #include "device/pipeline_state_object.h"
47 #include "device/render_frame_sync.h"
48 #include "nodecontext/node_context_descriptor_set_manager.h"
49 #include "nodecontext/node_context_pool_manager.h"
50 #include "nodecontext/node_context_pso_manager.h"
51 #include "nodecontext/render_barrier_list.h"
52 #include "nodecontext/render_command_list.h"
53 #include "nodecontext/render_node_graph_node_store.h"
54 #include "render_backend.h"
55 #include "render_graph.h"
56 #include "util/log.h"
57 #include "util/render_frame_util.h"
58 #include "vulkan/gpu_buffer_vk.h"
59 #include "vulkan/gpu_image_vk.h"
60 #include "vulkan/gpu_sampler_vk.h"
61 #include "vulkan/gpu_semaphore_vk.h"
62 #include "vulkan/node_context_descriptor_set_manager_vk.h"
63 #include "vulkan/node_context_pool_manager_vk.h"
64 #include "vulkan/pipeline_state_object_vk.h"
65 #include "vulkan/render_frame_sync_vk.h"
66 #include "vulkan/swapchain_vk.h"
67 #include "vulkan/validate_vk.h"
68
69 using namespace BASE_NS;
70
71 using CORE_NS::GetInstance;
72 using CORE_NS::IParallelTaskQueue;
73 using CORE_NS::IPerformanceDataManager;
74 using CORE_NS::IPerformanceDataManagerFactory;
75 using CORE_NS::ITaskQueueFactory;
76 using CORE_NS::IThreadPool;
77 using CORE_NS::UID_TASK_QUEUE_FACTORY;
78
79 RENDER_BEGIN_NAMESPACE()
80 namespace {
81 #if (RENDER_VULKAN_RT_ENABLED == 1)
82 inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
83 {
84 const VkBufferDeviceAddressInfo addressInfo {
85 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
86 nullptr, // pNext
87 buffer, // buffer
88 };
89 return vkGetBufferDeviceAddress(device, &addressInfo);
90 }
91 #endif
92 #if (RENDER_PERF_ENABLED == 1)
93 void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
94 {
95 dst.drawCount += src.drawCount;
96 dst.drawIndirectCount += src.drawIndirectCount;
97 dst.dispatchCount += src.dispatchCount;
98 dst.dispatchIndirectCount += src.dispatchIndirectCount;
99 dst.bindPipelineCount += src.bindPipelineCount;
100 dst.renderPassCount += src.renderPassCount;
101 dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
102 dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
103 dst.triangleCount += src.triangleCount;
104 dst.instanceCount += src.instanceCount;
105 }
106 #endif
107 } // namespace
108
109 // Helper class for running std::function as a ThreadPool task.
110 class FunctionTask final : public IThreadPool::ITask {
111 public:
112 static Ptr Create(std::function<void()> func)
113 {
114 return Ptr { new FunctionTask(BASE_NS::move(func)) };
115 }
116
117 explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {}
118
119 void operator()() override
120 {
121 func_();
122 }
123
124 protected:
125 void Destroy() override
126 {
127 delete this;
128 }
129
130 private:
131 std::function<void()> func_;
132 };
133
134 #if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
135 namespace {
136 static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
137 }
138 #endif
139
140 RenderBackendVk::RenderBackendVk(
141 Device& dev, GpuResourceManager& gpuResourceManager, const CORE_NS::IParallelTaskQueue::Ptr& queue)
142 : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
143 queue_(queue.get())
144 {
145 #if (RENDER_PERF_ENABLED == 1)
146 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
147 gpuQueryMgr_ = make_unique<GpuQueryManager>();
148
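// Timestamp readback buffer: two timestamps per query object, replicated once per buffered
// frame (GetCommandBufferingCount()), so each in-flight frame has its own readback region.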
149 constexpr uint32_t maxQueryObjectCount { 512u };
150 constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
151 const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
152 const GpuBufferDesc desc {
153 BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT, // usageFlags
154 CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
155 0, // engineCreationFlags
156 fullByteSize, // byteSize
157 };
158 perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
159 perfGpuTimerData_.currentOffset = 0;
160 perfGpuTimerData_.frameByteSize = byteSize;
161 perfGpuTimerData_.fullByteSize = fullByteSize;
162 { // zero initialize
163 uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
164 memset_s(bufferData, fullByteSize, 0, fullByteSize);
165 perfGpuTimerData_.gpuBuffer->Unmap();
166 }
167 #endif
168 #endif
169 }
170
171 void RenderBackendVk::AcquirePresentationInfo(
172 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
173 {
174 if (device_.HasSwapchain()) {
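// For every swapchain in the back buffer configuration: acquire the next image, remap the
// remappable backbuffer handle to the acquired image view, and record whether a layout
// change to PRESENT_SRC is still needed before presentation.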
175 presentationData_.present = true;
176 // resized to the same size as swapchainData for convenience
177 presentationData_.infos.resize(backBufferConfig.swapchainData.size());
178 for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
179 const auto& swapData = backBufferConfig.swapchainData[swapIdx];
180 PresentationInfo pi;
181 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
182
183 if (const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
184 swapchain) {
185 const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
186 const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
187 const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
188 PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
189 pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
190 pi.swapchain = platSwapchain.swapchain;
191 pi.useSwapchain = true;
192 // NOTE: for legacy default backbuffer reasons the same swapchain might appear multiple times ATM
193 for (const auto& piRef : presentationData_.infos) {
194 if (piRef.swapchain == pi.swapchain) {
195 pi.useSwapchain = false;
196 }
197 }
198 // NOTE: do not re-acquire the default backbuffer swapchain if it is already in use with a different handle
199 if (pi.useSwapchain) {
200 const VkResult result = vkAcquireNextImageKHR(device, // device
201 vkSwapchain, // swapchain
202 UINT64_MAX, // timeout
203 pi.swapchainSemaphore, // semaphore
204 (VkFence) nullptr, // fence
205 &pi.swapchainImageIndex); // pImageIndex
206
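// SUCCESS/TIMEOUT/NOT_READY/SUBOPTIMAL are treated as a usable acquire; memory, device and
// surface errors abort presentation for this frame, anything else invalidates the device.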
207 switch (result) {
208 // Success
209 case VK_SUCCESS:
210 case VK_TIMEOUT:
211 case VK_NOT_READY:
212 case VK_SUBOPTIMAL_KHR:
213 pi.validAcquire = true;
214 break;
215
216 // Failure
217 case VK_ERROR_OUT_OF_HOST_MEMORY:
218 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
219 PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
220 return;
221 case VK_ERROR_DEVICE_LOST:
222 PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
223 return;
224 case VK_ERROR_OUT_OF_DATE_KHR:
225 PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
226 return;
227 case VK_ERROR_SURFACE_LOST_KHR:
228 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
229 return;
230
231 case VK_EVENT_SET:
232 case VK_EVENT_RESET:
233 case VK_INCOMPLETE:
234 case VK_ERROR_INITIALIZATION_FAILED:
235 case VK_ERROR_MEMORY_MAP_FAILED:
236 case VK_ERROR_LAYER_NOT_PRESENT:
237 case VK_ERROR_EXTENSION_NOT_PRESENT:
238 case VK_ERROR_FEATURE_NOT_PRESENT:
239 case VK_ERROR_INCOMPATIBLE_DRIVER:
240 case VK_ERROR_TOO_MANY_OBJECTS:
241 case VK_ERROR_FORMAT_NOT_SUPPORTED:
242 case VK_ERROR_FRAGMENTED_POOL:
243 case VK_ERROR_OUT_OF_POOL_MEMORY:
244 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
245 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
246 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
247 case VK_ERROR_VALIDATION_FAILED_EXT:
248 case VK_ERROR_INVALID_SHADER_NV:
249 // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
250 case VK_ERROR_FRAGMENTATION_EXT:
251 case VK_ERROR_NOT_PERMITTED_EXT:
252 // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
253 case VK_RESULT_MAX_ENUM:
254 default:
255 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost. Device invalidated");
256 PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
257 device_.SetDeviceStatus(false);
258 break;
259 }
260
261 if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
262 PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
263 static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
264 }
265
266 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
267 const RenderHandle handle = swapchainData.remappableSwapchainImage;
268 if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
269 // remap image to backbuffer
270 const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
271 // special swapchain remapping
272 gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
273 }
274 pi.renderGraphProcessedState = swapData.backBufferState;
275 pi.imageLayout = swapData.layout;
276 if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
277 pi.presentationLayoutChangeNeeded = true;
278 pi.renderNodeCommandListIndex =
279 static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);
280
281 const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
282 PLUGIN_ASSERT(swapImage);
283 pi.swapchainImage = swapImage->GetPlatformData().image;
284 }
285 }
286 }
287 presentationData_.infos[swapIdx] = pi;
288 }
289 }
290 }
291
292 void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
293 {
294 if (!backBufferConfig.swapchainData.empty()) {
295 if (device_.HasSwapchain() && presentationData_.present) {
296 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
297 uint32_t swapchainCount = 0U;
298 VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
299 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
300 uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
301 for (const auto& presRef : presentationData_.infos) {
302 // NOTE: default backbuffer might be present multiple times
303 // the flag useSwapchain should be false in these cases
304 if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
305 PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
306 vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
307 vkSwapchains[swapchainCount++] = presRef.swapchain;
308 }
309 }
310 #if (RENDER_PERF_ENABLED == 1)
311 commonCpuTimers_.present.Begin();
312 #endif
313
314 // NOTE: currently waits for the last valid submission semaphore
315 // (found by backtracking from the last submitted command buffer)
316 if (swapchainCount > 0U) {
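// present all acquired swapchain images with a single vkQueuePresentKHR call, waiting on
// the last submission's semaphore if one was signaled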
317 VkSemaphore waitSemaphore = VK_NULL_HANDLE;
318 uint32_t waitSemaphoreCount = 0;
319 if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
320 waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
321 waitSemaphoreCount = 1;
322 }
323
324 const VkPresentInfoKHR presentInfo {
325 VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
326 nullptr, // pNext
327 waitSemaphoreCount, // waitSemaphoreCount
328 &waitSemaphore, // pWaitSemaphores
329 swapchainCount, // swapchainCount
330 vkSwapchains, // pSwapchains
331 vkSwapImageIndices, // pImageIndices
332 nullptr // pResults
333 };
334
335 const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
336 const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
337 &presentInfo); // pPresentInfo
338
339 switch (result) {
340 // Success
341 case VK_SUCCESS:
342 break;
343 case VK_SUBOPTIMAL_KHR:
344 #if (RENDER_VALIDATION_ENABLED == 1)
345 PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
346 #endif
347 break;
348
349 // Failure
350 case VK_ERROR_OUT_OF_HOST_MEMORY:
351 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
352 PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
353 return;
354 case VK_ERROR_DEVICE_LOST:
355 PLUGIN_LOG_E("vkQueuePresentKHR device lost");
356 return;
357 case VK_ERROR_OUT_OF_DATE_KHR:
358 PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
359 return;
360 case VK_ERROR_SURFACE_LOST_KHR:
361 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
362 return;
363
364 case VK_NOT_READY:
365 case VK_TIMEOUT:
366 case VK_EVENT_SET:
367 case VK_EVENT_RESET:
368 case VK_INCOMPLETE:
369 case VK_ERROR_INITIALIZATION_FAILED:
370 case VK_ERROR_MEMORY_MAP_FAILED:
371 case VK_ERROR_LAYER_NOT_PRESENT:
372 case VK_ERROR_EXTENSION_NOT_PRESENT:
373 case VK_ERROR_FEATURE_NOT_PRESENT:
374 case VK_ERROR_INCOMPATIBLE_DRIVER:
375 case VK_ERROR_TOO_MANY_OBJECTS:
376 case VK_ERROR_FORMAT_NOT_SUPPORTED:
377 case VK_ERROR_FRAGMENTED_POOL:
378 case VK_ERROR_OUT_OF_POOL_MEMORY:
379 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
380 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
381 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
382 case VK_ERROR_VALIDATION_FAILED_EXT:
383 case VK_ERROR_INVALID_SHADER_NV:
384 case VK_ERROR_FRAGMENTATION_EXT:
385 case VK_ERROR_NOT_PERMITTED_EXT:
386 case VK_RESULT_MAX_ENUM:
387 default:
388 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
389 PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
390 break;
391 }
392 }
393 #if (RENDER_PERF_ENABLED == 1)
394 commonCpuTimers_.present.End();
395 #endif
396 } else {
397 #if (RENDER_VALIDATION_ENABLED == 1)
398 PLUGIN_LOG_ONCE_E(
399 "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
400 #endif
401 }
402 }
403 }
404
405 void RenderBackendVk::Render(
406 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
407 {
408 // NOTE: all command lists are validated before entering here
409 #if (RENDER_PERF_ENABLED == 1)
410 commonCpuTimers_.full.Begin();
411 commonCpuTimers_.acquire.Begin();
412 #endif
413
414 commandBufferSubmitter_ = {};
415 commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());
416
417 presentationData_.present = false;
418 presentationData_.infos.clear();
419
420 #if (RENDER_PERF_ENABLED == 1)
421 commonCpuTimers_.acquire.End();
422
423 StartFrameTimers(renderCommandFrameData);
424 commonCpuTimers_.execute.Begin();
425 #endif
426
427 // command list processing/execution loop
428 // first tries to acquire swapchain if needed in a task
429 RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);
430
431 #if (RENDER_PERF_ENABLED == 1)
432 commonCpuTimers_.execute.End();
433 commonCpuTimers_.submit.Begin();
434 #endif
435
436 PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
437 // submit vulkan command buffers
438 // checks that presentation info has valid acquire
439 RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);
440
441 #if (RENDER_PERF_ENABLED == 1)
442 commonCpuTimers_.submit.End();
443 commonCpuTimers_.full.End();
444 EndFrameTimers();
445 #endif
446 }
447
448 void RenderBackendVk::RenderProcessSubmitCommandLists(
449 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
450 {
451 // NOTE: currently backtracks to the final valid command buffer semaphore
452 uint32_t finalCommandBufferSubmissionIndex = ~0u;
453 commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
454 bool swapchainSemaphoreWaited = false;
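// find the last submission that both has a command buffer and signals a semaphore; that
// submission gets the frame fence, the external GPU signals, and the presentation wait semaphore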
455 for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
456 --cmdBufferIdx) {
457 if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
458 (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
459 VK_NULL_HANDLE)) {
460 finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
461 break;
462 }
463 }
464
465 for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
466 const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
467 if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
468 continue;
469 }
470
471 const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
472
473 uint32_t waitSemaphoreCount = 0u;
474 constexpr const uint32_t maxWaitSemaphoreCount =
475 PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
476 VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
477 VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
478 for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
479 const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
480 PLUGIN_ASSERT(waitCmdBufferIdx < (uint32_t)commandBufferSubmitter_.commandBuffers.size());
481
482 VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
483 if (waitSemaphore != VK_NULL_HANDLE) {
484 waitSemaphores[waitSemaphoreCount] = waitSemaphore;
485 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
486 waitSemaphoreCount++;
487 }
488 }
489
490 if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
491 (!presentationData_.infos.empty())) {
492 swapchainSemaphoreWaited = true;
493 // go through all swapchain semaphores
494 for (const auto& presRef : presentationData_.infos) {
495 if (presRef.swapchainSemaphore) {
496 waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
497 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
498 waitSemaphoreCount++;
499 }
500 }
501 }
502
503 uint32_t signalSemaphoreCount = 0u;
504 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
505 constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
506 VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
507 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
508 VkFence fence = VK_NULL_HANDLE;
509 if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
510 // add fence signaling to last submission for frame sync
511 if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
512 fence = frameSync->GetFrameFence().fence;
513 frameSync->FrameFenceIsSignalled();
514 }
515 // signal external semaphores
516 if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
517 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
518 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
519 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
520 if (externalSignals.size() == externalSemaphores.size()) {
521 for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
522 // only signal semaphores that have not been signaled yet
523 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
524 if (const GpuSemaphoreVk* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get();
525 gs) {
526 semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
527 externalSignals[sigIdx].signaled = true;
528 }
529 }
530 }
531 }
532 }
533
534 if (presentationData_.present) {
535 commandBufferSubmitter_.presentationWaitSemaphore =
536 commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
537 semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
538 }
539 // add additional semaphores
540 for (const auto& swapRef : backBufferConfig.swapchainData) {
541 // should have been checked in render graph already
542 if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
543 semaphores[signalSemaphoreCount++] =
544 VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
545 }
546 }
547 } else if (renderContextRef.submitDepencies.signalSemaphore) {
548 semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
549 }
550 PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);
551
552 const VkSubmitInfo submitInfo {
553 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
554 nullptr, // pNext
555 waitSemaphoreCount, // waitSemaphoreCount
556 (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
557 waitSemaphorePipelineStageFlags, // pWaitDstStageMask
558 1, // commandBufferCount
559 &cmdSubmitterRef.commandBuffer, // pCommandBuffers
560 signalSemaphoreCount, // signalSemaphoreCount
561 (signalSemaphoreCount == 0) ? nullptr : semaphores, // pSignalSemaphores
562 };
563
564 const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
565 if (queue) {
566 VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
567 1, // submitCount
568 &submitInfo, // pSubmits
569 fence)); // fence
570 }
571 }
572 }
573
574 void RenderBackendVk::RenderProcessCommandLists(
575 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
576 {
577 const uint32_t cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
578 if (queue_) {
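// parallel path: the swapchain acquire and every render command list are recorded as tasks;
// lists that may touch the swapchain wait on the acquire task via afterIdentifiers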
579 constexpr uint64_t acquireTaskIdentifier { ~0U };
580 vector<uint64_t> afterIdentifiers;
581 afterIdentifiers.reserve(1u); // needed for the swapchain acquire wait
582 // submit acquire task if needed
583 if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
584 queue_->Submit(
585 acquireTaskIdentifier, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
586 AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
587 }));
588 }
589 uint64_t secondaryIdx = cmdBufferCount;
590 for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
591 afterIdentifiers.clear();
592 // add wait for acquire if needed
593 if (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx) {
594 afterIdentifiers.push_back(acquireTaskIdentifier);
595 }
596 // NOTE: cmdBufferIdx is advanced at the end of the loop body
597 const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
598 const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
599 PLUGIN_ASSERT(mrpData.subpassCount > 0);
600 const uint32_t rcCount = mrpData.subpassCount;
601 if (mrpData.secondaryCmdLists) {
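// record each subpass into its own secondary command buffer task, then record the primary
// render pass task that waits on all of them and executes the secondaries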
602 afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
603 for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
604 const uint64_t submitId = secondaryIdx++;
605 afterIdentifiers.push_back(submitId);
606 PLUGIN_ASSERT((cmdBufferIdx + secondIdx) < cmdBufferCount);
607 queue_->SubmitAfter(afterIdentifiers, submitId,
608 FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
609 const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
610 MultiRenderCommandListDesc mrcDesc;
611 mrcDesc.multiRenderCommandListCount = 1u;
612 mrcDesc.baseContext = nullptr;
613 mrcDesc.secondaryCommandBuffer = true;
614 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
615 const DebugNames debugNames { ref2.debugName,
616 renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
617 RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
618 }));
619 }
620 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
621 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
622 MultiRenderCommandListDesc mrcDesc;
623 mrcDesc.multiRenderCommandListCount = rcCount;
624 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
625 const DebugNames debugNames { ref2.debugName,
626 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
627 RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
628 }));
629 } else {
630 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
631 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
632 MultiRenderCommandListDesc mrcDesc;
633 mrcDesc.multiRenderCommandListCount = rcCount;
634 if (rcCount > 1) {
635 mrcDesc.multiRenderNodeCmdList = true;
636 mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
637 }
638 for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
639 const uint32_t currIdx = cmdBufferIdx + rcIdx;
640 mrcDesc.multiRenderCommandListIndex = rcIdx;
641 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
642 const DebugNames debugNames { ref2.debugName,
643 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
644 RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
645 }
646 }));
647 }
648 // advance index (a multi render command list consumes rcCount contexts)
649 cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
650 }
651
652 // execute and wait for completion.
653 queue_->Execute();
654 queue_->Clear();
655 } else {
656 AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
657 for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < (uint32_t)renderCommandFrameData.renderCommandContexts.size();) {
658 // NOTE: cmdBufferIdx is advanced at the end of the loop body
659 const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
660 const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
661 PLUGIN_ASSERT(mrpData.subpassCount > 0);
662 const uint32_t rcCount = mrpData.subpassCount;
663
664 MultiRenderCommandListDesc mrcDesc;
665 mrcDesc.multiRenderCommandListCount = rcCount;
666 mrcDesc.baseContext = (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;
667
668 for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
669 const uint32_t currIdx = cmdBufferIdx + rcIdx;
670 mrcDesc.multiRenderCommandListIndex = rcIdx;
671 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
672 const DebugNames debugNames { ref2.debugName,
673 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
674 RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
675 }
676 cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
677 }
678 }
679 }
680
681 void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
682 RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
683 const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
684 {
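// Records the primary command buffer for a render pass whose subpasses were recorded into
// secondary command buffers: begin the render pass, execute the per-subpass secondaries
// (advancing subpasses in between), then end the render pass.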
685 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
686 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
687 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
688
689 const ContextCommandPoolVk& ptrCmdPool =
690 (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
691 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;
692
693 // begin cmd buffer
694 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
695 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
696 const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
697 if (valid) {
698 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
699 ptrCmdPool.commandPool, // commandPool
700 commandPoolResetFlags)); // flags
701 }
702
703 constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
704 VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
705 };
706 const VkCommandBufferBeginInfo commandBufferBeginInfo {
707 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
708 nullptr, // pNext
709 commandBufferUsageFlags, // flags
710 nullptr, // pInheritanceInfo
711 };
712 if (valid) {
713 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
714 &commandBufferBeginInfo)); // pBeginInfo
715 }
716
717 StateCache stateCache;
718
719 const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
720 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
721 const uint32_t commandCount = static_cast<uint32_t>(rcRef.size());
722 const RenderCommandBeginRenderPass* rcBeginRenderPass =
723 (mrpcld.rpBeginCmdIndex < commandCount)
724 ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
725 : nullptr;
726 const RenderCommandEndRenderPass* rcEndRenderPass =
727 (mrpcld.rpEndCmdIndex < commandCount)
728 ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
729 : nullptr;
730
731 if (rcBeginRenderPass && rcEndRenderPass) {
732 if (mrpcld.rpBarrierCmdIndex < commandCount) {
733 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
734 PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
735 const RenderCommandBarrierPoint& barrierPoint =
736 *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
737 // handle all barriers before render command that needs resource syncing
738 RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
739 }
740
741 // begin render pass
742 stateCache.primaryRenderPass = true;
743 RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
744 stateCache.primaryRenderPass = false;
745
746 // get secondary command buffers from correct indices and execute
747 for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
748 const uint32_t currCmdBufIdx = cmdBufIdx + idx;
749 PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
750 const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
751 NodeContextPoolManagerVk& contextPoolVk =
752 *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);
753
754 const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
755 const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
756 const uint32_t mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
757 // vkCmdNextSubpass is only recorded from the second render command list onwards
758 if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
759 RenderCommandBeginRenderPass renderPass =
760 *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
761 renderPass.renderPassDesc.subpassContents =
762 SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
763 stateCache.renderCommandBeginRenderPass = nullptr; // reset
764 RenderCommand(
765 renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
766 }
767 RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
768 }
769
770 // end render pass (replace the primary render pass)
771 stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
772 // NOTE: render graph has batched the subpasses to have END_SUBPASS, we need END_RENDER_PASS
773 constexpr RenderCommandEndRenderPass rcerp = {};
774 RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
775 }
776
777 // end cmd buffer
778 if (valid) {
779 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
780 }
781
782 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
783 }
784
785 void RenderBackendVk::RenderExecuteSecondaryCommandLists(
786 const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
787 {
788 if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
789 vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
790 1u, // commandBufferCount
791 &executeCmdBuffer.commandBuffer); // pCommandBuffers
792 }
793 }
794
795 VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
796 const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
797 {
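// Secondary command buffers inherit the render pass and subpass they will be executed in.
// The framebuffer is left as VK_NULL_HANDLE, which the Vulkan spec permits when it is not
// known at recording time.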
798 NodeContextPoolManagerVk& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);
799
800 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
801 const uint32_t cmdCount = static_cast<uint32_t>(rcRef.size());
802
803 const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
804 PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
805 PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
806 if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
807 const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
808 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
809 const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
810 LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
811
812 const uint32_t subpass = renderCmd.subpassStartIndex;
813 return VkCommandBufferInheritanceInfo {
814 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
815 nullptr, // pNext
816 lowLevelRenderPassData.renderPass, // renderPass
817 subpass, // subpass
818 VK_NULL_HANDLE, // framebuffer
819 VK_FALSE, // occlusionQueryEnable
820 0, // queryFlags
821 0, // pipelineStatistics
822 };
823 } else {
824 return VkCommandBufferInheritanceInfo {};
825 }
826 }
827
828 void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
829 const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
830 {
831 // these are validated in render graph
832 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
833 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
834 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
835 NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
836 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
837
838 contextPoolMgr.BeginBackendFrame();
839 ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
840 nodeContextPsoMgr.BeginBackendFrame();
841
842 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
843
844 StateCache stateCache = {}; // state cache for this render command list
845 stateCache.backendNode = renderCommandCtx.renderBackendNode;
846 stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;
847
848 // the command buffer has been waited on with a single frame fence
849 const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
850 const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
851 const bool endCommandBuffer =
852 (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
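// in a multi render node batch all lists record into the base context's shared command
// buffer; only the first list begins it and only the last one ends it (see flags above)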
853 const ContextCommandPoolVk* ptrCmdPool = nullptr;
854 if (mrclDesc.multiRenderNodeCmdList) {
855 PLUGIN_ASSERT(mrclDesc.baseContext);
856 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
857 ->GetContextCommandPool();
858 } else if (mrclDesc.secondaryCommandBuffer) {
859 PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
860 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
861 } else {
862 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
863 }
864
865 // update cmd list context descriptor sets
866 UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);
867
868 PLUGIN_ASSERT(ptrCmdPool);
869 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;
870
871 #if (RENDER_PERF_ENABLED == 1)
872 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
873 const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
874 const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
875 #endif
876 PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
877 PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
878 #endif
879
880 if (beginCommandBuffer) {
881 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
882 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
883 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
884 ptrCmdPool->commandPool, // commandPool
885 commandPoolResetFlags)); // flags
886
887 VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
888 VkCommandBufferInheritanceInfo inheritanceInfo {};
889 if (stateCache.secondaryCommandBuffer) {
890 commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
891 inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
892 }
893 const VkCommandBufferBeginInfo commandBufferBeginInfo {
894 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
895 nullptr, // pNext
896 commandBufferUsageFlags, // flags
897 mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
898 };
899
900 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
901 &commandBufferBeginInfo)); // pBeginInfo
902
903 #if (RENDER_PERF_ENABLED == 1)
904 if (perfDataSet) {
905 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
906 if (validGpuQueries) {
907 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
908 PLUGIN_ASSERT(gpuQuery);
909
910 gpuQuery->NextQueryIndex();
911
912 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
913 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
914 }
915 #endif
916 perfDataSet->cpuTimer.Begin();
917 }
918 #endif
919 }
920
921 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
922 if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
923 const VkDebugUtilsLabelEXT label {
924 VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
925 nullptr, // pNext
926 debugNames.renderCommandListName.data(), // pLabelName
927 { 1.f, 1.f, 1.f, 1.f } // color[4]
928 };
929 deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
930 }
931 #endif
932
933 for (const auto& ref : rcRef) {
934 if (!stateCache.validCommandList) {
935 #if (RENDER_VALIDATION_ENABLED == 1)
936 PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
937 "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
938 debugNames.renderCommandListName.data());
939 #endif
940 break;
941 }
942
943 PLUGIN_ASSERT(ref.rc);
944 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
945 if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
946 const uint32_t index = (uint32_t)ref.type < countof(COMMAND_NAMES) ? (uint32_t)ref.type : 0;
947 const VkDebugUtilsLabelEXT label {
948 VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
949 nullptr, // pNext
950 COMMAND_NAMES[index], // pLabelName
951 { 0.87f, 0.83f, 0.29f, 1.f } // color[4]
952 };
953 deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
954 }
955 #endif
956
957 switch (ref.type) {
958 case RenderCommandType::BARRIER_POINT: {
959 if (!stateCache.secondaryCommandBuffer) {
960 const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
961 // handle all barriers before render command that needs resource syncing
962 RenderCommand(
963 barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
964 }
965 break;
966 }
967 case RenderCommandType::DRAW: {
968 RenderCommand(
969 *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
970 break;
971 }
972 case RenderCommandType::DRAW_INDIRECT: {
973 RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
974 contextPoolMgr, stateCache);
975 break;
976 }
977 case RenderCommandType::DISPATCH: {
978 RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
979 contextPoolMgr, stateCache);
980 break;
981 }
982 case RenderCommandType::DISPATCH_INDIRECT: {
983 RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
984 contextPoolMgr, stateCache);
985 break;
986 }
987 case RenderCommandType::BIND_PIPELINE: {
988 RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
989 contextPoolMgr, stateCache);
990 break;
991 }
992 case RenderCommandType::BEGIN_RENDER_PASS: {
993 RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
994 contextPoolMgr, stateCache);
995 break;
996 }
997 case RenderCommandType::NEXT_SUBPASS: {
998 RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
999 contextPoolMgr, stateCache);
1000 break;
1001 }
1002 case RenderCommandType::END_RENDER_PASS: {
1003 RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1004 contextPoolMgr, stateCache);
1005 break;
1006 }
1007 case RenderCommandType::BIND_VERTEX_BUFFERS: {
1008 RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1009 contextPoolMgr, stateCache);
1010 break;
1011 }
1012 case RenderCommandType::BIND_INDEX_BUFFER: {
1013 RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1014 contextPoolMgr, stateCache);
1015 break;
1016 }
1017 case RenderCommandType::COPY_BUFFER: {
1018 RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1019 contextPoolMgr, stateCache);
1020 break;
1021 }
1022 case RenderCommandType::COPY_BUFFER_IMAGE: {
1023 RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1024 contextPoolMgr, stateCache);
1025 break;
1026 }
1027 case RenderCommandType::COPY_IMAGE: {
1028 RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1029 contextPoolMgr, stateCache);
1030 break;
1031 }
1032 case RenderCommandType::BIND_DESCRIPTOR_SETS: {
1033 RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1034 contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
1035 break;
1036 }
1037 case RenderCommandType::PUSH_CONSTANT: {
1038 RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1039 contextPoolMgr, stateCache);
1040 break;
1041 }
1042 case RenderCommandType::BLIT_IMAGE: {
1043 RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1044 contextPoolMgr, stateCache);
1045 break;
1046 }
1047 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
1048 RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
1049 nodeContextPsoMgr, contextPoolMgr, stateCache);
1050 break;
1051 }
1052 case RenderCommandType::CLEAR_COLOR_IMAGE: {
1053 RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1054 contextPoolMgr, stateCache);
1055 break;
1056 }
1057 // dynamic states
1058 case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
1059 RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1060 contextPoolMgr, stateCache);
1061 break;
1062 }
1063 case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
1064 RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1065 contextPoolMgr, stateCache);
1066 break;
1067 }
1068 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
1069 RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1070 contextPoolMgr, stateCache);
1071 break;
1072 }
1073 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
1074 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1075 contextPoolMgr, stateCache);
1076 break;
1077 }
1078 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
1079 RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
1080 nodeContextPsoMgr, contextPoolMgr, stateCache);
1081 break;
1082 }
1083 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
1084 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1085 contextPoolMgr, stateCache);
1086 break;
1087 }
1088 case RenderCommandType::DYNAMIC_STATE_STENCIL: {
1089 RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1090 contextPoolMgr, stateCache);
1091 break;
1092 }
1093 case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
1094 RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
1095 nodeContextPsoMgr, contextPoolMgr, stateCache);
1096 break;
1097 }
1098 case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1099 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1100 nodeContextPsoMgr, contextPoolMgr, stateCache);
1101 break;
1102 }
1103 //
1104 case RenderCommandType::WRITE_TIMESTAMP: {
1105 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1106 contextPoolMgr, stateCache);
1107 break;
1108 }
1109 case RenderCommandType::UNDEFINED:
1110 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1111 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1112 default: {
1113 PLUGIN_ASSERT(false && "non-valid render command");
1114 break;
1115 }
1116 }
1117 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
1118 if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1119 deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1120 }
1121 #endif
1122 }
1123
1124 if ((!presentationData_.infos.empty())) {
1125 RenderPresentationLayout(cmdBuffer, cmdBufIdx);
1126 }
1127
1128 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1129 if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1130 deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1131 }
1132 #endif
1133
1134 #if (RENDER_PERF_ENABLED == 1)
1135 // copy counters
1136 if (perfDataSet) {
1137 CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
1138 }
1139 #endif
1140
1141 if (endCommandBuffer) {
1142 #if (RENDER_PERF_ENABLED == 1)
1143 if (perfDataSet) {
1144 perfDataSet->cpuTimer.End();
1145 }
1146 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
1147 if (validGpuQueries) {
1148 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
1149 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
1150 }
1151 #endif
1152 CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
1153 #endif
1154
1155 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
1156
1157 if (mrclDesc.secondaryCommandBuffer) {
1158 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
1159 } else {
1160 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
1161 }
1162 }
1163 }
1164
1165 void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1166 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1167 {
1168 const RenderHandle psoHandle = renderCmd.psoHandle;
1169 const VkPipelineBindPoint pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;
1170
1171 stateCache.psoHandle = psoHandle;
1172
1173 VkPipeline pipeline { VK_NULL_HANDLE };
1174 VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
1175 if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
1176 const ComputePipelineStateObjectVk* pso = static_cast<const ComputePipelineStateObjectVk*>(
1177 psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
1178 if (pso) {
1179 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1180 pipeline = plat.pipeline;
1181 pipelineLayout = plat.pipelineLayout;
1182 }
1183 } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
1184 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1185 if (stateCache.renderCommandBeginRenderPass) {
1186 uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
1187 if (stateCache.pipelineDescSetHash != 0) {
1188 HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
1189 }
1190 const GraphicsPipelineStateObjectVk* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
1191 psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
1192 stateCache.renderCommandBeginRenderPass->subpasses,
1193 stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
1194 &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
1195 if (pso) {
1196 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1197 pipeline = plat.pipeline;
1198 pipelineLayout = plat.pipelineLayout;
1199 }
1200 }
1201 }
1202
1203 // NOTE: render front-end expects pso binding after begin render pass
1204 // in some situations the render pass might change and therefore the pipeline changes
1205 // in some situations the render pass is the same and the rebinding is not needed
1206 const bool newPipeline = (pipeline != stateCache.pipeline);
1207 const bool valid = (pipeline != VK_NULL_HANDLE);
1208 if (valid && newPipeline) {
1209 stateCache.pipeline = pipeline;
1210 stateCache.pipelineLayout = pipelineLayout;
1211 stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
1212 vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
1213 pipelineBindPoint, // pipelineBindPoint
1214 pipeline); // pipeline
1215 #if (RENDER_PERF_ENABLED == 1)
1216 stateCache.perfCounters.bindPipelineCount++;
1217 #endif
1218 }
1219 }
1220
1221 void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1222 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1223 {
1224 if (stateCache.validBindings) {
1225 if (renderCmd.indexCount) {
1226 vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
1227 renderCmd.indexCount, // indexCount
1228 renderCmd.instanceCount, // instanceCount
1229 renderCmd.firstIndex, // firstIndex
1230 renderCmd.vertexOffset, // vertexOffset
1231 renderCmd.firstInstance); // firstInstance
1232 #if (RENDER_PERF_ENABLED == 1)
1233 stateCache.perfCounters.drawCount++;
1234 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1235 stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1236 #endif
1237 } else {
1238 vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
1239 renderCmd.vertexCount, // vertexCount
1240 renderCmd.instanceCount, // instanceCount
1241 renderCmd.firstVertex, // firstVertex
1242 renderCmd.firstInstance); // firstInstance
1243 #if (RENDER_PERF_ENABLED == 1)
1244 stateCache.perfCounters.drawCount++;
1245 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1246 stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertex dimension
1247 * renderCmd.instanceCount;
1248 #endif
1249 }
1250 }
1251 }
1252
1253 void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1254 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1255 {
1256 if (stateCache.validBindings) {
1257 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1258 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1259 const VkBuffer buffer = plat.buffer;
1260 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1261 if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1262 vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
1263 buffer, // buffer
1264 offset, // offset
1265 renderCmd.drawCount, // drawCount
1266 renderCmd.stride); // stride
1267 } else {
1268 vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
1269 buffer, // buffer
1270 offset, // offset
1271 renderCmd.drawCount, // drawCount
1272 renderCmd.stride); // stride
1273 }
1274 #if (RENDER_PERF_ENABLED == 1)
1275 stateCache.perfCounters.drawIndirectCount++;
1276 #endif
1277 }
1278 }
1279 }
1280
1281 void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1282 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1283 {
1284 if (stateCache.validBindings) {
1285 vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
1286 renderCmd.groupCountX, // groupCountX
1287 renderCmd.groupCountY, // groupCountY
1288 renderCmd.groupCountZ); // groupCountZ
1289 #if (RENDER_PERF_ENABLED == 1)
1290 stateCache.perfCounters.dispatchCount++;
1291 #endif
1292 }
1293 }
1294
1295 void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
1296 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1297 const StateCache& stateCache)
1298 {
1299 if (stateCache.validBindings) {
1300 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1301 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1302 const VkBuffer buffer = plat.buffer;
1303 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1304 vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
1305 buffer, // buffer
1306 offset); // offset
1307 #if (RENDER_PERF_ENABLED == 1)
1308 stateCache.perfCounters.dispatchIndirectCount++;
1309 #endif
1310 }
1311 }
1312 }
1313
1314 void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
1315 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1316 StateCache& stateCache)
1317 {
1318 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
1319 stateCache.renderCommandBeginRenderPass = &renderCmd;
1320
1321 NodeContextPoolManagerVk& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
1322 // NOTE: state cache could be optimized to store lowLevelRenderPassData in multi-rendercommandlist-case
1323 stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
1324
1325 // early out for multi render command list render pass
1326 if (stateCache.secondaryCommandBuffer) {
1327 return; // early out
1328 }
1329 const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
1330 (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
1331 // invalidate the whole command list
1332 if (!validRpFbo) {
1333 stateCache.validCommandList = false;
1334 return; // early out
1335 }
1336
1337 if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
1338 #if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
1339 // fix for e.g. MoltenVK MSAA resolve not working on macOS (we do not execute subpasses)
1340 if ((!stateCache.renderCommandBeginRenderPass->subpasses.empty()) &&
1341 stateCache.renderCommandBeginRenderPass->subpasses[0].resolveAttachmentCount == 0) {
1342 const VkSubpassContents subpassContents =
1343 static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1344 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1345 subpassContents); // contents
1346 }
1347 #else
1348 const VkSubpassContents subpassContents =
1349 static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1350 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1351 subpassContents); // contents
1352 #endif
1353 return; // early out
1354 }
1355
1356 const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;
1357
1358 VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1359 bool hasClearValues = false;
1360 for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
1361 const auto& ref = renderPassDesc.attachments[idx];
1362 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
1363 ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1364 const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
1365 VkClearValue clearValue;
1366 if (RenderHandleUtil::IsDepthImage(handle)) {
1367 PLUGIN_STATIC_ASSERT(sizeof(clearValue.depthStencil) == sizeof(ref.clearValue.depthStencil));
1368 clearValue.depthStencil.depth = ref.clearValue.depthStencil.depth;
1369 clearValue.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
1370 } else {
1371 PLUGIN_STATIC_ASSERT(sizeof(clearValue.color) == sizeof(ref.clearValue.color));
1372 if (!CloneData(&clearValue.color, sizeof(clearValue.color), &ref.clearValue.color,
1373 sizeof(ref.clearValue.color))) {
1374 PLUGIN_LOG_E("Copying of clearValue.color failed.");
1375 }
1376 }
1377 clearValues[idx] = clearValue;
1378 hasClearValues = true;
1379 }
1380 }
1381
1382 // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
1383 // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
1384 const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;
1385
1386 VkRect2D renderArea {
1387 { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
1388 { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
1389 };
1390 // render area needs to be inside frame buffer
1391 const auto& lowLevelData = stateCache.lowLevelRenderPassData;
1392 renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
1393 renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
1394 renderArea.extent.width = Math::min(renderArea.extent.width,
1395 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
1396 renderArea.extent.height = Math::min(renderArea.extent.height,
1397 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
1398
1399 const VkRenderPassBeginInfo renderPassBeginInfo {
1400 VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // sType
1401 nullptr, // pNext
1402 stateCache.lowLevelRenderPassData.renderPass, // renderPass
1403 stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
1404 renderArea, // renderArea
1405 clearValueCount, // clearValueCount
1406 clearValues, // pClearValues
1407 };
1408
1409 // NOTE: could be patched in render graph
1410 const VkSubpassContents subpassContents =
1411 stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
1412 vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
1413 &renderPassBeginInfo, // pRenderPassBegin
1414 subpassContents); // contents
1415 #if (RENDER_PERF_ENABLED == 1)
1416 stateCache.perfCounters.renderPassCount++;
1417 #endif
1418 }
1419
1420 void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1421 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1422 {
1423 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1424
1425 const VkSubpassContents subpassContents = (VkSubpassContents)renderCmd.subpassContents;
1426 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1427 subpassContents); // contents
1428 }
1429
1430 void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1431 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1432 {
1433 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1434
1435 // early out for multi render command list render pass
1436 if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
1437 return; // only the subpass ends here; the render pass is ended by the last render command list
1438 }
1439
1440 stateCache.renderCommandBeginRenderPass = nullptr;
1441 stateCache.lowLevelRenderPassData = {};
1442
1443 if (!stateCache.secondaryCommandBuffer) {
1444 vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
1445 }
1446 }
1447
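// Binds all vertex buffers of the draw with a single vkCmdBindVertexBuffers call.
// Buffer offsets include the possible dynamic ring buffer offset (currentByteOffset).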
1448 void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
1449 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1450 const StateCache& stateCache)
1451 {
1452 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1453 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1454
1455 const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;
1456
1457 VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1458 VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1459 const GpuBufferVk* gpuBuffer = nullptr;
1460 RenderHandle currBufferHandle;
1461 for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
1462 const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
1463 // the importer usually uses the same GPU buffer for all vertex buffers of a single primitive;
1464 // do not re-fetch the buffer if the handle has not changed
1465 if (currBufferHandle.id != currVb.bufferHandle.id) {
1466 currBufferHandle = currVb.bufferHandle;
1467 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
1468 }
1469 if (gpuBuffer) {
1470 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1471 const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
1472 vertexBuffers[idx] = plat.buffer;
1473 offsets[idx] = offset;
1474 }
1475 }
1476
1477 vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
1478 0, // firstBinding
1479 vertexBufferCount, // bindingCount
1480 vertexBuffers, // pBuffers
1481 offsets); // pOffsets
1482 }
1483
1484 void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
1485 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1486 const StateCache& stateCache)
1487 {
1488 const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);
1489
1490 PLUGIN_ASSERT(gpuBuffer);
1491 if (gpuBuffer) {
1492 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1493 const VkBuffer buffer = plat.buffer;
1494 const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
1495 const VkIndexType indexType = (VkIndexType)renderCmd.indexBuffer.indexType;
1496
1497 vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
1498 buffer, // buffer
1499 offset, // offset
1500 indexType); // indexType
1501 }
1502 }
1503
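// Blits a region between two images. GPU_IMAGE_ALL_LAYERS is expanded to the actual array layer
// count of the source and destination images before recording vkCmdBlitImage.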
1504 void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1505 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1506 {
1507 const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1508 const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1509 if (srcImagePtr && dstImagePtr) {
1510 const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
1511 const GpuImagePlatformDataVk& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();
1512
1513 const ImageBlit& ib = renderCmd.imageBlit;
1514 const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1515 ? srcPlatImage.arrayLayers
1516 : ib.srcSubresource.layerCount;
1517 const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1518 ? dstPlatImage.arrayLayers
1519 : ib.dstSubresource.layerCount;
1520
1521 const VkImageSubresourceLayers srcSubresourceLayers {
1522 (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
1523 ib.srcSubresource.mipLevel, // mipLevel
1524 ib.srcSubresource.baseArrayLayer, // baseArrayLayer
1525 srcLayerCount, // layerCount
1526 };
1527 const VkImageSubresourceLayers dstSubresourceLayers {
1528 (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
1529 ib.dstSubresource.mipLevel, // mipLevel
1530 ib.dstSubresource.baseArrayLayer, // baseArrayLayer
1531 dstLayerCount, // layerCount
1532 };
1533
1534 const VkImageBlit imageBlit {
1535 srcSubresourceLayers, // srcSubresource
1536 { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
1537 { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
1538 (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
1539 dstSubresourceLayers, // dstSubresource
1540 { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
1541 { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
1542 (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
1543 };
1544
1545 vkCmdBlitImage(cmdBuf.commandBuffer, // commandBuffer
1546 srcPlatImage.image, // srcImage
1547 (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout,
1548 dstPlatImage.image, // dstImage
1549 (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
1550 1, // regionCount
1551 &imageBlit, // pRegions
1552 (VkFilter)renderCmd.filter); // filter
1553 }
1554 }
1555
1556 void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1557 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1558 {
1559 const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1560 const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1561
1562 PLUGIN_ASSERT(srcGpuBuffer);
1563 PLUGIN_ASSERT(dstGpuBuffer);
1564
1565 if (srcGpuBuffer && dstGpuBuffer) {
1566 const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
1567 const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
1568 const VkBufferCopy bufferCopy {
1569 renderCmd.bufferCopy.srcOffset,
1570 renderCmd.bufferCopy.dstOffset,
1571 renderCmd.bufferCopy.size,
1572 };
1573
1574 if (bufferCopy.size > 0) {
1575 vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
1576 srcBuffer, // srcBuffer
1577 dstBuffer, // dstBuffer
1578 1, // regionCount
1579 &bufferCopy); // pRegions
1580 }
1581 }
1582 }
1583
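// Copies between a buffer and an image in the direction given by renderCmd.copyType.
// The copy extent is clamped against the selected mip level size so that the region stays inside
// the image.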
1584 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1585 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1586 const StateCache& stateCache)
1587 {
1588 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
1589 PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1590 return;
1591 }
1592
1593 const GpuBufferVk* gpuBuffer = nullptr;
1594 const GpuImageVk* gpuImage = nullptr;
1595 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1596 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1597 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1598 } else {
1599 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1600 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1601 }
1602
1603 if (gpuBuffer && gpuImage) {
1604 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1605 const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1606 const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1607 const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1608 ? platImage.arrayLayers
1609 : subresourceLayer.layerCount;
1610 const VkImageSubresourceLayers imageSubresourceLayer {
1611 (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1612 subresourceLayer.mipLevel,
1613 subresourceLayer.baseArrayLayer,
1614 layerCount,
1615 };
1616 const GpuImageDesc& imageDesc = gpuImage->GetDesc();
1617 // Math::min to force staying inside image
1618 const uint32_t mip = subresourceLayer.mipLevel;
1619 const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
1620 const Size3D& imageOffset = bufferImageCopy.imageOffset;
1621 const VkExtent3D imageExtent = {
1622 Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1623 Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1624 Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1625 };
1626 const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1627 (imageOffset.depth < imageSize.depth);
1628 const VkBufferImageCopy bufferImageCopyVk {
1629 bufferImageCopy.bufferOffset,
1630 bufferImageCopy.bufferRowLength,
1631 bufferImageCopy.bufferImageHeight,
1632 imageSubresourceLayer,
1633 { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1634 static_cast<int32_t>(imageOffset.depth) },
1635 imageExtent,
1636 };
1637
1638 const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1639 const VkImage image = (gpuImage->GetPlatformData()).image;
1640
1641 if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1642 vkCmdCopyBufferToImage(cmdBuf.commandBuffer, // commandBuffer
1643 buffer, // srcBuffer
1644 image, // dstImage
1645 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1646 1, // regionCount
1647 &bufferImageCopyVk); // pRegions
1648 } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1649 vkCmdCopyImageToBuffer(cmdBuf.commandBuffer, // commandBuffer
1650 image, // srcImage
1651 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1652 buffer, // dstBuffer
1653 1, // regionCount
1654 &bufferImageCopyVk); // pRegions
1655 }
1656 }
1657 }
1658
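// Copies an image region. Layer counts are expanded for GPU_IMAGE_ALL_LAYERS and the extent is
// clamped against both the source and the destination image sizes.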
1659 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1660 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1661 {
1662 const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1663 const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1664 if (srcGpuImage && dstGpuImage) {
1665 const ImageCopy& copy = renderCmd.imageCopy;
1666 const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1667 const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1668
1669 const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1670 const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1671 const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1672 ? srcPlatImage.arrayLayers
1673 : srcSubresourceLayer.layerCount;
1674 const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1675 ? dstPlatImage.arrayLayers
1676 : dstSubresourceLayer.layerCount;
1677
1678 const VkImageSubresourceLayers srcImageSubresourceLayer {
1679 (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1680 srcSubresourceLayer.mipLevel,
1681 srcSubresourceLayer.baseArrayLayer,
1682 srcLayerCount,
1683 };
1684 const VkImageSubresourceLayers dstImageSubresourceLayer {
1685 (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1686 dstSubresourceLayer.mipLevel,
1687 dstSubresourceLayer.baseArrayLayer,
1688 dstLayerCount,
1689 };
1690
1691 const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1692 const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1693
1694 VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1695 ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1696 ext.height =
1697 Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1698 ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1699
1700 const VkImageCopy imageCopyVk {
1701 srcImageSubresourceLayer, // srcSubresource
1702 { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1703 dstImageSubresourceLayer, // dstSubresource
1704 { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1705 ext, // extent
1706 };
1707 vkCmdCopyImage(cmdBuf.commandBuffer, // commandBuffer
1708 srcPlatImage.image, // srcImage
1709 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1710 dstPlatImage.image, // dstImage
1711 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1712 1, // regionCount
1713 &imageCopyVk); // pRegions
1714 }
1715 }
1716
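// Translates a front-end barrier point into vkCmdPipelineBarrier calls.
// Barriers are gathered into fixed-size local arrays (maxBarrierCount per type) and flushed whenever
// one of the arrays fills up or the barrier list ends; the source and destination stage masks are
// accumulated over the gathered batch.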
1717 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1718 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1719 const RenderBarrierList& rbl)
1720 {
1721 if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1722 return;
1723 }
1724
1725 const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1726 rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1727 PLUGIN_ASSERT(barrierPointBarriers);
1728 if (!barrierPointBarriers) {
1729 return;
1730 }
1731 constexpr uint32_t maxBarrierCount { 8 };
1732 VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1733 VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1734 VkMemoryBarrier memoryBarriers[maxBarrierCount];
1735
1736 // generally there is only a single barrier list per barrier point,
1737 // but with batched render passes there can be many
1738 // NOTE: all barrier lists could be patched into a single vk command if needed
1739 // NOTE: memory and pipeline barriers should be allowed on the front-end side
1740 const uint32_t barrierListCount = (uint32_t)barrierPointBarriers->barrierListCount;
1741 const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1742 #if (RENDER_VALIDATION_ENABLED == 1)
1743 uint32_t fullBarrierCount = 0u;
1744 #endif
1745 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1746 if (nextBarrierList == nullptr) { // cannot be null, just a safety check
1747 PLUGIN_ASSERT(false);
1748 return;
1749 }
1750 const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1751 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1752 const uint32_t barrierCount = (uint32_t)barrierListRef.count;
1753
1754 uint32_t bufferBarrierIdx = 0;
1755 uint32_t imageBarrierIdx = 0;
1756 uint32_t memoryBarrierIdx = 0;
1757
1758 VkPipelineStageFlags srcPipelineStageMask { 0 };
1759 VkPipelineStageFlags dstPipelineStageMask { 0 };
1760 constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1761
1762 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1763 const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1764
1765 uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1766 uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1767 if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1768 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1769 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1770 }
1771
1772 const RenderHandle resourceHandle = ref.resourceHandle;
1773 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1774
1775 PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1776 (handleType == RenderHandleType::GPU_IMAGE));
1777
1778 const VkAccessFlags srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1779 const VkAccessFlags dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1780
1781 srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1782 dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1783
1784 // NOTE: zero-size buffer barriers are allowed at the moment
1785 if (handleType == RenderHandleType::GPU_BUFFER) {
1786 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
1787 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1788 // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1789 const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1790 const VkDeviceSize size =
1791 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1792 (VkDeviceSize)ref.dst.optionalByteSize);
1793 if (platBuffer.buffer) {
1794 bufferMemoryBarriers[bufferBarrierIdx++] = {
1795 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1796 nullptr, // pNext
1797 srcAccessMask, // srcAccessMask
1798 dstAccessMask, // dstAccessMask
1799 srcQueueFamilyIndex, // srcQueueFamilyIndex
1800 dstQueueFamilyIndex, // dstQueueFamilyIndex
1801 platBuffer.buffer, // buffer
1802 offset, // offset
1803 size, // size
1804 };
1805 }
1806 }
1807 } else if (handleType == RenderHandleType::GPU_IMAGE) {
1808 if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
1809 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1810
1811 const VkImageLayout srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1812 const VkImageLayout dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1813
1814 const VkImageAspectFlags imageAspectFlags =
1815 (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1816 ? platImage.aspectFlags
1817 : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1818
1819 const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1820 PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1821 ? VK_REMAINING_MIP_LEVELS
1822 : ref.src.optionalImageSubresourceRange.levelCount;
1823
1824 const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1825 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1826 ? VK_REMAINING_ARRAY_LAYERS
1827 : ref.src.optionalImageSubresourceRange.layerCount;
1828
1829 const VkImageSubresourceRange imageSubresourceRange {
1830 imageAspectFlags, // aspectMask
1831 ref.src.optionalImageSubresourceRange.baseMipLevel, // baseMipLevel
1832 levelCount, // levelCount
1833 ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1834 layerCount, // layerCount
1835 };
1836
1837 if (platImage.image) {
1838 imageMemoryBarriers[imageBarrierIdx++] = {
1839 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1840 nullptr, // pNext
1841 srcAccessMask, // srcAccessMask
1842 dstAccessMask, // dstAccessMask
1843 srcImageLayout, // oldLayout
1844 dstImageLayout, // newLayout
1845 srcQueueFamilyIndex, // srcQueueFamilyIndex
1846 dstQueueFamilyIndex, // dstQueueFamilyIndex
1847 platImage.image, // image
1848 imageSubresourceRange, // subresourceRange
1849 };
1850 }
1851 }
1852 } else {
1853 memoryBarriers[memoryBarrierIdx++] = {
1854 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1855 nullptr, // pNext
1856 srcAccessMask, // srcAccessMask
1857 dstAccessMask, // dstAccessMask
1858 };
1859 }
1860
1861 const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
1862 const bool resetBarriers = ((bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
1863 (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1)))
1864 ? true
1865 : false;
1866
1867 if (hasBarriers && resetBarriers) {
1868 #if (RENDER_VALIDATION_ENABLED == 1)
1869 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1870 #endif
1871 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1872 srcPipelineStageMask, // srcStageMask
1873 dstPipelineStageMask, // dstStageMask
1874 dependencyFlags, // dependencyFlags
1875 memoryBarrierIdx, // memoryBarrierCount
1876 memoryBarriers, // pMemoryBarriers
1877 bufferBarrierIdx, // bufferMemoryBarrierCount
1878 bufferMemoryBarriers, // pBufferMemoryBarriers
1879 imageBarrierIdx, // imageMemoryBarrierCount
1880 imageMemoryBarriers); // pImageMemoryBarriers
1881
1882 bufferBarrierIdx = 0;
1883 imageBarrierIdx = 0;
1884 memoryBarrierIdx = 0;
1885 }
1886 }
1887 }
1888 #if (RENDER_VALIDATION_ENABLED == 1)
1889 if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1890 PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1891 "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1892 barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1893 }
1894 #endif
1895 }
1896
1897 namespace {
1898 struct DescriptorSetUpdateDataStruct {
1899 uint32_t accelIndex { 0U };
1900 uint32_t bufferIndex { 0U };
1901 uint32_t imageIndex { 0U };
1902 uint32_t samplerIndex { 0U };
1903 uint32_t writeBindIdx { 0U };
1904 };
1905 } // namespace
1906
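// Updates all descriptor sets recorded to the render command list.
// For every set the GPU handle is advanced first, then the buffer/image/sampler (and acceleration
// structure) write data is gathered into the context's low-level write arrays, and finally a single
// vkUpdateDescriptorSets call is issued for the whole batch.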
1907 void RenderBackendVk::UpdateCommandListDescriptorSets(
1908 const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
1909 {
1910 NodeContextDescriptorSetManagerVk& ctxDescMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;
1911
1912 const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
1913 const uint32_t upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
1914 LowLevelContextDescriptorWriteDataVk& wd = ctxDescMgr.GetLowLevelDescriptorWriteData();
1915 DescriptorSetUpdateDataStruct dsud;
1916 for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
1917 if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
1918 (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
1919 continue;
1920 }
1921 const RenderHandle descHandle = allDescSets[descIdx];
1922 // first update gpu descriptor indices
1923 ncdsm.UpdateDescriptorSetGpuHandle(descHandle);
1924
1925 // actual vulkan descriptor set update
1926 const LowLevelDescriptorSetVk* descriptorSet = ctxDescMgr.GetDescriptorSet(descHandle);
1927 if (descriptorSet && descriptorSet->descriptorSet) {
1928 const DescriptorSetLayoutBindingResources bindingResources = ncdsm.GetCpuDescriptorSetData(descHandle);
1929 #if (RENDER_VALIDATION_ENABLED == 1)
1930 // get descriptor counts
1931 const LowLevelDescriptorCountsVk& descriptorCounts = ctxDescMgr.GetLowLevelDescriptorCounts(descHandle);
1932 if ((uint32_t)bindingResources.bindings.size() > descriptorCounts.writeDescriptorCount) {
1933 PLUGIN_LOG_E("RENDER_VALIDATION: update descriptor set bindings exceed descriptor set bindings");
1934 }
1935 #endif
1936 if ((uint32_t)bindingResources.bindings.size() >
1937 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
1938 PLUGIN_ASSERT(false);
1939 continue;
1940 }
1941 const auto& buffers = bindingResources.buffers;
1942 const auto& images = bindingResources.images;
1943 const auto& samplers = bindingResources.samplers;
1944 for (const auto& ref : buffers) {
1945 const uint32_t descriptorCount = ref.binding.descriptorCount;
1946 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
1947 if (descriptorCount == 0) {
1948 continue;
1949 }
1950 const uint32_t arrayOffset = ref.arrayOffset;
1951 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1952 if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1953 #if (RENDER_VULKAN_RT_ENABLED == 1)
1954 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1955 // index 0 is the binding's own resource; from index 1 onwards the array offset resources are used
1956 const BindableBuffer& bRes =
1957 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1958 if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1959 const GpuAccelerationStructurePlatformDataVk& platAccel =
1960 resPtr->GetPlatformDataAccelerationStructure();
1961 wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
1962 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1963 nullptr, // pNext
1964 descriptorCount, // accelerationStructureCount
1965 &platAccel.accelerationStructure, // pAccelerationStructures
1966 };
1967 }
1968 }
1969 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1970 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1971 &wd.descriptorAccelInfos[dsud.accelIndex], // pNext
1972 descriptorSet->descriptorSet, // dstSet
1973 ref.binding.binding, // dstBinding
1974 0, // dstArrayElement
1975 descriptorCount, // descriptorCount
1976 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1977 nullptr, // pImageInfo
1978 nullptr, // pBufferInfo
1979 nullptr, // pTexelBufferView
1980 };
1981 dsud.accelIndex += descriptorCount;
1982 #endif
1983 } else {
1984 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1985 // index 0 is the binding's own resource; from index 1 onwards the array offset resources are used
1986 const BindableBuffer& bRes =
1987 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1988 const VkDeviceSize optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
1989 if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1990 const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
1991 // takes into account dynamic ring buffers with mapping
1992 const VkDeviceSize bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
1993 const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
1994 const VkDeviceSize bufferRange =
1995 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
1996 (VkDeviceSize)bRes.byteSize);
1997 wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
1998 platBuffer.buffer, // buffer
1999 byteOffset, // offset
2000 bufferRange, // range
2001 };
2002 }
2003 }
2004 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2005 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2006 nullptr, // pNext
2007 descriptorSet->descriptorSet, // dstSet
2008 ref.binding.binding, // dstBinding
2009 0, // dstArrayElement
2010 descriptorCount, // descriptorCount
2011 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2012 nullptr, // pImageInfo
2013 &wd.descriptorBufferInfos[dsud.bufferIndex], // pBufferInfo
2014 nullptr, // pTexelBufferView
2015 };
2016 dsud.bufferIndex += descriptorCount;
2017 }
2018 }
2019 for (const auto& ref : images) {
2020 const uint32_t descriptorCount = ref.binding.descriptorCount;
2021 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
2022 if (descriptorCount == 0) {
2023 continue;
2024 }
2025 const VkDescriptorType descriptorType = (VkDescriptorType)ref.binding.descriptorType;
2026 const uint32_t arrayOffset = ref.arrayOffset;
2027 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
2028 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2029 // index 0 is the binding's own resource; from index 1 onwards the array offset resources are used
2030 const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].resource;
2031 if (const GpuImageVk* resPtr = gpuResourceMgr_.GetImage<GpuImageVk>(bRes.handle); resPtr) {
2032 VkSampler sampler = VK_NULL_HANDLE;
2033 if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2034 const GpuSamplerVk* samplerPtr =
2035 gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
2036 if (samplerPtr) {
2037 sampler = samplerPtr->GetPlatformData().sampler;
2038 }
2039 }
2040 const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
2041 const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
2042 VkImageView imageView = platImage.imageView;
2043 if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2044 (bRes.layer < platImageViews.layerImageViews.size())) {
2045 imageView = platImageViews.layerImageViews[bRes.layer];
2046 } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
2047 if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2048 (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
2049 imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
2050 } else if (bRes.mip < platImageViews.mipImageViews.size()) {
2051 imageView = platImageViews.mipImageViews[bRes.mip];
2052 }
2053 }
2054 wd.descriptorImageInfos[dsud.imageIndex + idx] = {
2055 sampler, // sampler
2056 imageView, // imageView
2057 (VkImageLayout)bRes.imageLayout, // imageLayout
2058 };
2059 }
2060 }
2061 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2062 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2063 nullptr, // pNext
2064 descriptorSet->descriptorSet, // dstSet
2065 ref.binding.binding, // dstBinding
2066 0, // dstArrayElement
2067 descriptorCount, // descriptorCount
2068 descriptorType, // descriptorType
2069 &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
2070 nullptr, // pBufferInfo
2071 nullptr, // pTexelBufferView
2072 };
2073 dsud.imageIndex += descriptorCount;
2074 }
2075 for (const auto& ref : samplers) {
2076 const uint32_t descriptorCount = ref.binding.descriptorCount;
2077 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
2078 if (descriptorCount == 0) {
2079 continue;
2080 }
2081 const uint32_t arrayOffset = ref.arrayOffset;
2082 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
2083 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2084 // index 0 is the binding's own resource; from index 1 onwards the array offset resources are used
2085 const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].resource;
2086 if (const GpuSamplerVk* resPtr = gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
2087 const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
2088 wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
2089 platSampler.sampler, // sampler
2090 VK_NULL_HANDLE, // imageView
2091 VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
2092 };
2093 }
2094 }
2095 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2096 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2097 nullptr, // pNext
2098 descriptorSet->descriptorSet, // dstSet
2099 ref.binding.binding, // dstBinding
2100 0, // dstArrayElement
2101 descriptorCount, // descriptorCount
2102 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2103 &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
2104 nullptr, // pBufferInfo
2105 nullptr, // pTexelBufferView
2106 };
2107 dsud.samplerIndex += descriptorCount;
2108 }
2109
2110 #if (RENDER_PERF_ENABLED == 1)
2111 // count the actual updated descriptor sets, not the API calls
2112 stateCache.perfCounters.updateDescriptorSetCount++;
2113 #endif
2114 }
2115 }
2116 // perform all gathered descriptor set writes with a single API call
2117 if (dsud.writeBindIdx > 0U) {
2118 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2119 vkUpdateDescriptorSets(device, // device
2120 dsud.writeBindIdx, // descriptorWriteCount
2121 wd.writeDescriptorSets.data(), // pDescriptorWrites
2122 0, // descriptorCopyCount
2123 nullptr); // pDescriptorCopies
2124 }
2125 }
2126
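// Binds descriptor sets for the currently bound PSO.
// Dynamic offsets are gathered per set and the immutable sampler bitmask of each set is folded into
// the state cache hash; if the hash changes, the pipeline bind is re-issued (with possible PSO
// re-creation) and the sets are bound again.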
2127 void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
2128 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2129 StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
2130 {
2131 const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;
2132
2133 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2134 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
2135 const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
2136 ? VK_PIPELINE_BIND_POINT_COMPUTE
2137 : VK_PIPELINE_BIND_POINT_GRAPHICS;
2138 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2139
2140 bool valid = (pipelineLayout != VK_NULL_HANDLE) ? true : false;
2141 const uint32_t firstSet = renderCmd.firstSet;
2142 const uint32_t setCount = renderCmd.setCount;
2143 if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
2144 uint32_t combinedDynamicOffsetCount = 0;
2145 uint32_t dynamicOffsetDescriptorSetIndices = 0;
2146 uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;
2147
2148 VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2149 const uint32_t firstPlusCount = firstSet + setCount;
2150 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2151 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2152 if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
2153 const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
2154 dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
2155 combinedDynamicOffsetCount += dynamicDescriptorCount;
2156
2157 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
2158 if (descriptorSet && descriptorSet->descriptorSet) {
2159 descriptorSets[idx] = descriptorSet->descriptorSet;
2160 // update, copy to state cache
2161 PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
2162 stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
2163 const uint32_t currShift = (idx * 16u);
2164 const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
2165 uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
2166 stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
2167 } else {
2168 valid = false;
2169 }
2170 }
2171 }
2172
2173 uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
2174 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2175 uint32_t dynamicOffsetIdx = 0;
2176 // NOTE: could be optimized
2177 // the safety checks below ensure that offsets are not updated for non-dynamic sets;
2178 // they could be enabled only for validation builds
2179 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2180 if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
2181 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2182 const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
2183 const uint32_t dodResCount = static_cast<uint32_t>(dod.resources.size());
2184 const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
2185 for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
2186 uint32_t byteOffset = 0U;
2187 if (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount) {
2188 byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dodIdx];
2189 }
2190 dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
2191 }
2192 }
2193 }
2194
2195 stateCache.validBindings = valid;
2196 if (stateCache.validBindings) {
2197 if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
2198 vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
2199 pipelineBindPoint, // pipelineBindPoint
2200 pipelineLayout, // layout
2201 firstSet, // firstSet
2202 setCount, // descriptorSetCount
2203 &descriptorSets[firstSet], // pDescriptorSets
2204 dynamicOffsetIdx, // dynamicOffsetCount
2205 dynamicOffsets); // pDynamicOffsets
2206 #if (RENDER_PERF_ENABLED == 1)
2207 stateCache.perfCounters.bindDescriptorSetCount++;
2208 #endif
2209 } else {
2210 // possible pso re-creation and bind of these sets to the new pso
2211 const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
2212 (PipelineBindPoint)pipelineBindPoint };
2213 RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
2214 RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
2215 }
2216 }
2217 }
2218 }
2219
2220 void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2221 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2222 {
2223 PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
2224 PLUGIN_ASSERT(renderCmd.data);
2225
2226 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2227 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2228
2229 const bool valid = ((pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0)) ? true : false;
2230 PLUGIN_ASSERT(valid);
2231
2232 if (valid) {
2233 const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
2234 vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
2235 pipelineLayout, // layout
2236 shaderStageFlags, // stageFlags
2237 0, // offset
2238 renderCmd.pushConstant.byteSize, // size
2239 static_cast<void*>(renderCmd.data)); // pValues
2240 }
2241 }
2242
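// Builds an acceleration structure (compiled in only when RENDER_VULKAN_RT_ENABLED == 1).
// Geometry descriptions for triangles, AABBs, and instances are gathered into a single array, buffer
// device addresses are queried with vkGetBufferDeviceAddress, and the build is recorded through the
// vkCmdBuildAccelerationStructuresKHR extension function.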
2243 void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
2244 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2245 const StateCache& stateCache)
2246 {
2247 #if (RENDER_VULKAN_RT_ENABLED == 1)
2248 // NOTE: missing
2249 const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.dstAccelerationStructure);
2250 const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
2251 if (dst && scratchBuffer) {
2252 const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
2253 const VkDevice device = devicePlat.device;
2254
2255 const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
2256 const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;
2257
2258 // scratch data with user offset
2259 const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
2260 VkDeviceSize(renderCmd.scratchOffset) };
2261
2262 const size_t arraySize =
2263 renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
2264 vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
2265 vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
2266
2267 size_t arrayIndex = 0;
2268 for (const auto& trianglesRef : renderCmd.trianglesView) {
2269 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2270 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2271 nullptr, // pNext
2272 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
2273 {}, // geometry;
2274 0, // flags
2275 };
2276 uint32_t primitiveCount = 0;
2277 const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
2278 const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
2279 if (vb && ib) {
2280 const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
2281 device, vb->GetPlatformData().buffer) };
2282 const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
2283 device, ib->GetPlatformData().buffer) };
2284 VkDeviceOrHostAddressConstKHR transformData {};
2285 if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
2286 if (const GpuBufferVk* tr =
2287 gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
2288 tr) {
2289 transformData.deviceAddress = { GetBufferDeviceAddress(device, tr->GetPlatformData().buffer) };
2290 }
2291 }
2292 primitiveCount = trianglesRef.info.indexCount / 3u; // triangles
2293
2294 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2295 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
2296 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
2297 nullptr, // pNext
2298 VkFormat(trianglesRef.info.vertexFormat), // vertexFormat
2299 vertexData, // vertexData
2300 VkDeviceSize(trianglesRef.info.vertexStride), // vertexStride
2301 trianglesRef.info.maxVertex, // maxVertex
2302 VkIndexType(trianglesRef.info.indexType), // indexType
2303 indexData, // indexData
2304 transformData, // transformData
2305 };
2306 }
2307 buildRangeInfos[arrayIndex] = {
2308 primitiveCount, // primitiveCount
2309 0u, // primitiveOffset
2310 0u, // firstVertex
2311 0u, // transformOffset
2312 };
2313 arrayIndex++;
2314 }
2315 for (const auto& aabbsRef : renderCmd.aabbsView) {
2316 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2317 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2318 nullptr, // pNext
2319 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
2320 {}, // geometry;
2321 0, // flags
2322 };
2323 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2324 if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle); iPtr) {
2325 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2326 }
2327 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2328 geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
2329 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
2330 nullptr, // pNext
2331 deviceAddress, // data
2332 aabbsRef.info.stride, // stride
2333 };
2334 buildRangeInfos[arrayIndex] = {
2335 1u, // primitiveCount
2336 0u, // primitiveOffset
2337 0u, // firstVertex
2338 0u, // transformOffset
2339 };
2340 arrayIndex++;
2341 }
2342 for (const auto& instancesRef : renderCmd.instancesView) {
2343 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2344 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2345 nullptr, // pNext
2346 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
2347 {}, // geometry;
2348 0, // flags
2349 };
2350 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2351 if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle);
2352 iPtr) {
2353 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2354 }
2355 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2356 geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2357 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2358 nullptr, // pNext
2359 instancesRef.info.arrayOfPointers, // arrayOfPointers
2360 deviceAddress, // data
2361 };
2362 buildRangeInfos[arrayIndex] = {
2363 1u, // primitiveCount
2364 0u, // primitiveOffset
2365 0u, // firstVertex
2366 0u, // transformOffset
2367 };
2368 arrayIndex++;
2369 }
2370
2371 const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2372 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2373 nullptr, // pNext
2374 VkAccelerationStructureTypeKHR(renderCmd.type), // type
2375 VkBuildAccelerationStructureFlagsKHR(renderCmd.flags), // flags
2376 VkBuildAccelerationStructureModeKHR(renderCmd.mode), // mode
2377 VK_NULL_HANDLE, // srcAccelerationStructure
2378 dstAs, // dstAccelerationStructure
2379 uint32_t(arrayIndex), // geometryCount
2380 geometryData.data(), // pGeometries
2381 nullptr, // ppGeometries
2382 scratchData, // scratchData
2383 };
2384
2385 vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2386 for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2387 buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2388 }
2389 const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2390 if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2391 extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2392 1u, // infoCount
2393 &buildGeometryInfo, // pInfos
2394 buildRangeInfosPtr.data()); // ppBuildRangeInfos
2395 }
2396 }
2397 #endif
2398 }
2399
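// Clears a color image. The expected image layout is GENERAL or TRANSFER_DST_OPTIMAL
// (VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR is not supported at the moment).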
2400 void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
2401 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2402 const StateCache& stateCache)
2403 {
2404 const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
2405 // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but we don't support it at the moment
2406 const VkImageLayout imageLayout = (VkImageLayout)renderCmd.imageLayout;
2407 PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
2408 if (imagePtr) {
2409 const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
2410 if (platImage.image) {
2411 VkClearColorValue clearColor;
2412 PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
2413 CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));
2414
2415 // NOTE: temporary vector allocated due to not having max limit
2416 vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
2417 for (size_t idx = 0; idx < ranges.size(); ++idx) {
2418 const auto& inputRef = renderCmd.ranges[idx];
2419 ranges[idx] = {
2420 (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
2421 inputRef.baseMipLevel, // baseMipLevel
2422 inputRef.levelCount, // levelCount
2423 inputRef.baseArrayLayer, // baseArrayLayer
2424 inputRef.layerCount, // layerCount
2425 };
2426 }
2427
2428 vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
2429 platImage.image, // image
2430 imageLayout, // imageLayout
2431 &clearColor, // pColor
2432 static_cast<uint32_t>(ranges.size()), // rangeCount
2433 ranges.data()); // pRanges
2434 }
2435 }
2436 }
2437
2438 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
2439 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2440 const StateCache& stateCache)
2441 {
2442 const ViewportDesc& vd = renderCmd.viewportDesc;
2443
2444 const VkViewport viewport {
2445 vd.x, // x
2446 vd.y, // y
2447 vd.width, // width
2448 vd.height, // height
2449 vd.minDepth, // minDepth
2450 vd.maxDepth, // maxDepth
2451 };
2452
2453 vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
2454 0, // firstViewport
2455 1, // viewportCount
2456 &viewport); // pViewports
2457 }
2458
2459 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
2460 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2461 const StateCache& stateCache)
2462 {
2463 const ScissorDesc& sd = renderCmd.scissorDesc;
2464
2465 const VkRect2D scissor {
2466 { sd.offsetX, sd.offsetY }, // offset
2467 { sd.extentWidth, sd.extentHeight }, // extent
2468 };
2469
2470 vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
2471 0, // firstScissor
2472 1, // scissorCount
2473 &scissor); // pScissors
2474 }
2475
2476 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
2477 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2478 const StateCache& stateCache)
2479 {
2480 vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
2481 renderCmd.lineWidth); // lineWidth
2482 }
2483
2484 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
2485 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2486 const StateCache& stateCache)
2487 {
2488 vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
2489 renderCmd.depthBiasConstantFactor, // depthBiasConstantFactor
2490 renderCmd.depthBiasClamp, // depthBiasClamp
2491 renderCmd.depthBiasSlopeFactor); // depthBiasSlopeFactor
2492 }
2493
2494 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
2495 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2496 const StateCache& stateCache)
2497 {
2498 vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
2499 renderCmd.blendConstants); // blendConstants[4]
2500 }
2501
2502 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
2503 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2504 const StateCache& stateCache)
2505 {
2506 vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
2507 renderCmd.minDepthBounds, // minDepthBounds
2508 renderCmd.maxDepthBounds); // maxDepthBounds
2509 }
2510
2511 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
2512 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2513 const StateCache& stateCache)
2514 {
2515 const VkStencilFaceFlags stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;
2516
2517 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2518 vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
2519 stencilFaceMask, // faceMask
2520 renderCmd.mask); // compareMask
2521 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2522 vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
2523 stencilFaceMask, // faceMask
2524 renderCmd.mask); // writeMask
2525 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2526 vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
2527 stencilFaceMask, // faceMask
2528 renderCmd.mask); // reference
2529 }
2530 }
2531
void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
    if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
        const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
        const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
        };

        extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
            &fragmentSize, // pFragmentSize
            combinerOps); // combinerOps
    }
#endif
}

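// Lets a custom backend render node record directly into the current command buffer: the relevant
// low-level state (render pass, framebuffer, subpass, and pipeline layout) is snapshotted into a
// recording state and handed to the node's ExecuteBackendFrame.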
void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (stateCache.backendNode) {
        const RenderBackendRecordingStateVk recordingState = {
            {},
            cmdBuf.commandBuffer, // commandBuffer
            stateCache.lowLevelRenderPassData.renderPass, // renderPass
            stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
            stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
            stateCache.lowLevelRenderPassData.subpassIndex, // subpassIndex
            stateCache.pipelineLayout, // pipelineLayout
        };
        const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
        stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
    }
}

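// Application-requested timestamp writes are not supported yet: the assert flags this in debug
// builds, and queryPool below is left as a VK_NULL_HANDLE placeholder until a real pool is hooked up.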
void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT_MSG(false, "not implemented");

    const VkPipelineStageFlagBits pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
    const uint32_t queryIndex = renderCmd.queryIndex;
    VkQueryPool queryPool = VK_NULL_HANDLE;

    vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
        queryPool, // queryPool
        queryIndex, // firstQuery
        1); // queryCount

    vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
        pipelineStageFlagBits, // pipelineStage
        queryPool, // queryPool
        queryIndex); // query
}

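// Transitions any swapchain images whose presentation layout change was deferred to this command
// buffer into PRESENT_SRC_KHR via an image memory barrier, so they can be handed to the
// presentation engine.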
void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
{
    for (auto& presRef : presentationData_.infos) {
        if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
            continue;
        }

        PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
        PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);

        const GpuResourceState& state = presRef.renderGraphProcessedState;
        const VkAccessFlags srcAccessMask = (VkAccessFlags)state.accessFlags;
        const VkAccessFlags dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
        const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
                                                  (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
        const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
        const VkImageLayout oldLayout = (VkImageLayout)presRef.imageLayout;
        const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
        // NOTE: the queue is not currently checked (this barrier should be recorded on the same queue
        // that last used the image)
        constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
        constexpr VkImageSubresourceRange imageSubresourceRange {
            VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
            0, // baseMipLevel
            1, // levelCount
            0, // baseArrayLayer
            1, // layerCount
        };

        const VkImageMemoryBarrier imageMemoryBarrier {
            VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
            nullptr, // pNext
            srcAccessMask, // srcAccessMask
            dstAccessMask, // dstAccessMask
            oldLayout, // oldLayout
            newLayout, // newLayout
            srcQueueFamilyIndex, // srcQueueFamilyIndex
            dstQueueFamilyIndex, // dstQueueFamilyIndex
            presRef.swapchainImage, // image
            imageSubresourceRange, // subresourceRange
        };

        vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
            srcStageMask, // srcStageMask
            dstStageMask, // dstStageMask
            dependencyFlags, // dependencyFlags
            0, // memoryBarrierCount
            nullptr, // pMemoryBarriers
            0, // bufferMemoryBarrierCount
            nullptr, // pBufferMemoryBarriers
            1, // imageMemoryBarrierCount
            &imageMemoryBarrier); // pImageMemoryBarriers

        presRef.presentationLayoutChangeNeeded = false;
        presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
    }
}

#if (RENDER_PERF_ENABLED == 1)

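// Lazily creates a GPU timestamp query for each new render command context and assigns it a fixed
// byte offset in the shared readback buffer. The per-frame write offset is then advanced by the
// frame byte size and wrapped over the full buffer size, and the readback buffer is mapped for the
// duration of the frame.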
void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
{
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
            constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
            perfDataSet.gpuBufferOffset = (uint32_t)timers_.size() * singleQueryByteSize;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
    perfGpuTimerData_.currentOffset =
        (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
#endif
}

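// Reports the backend CPU timers and the frame's accumulated GPU time (in microseconds) to the
// "Renderer" performance data manager, and clears the GPU counter for the next frame.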
void RenderBackendVk::EndFrameTimers()
{
    int64_t fullGpuTime = 0;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // already in microseconds
    fullGpuTime = perfGpuTimerData_.fullGpuCounter;
    perfGpuTimerData_.fullGpuCounter = 0;

    perfGpuTimerData_.gpuBuffer->Unmap();
#endif
    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
        perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
    }
}

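// Resets and writes one slot of the named node's timestamp query pool at the given pipeline stage.
// Skipped for secondary command buffers, because the query pool reset cannot be recorded inside a
// render pass.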
void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
    const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (stateCache.secondaryCommandBuffer) {
        return; // cannot be called inside render pass (e.g. with secondary command buffers)
    }
    PLUGIN_ASSERT(timers_.count(name) == 1);
    const PerfDataSet* perfDataSet = &timers_[name];
    if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
        const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
        if (platData.queryPool) {
            vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
                platData.queryPool, // queryPool
                queryIndex, // firstQuery
                1); // queryCount

            vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
                stageFlagBits, // pipelineStage
                platData.queryPool, // queryPool
                queryIndex); // query
        }
    }
#endif
}

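// Helper that forwards the per-node backend counters (draw, dispatch, bind, and descriptor set
// statistics) to the performance data manager under the given render node name.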
namespace {
void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
{
    perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
    perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
    perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
    perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
    perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
    perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
    perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
    perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
    perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
    perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
}
} // namespace

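// Reads back the node's previously written timestamp pair from the mapped result buffer, converts
// the delta to microseconds using the device timestamp period, and records vkCmdCopyQueryPoolResults
// so the current frame's query results land in the same buffer slot. The node's CPU time and the
// accumulated backend counters are reported in the same pass.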
void RenderBackendVk::CopyPerfTimeStamp(
    const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
{
    PLUGIN_ASSERT(timers_.count(name) == 1);
    PerfDataSet* const perfDataSet = &timers_[name];

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // read the results of earlier queries back to the CPU,
    // then copy the current query results from the query pool into the GPU buffer
    const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
    int64_t gpuMicroSeconds = 0;
    {
        auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
        auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);

        const uint64_t startStamp = *currentData;
        const uint64_t endStamp = *(currentData + 1);

        const double timestampPeriod =
            static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                                    .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
        constexpr int64_t nanosToMicrosDivisor { 1000 };
        gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
        constexpr int64_t maxValidMicroSecondValue { 4294967295 };
        if (gpuMicroSeconds > maxValidMicroSecondValue) {
            gpuMicroSeconds = 0;
        }
        perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();

    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");

        perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);

        // cannot be called inside a render pass (e.g. with secondary command buffers)
        if (!stateCache.secondaryCommandBuffer) {
            if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
                const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());

                const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
                PLUGIN_ASSERT(gpuBuffer);
                const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();

                constexpr uint32_t queryCount = 2;
                constexpr VkDeviceSize queryStride = sizeof(uint64_t);
                constexpr VkQueryResultFlags queryResultFlags =
                    VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;

                if (platData.queryPool) {
                    vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
                        platData.queryPool, // queryPool
                        0, // firstQuery
                        queryCount, // queryCount
                        platBuffer.buffer, // dstBuffer
                        currentFrameByteOffset, // dstOffset
                        queryStride, // stride
                        queryResultFlags); // flags
                }
            }
        }
#endif
        UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
        perfDataSet->perfCounters = {}; // reset perf counters
    }
}

#endif
RENDER_END_NAMESPACE()