/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_graph.h"

#include <cinttypes>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/math/mathf.h>
#include <render/namespace.h>

#include "device/gpu_resource_cache.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
constexpr uint32_t INVALID_TRACK_IDX { ~0u };

#if (RENDER_DEV_ENABLED == 1)
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;

void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
{
    switch (rc.type) {
        case RenderCommandType::BARRIER_POINT: {
            PLUGIN_LOG_I("rc: BarrierPoint");
            break;
        }
        case RenderCommandType::DRAW: {
            PLUGIN_LOG_I("rc: Draw");
            break;
        }
        case RenderCommandType::DRAW_INDIRECT: {
            PLUGIN_LOG_I("rc: DrawIndirect");
            break;
        }
        case RenderCommandType::DISPATCH: {
            PLUGIN_LOG_I("rc: Dispatch");
            break;
        }
        case RenderCommandType::DISPATCH_INDIRECT: {
            PLUGIN_LOG_I("rc: DispatchIndirect");
            break;
        }
        case RenderCommandType::BIND_PIPELINE: {
            PLUGIN_LOG_I("rc: BindPipeline");
            break;
        }
        case RenderCommandType::BEGIN_RENDER_PASS: {
            PLUGIN_LOG_I("rc: BeginRenderPass");
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
                const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
                for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
                    const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
                    PLUGIN_LOG_I("    attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
                }
                PLUGIN_LOG_I("    subpass count: %u, subpass start idx: %u",
                    (uint32_t)beginRenderPass.renderPassDesc.subpassCount, beginRenderPass.subpassStartIndex);
            }
            break;
        }
        case RenderCommandType::NEXT_SUBPASS: {
            PLUGIN_LOG_I("rc: NextSubpass");
            break;
        }
        case RenderCommandType::END_RENDER_PASS: {
            PLUGIN_LOG_I("rc: EndRenderPass");
            break;
        }
        case RenderCommandType::BIND_VERTEX_BUFFERS: {
            PLUGIN_LOG_I("rc: BindVertexBuffers");
            break;
        }
        case RenderCommandType::BIND_INDEX_BUFFER: {
            PLUGIN_LOG_I("rc: BindIndexBuffer");
            break;
        }
        case RenderCommandType::COPY_BUFFER: {
            PLUGIN_LOG_I("rc: CopyBuffer");
            break;
        }
        case RenderCommandType::COPY_BUFFER_IMAGE: {
            PLUGIN_LOG_I("rc: CopyBufferImage");
            break;
        }
        case RenderCommandType::BIND_DESCRIPTOR_SETS: {
            PLUGIN_LOG_I("rc: BindDescriptorSets");
            break;
        }
        case RenderCommandType::PUSH_CONSTANT: {
            PLUGIN_LOG_I("rc: PushConstant");
            break;
        }
        case RenderCommandType::BLIT_IMAGE: {
            PLUGIN_LOG_I("rc: BlitImage");
            break;
        }
        // dynamic states
        case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
            PLUGIN_LOG_I("rc: DynamicStateViewport");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
            PLUGIN_LOG_I("rc: DynamicStateScissor");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
            PLUGIN_LOG_I("rc: DynamicStateLineWidth");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
            PLUGIN_LOG_I("rc: DynamicStateDepthBias");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
            PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
            PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_STENCIL: {
            PLUGIN_LOG_I("rc: DynamicStateStencil");
            break;
        }
        case RenderCommandType::WRITE_TIMESTAMP: {
            PLUGIN_LOG_I("rc: WriteTimestamp");
            break;
        }
        case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
            PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
            break;
        }
        case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
            PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
            break;
        }
        case RenderCommandType::UNDEFINED:
        default: {
            PLUGIN_ASSERT(false && "non-valid render command");
            break;
        }
    }
}

void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    for (const auto& ref : combinedBarriers) {
        const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
        if (type == RenderHandleType::GPU_BUFFER) {
            PLUGIN_LOG_I("barrier buffer    :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
                ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
                ref.dst.pipelineStageFlags);
        } else {
            PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
            PLUGIN_LOG_I("barrier image     :: handle:0x%" PRIx64
                         " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
                ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
                ref.dst.pipelineStageFlags, ref.src.optionalImageLayout, ref.dst.optionalImageLayout);
        }
    }
}

void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
        const auto handle = rc.renderPassDesc.attachmentHandles[idx];
        const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
        const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
        PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64 " name:%s, src_layout:%u dst_layout:%u (patched later)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
    }
}

void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
    PLUGIN_LOG_I("image_state   :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
        resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
        resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
        RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
        RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
    // one could fetch and print vulkan handle here as well e.g.
    // 1. const GpuImagePlatformDataVk& plat =
    // 2. (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
    // 3. PLUGIN_LOG_I("end_frame image   :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
}
#endif // RENDER_DEV_ENABLED

static constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                               CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                               CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
                                               CORE_ACCESS_MEMORY_WRITE_BIT;
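// NOTE: (illustrative) WRITE_ACCESS_FLAGS combines all write access bits; barrier logic
// can use it to tell read-only access apart from writes when deciding whether a
// write -> read/write hazard needs a barrier, roughly e.g.
// 1. const bool prevWrites = (prevState.accessFlags & WRITE_ACCESS_FLAGS) != 0
// 2. if (prevWrites) { /* an execution + memory dependency is required */ }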

void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
    RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
{
    const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
    for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
        if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
            beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
            storeState.resource.imageLayout = imageLayout;
        }
    }
}
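// NOTE: (illustrative) used to retroactively patch an already recorded render pass, e.g.
// StoreFinalImageState() patches the frame's last swapchain render pass with
// 1. PatchRenderPassFinalLayout(handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, state)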

void UpdateMultiRenderCommandListRenderPasses(RenderGraph::MultiRenderPassStore& store)
{
    const uint32_t renderPassCount = (uint32_t)store.renderPasses.size();
    PLUGIN_ASSERT(renderPassCount > 1);

    RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
    PLUGIN_ASSERT(firstRenderPass);
    PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
    const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
    PLUGIN_ASSERT(lastRenderPass);

    const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;

    // take attachment loads from the first one, and stores from the last one
    // take initial layouts from the first one, and final layouts from the last one (could take the next layout)
    // first store the correct render pass description to the first render pass and then copy to the others
    // resource states are copied from valid subpasses to the other render command list subpasses
    for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
        const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
        const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
        const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
        for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
            if (fromRpIdx != toRpIdx) {
                auto& toRenderPass = *(store.renderPasses[toRpIdx]);
                auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
                for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
                    toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
                    toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
                }
            }
        }
    }

    for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
        firstRenderPass->renderPassDesc.attachments[idx].storeOp =
            lastRenderPass->renderPassDesc.attachments[idx].storeOp;
        firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
            lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;

        firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
            lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
    }

    // copy subpasses to first
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
    }

    // copy from first to following render passes
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        // subpass start index is the only changing variable
        const uint32_t subpassStartIndex = store.renderPasses[idx]->subpassStartIndex;
        store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
        store.renderPasses[idx]->subpassStartIndex = subpassStartIndex;

        // image layouts need to match
        store.renderPasses[idx]->imageLayouts = firstRenderPass->imageLayouts;
        PLUGIN_ASSERT(store.renderPasses[idx]->subpasses.size() >= renderPassCount);
        // copy all subpasses
        if (!CloneData(store.renderPasses[idx]->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount,
                firstRenderPass->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount)) {
            PLUGIN_LOG_E("Copying of renderPasses failed.");
        }
        // copy input resource states
        if (!CloneData(store.renderPasses[idx]->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount,
                firstRenderPass->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount)) {
            PLUGIN_LOG_E("Copying of renderPasses failed.");
        }
        // NOTE: subpassResourceStates are not copied to different render passes
    }

#if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
    // copy the final layouts and resolves to the first render pass
    const uint32_t finalSubpassIdx = renderPassCount - 1U;
    if ((renderPassCount > 1U) && (firstRenderPass->subpasses[finalSubpassIdx].resolveAttachmentCount > 0U)) {
        firstRenderPass->renderPassDesc.subpassCount = 1U;
        firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), 1U };
        firstRenderPass->subpassResourceStates = { firstRenderPass->subpassResourceStates.data(), 1U };
        // copy resolve attachments from the final subpass
        auto& firstSubpass = firstRenderPass->subpasses[0U];
        const auto& finalSubpass = store.renderPasses[finalSubpassIdx]->subpasses[finalSubpassIdx];
        firstSubpass.resolveAttachmentCount = finalSubpass.resolveAttachmentCount;
        firstSubpass.depthResolveAttachmentCount = finalSubpass.depthResolveAttachmentCount;
        firstSubpass.depthResolveAttachmentIndex = finalSubpass.depthResolveAttachmentIndex;
        firstSubpass.depthResolveModeFlagBit = finalSubpass.depthResolveModeFlagBit;
        CloneData(firstSubpass.resolveAttachmentIndices, sizeof(firstSubpass.resolveAttachmentIndices),
            finalSubpass.resolveAttachmentIndices, sizeof(uint32_t) * firstSubpass.resolveAttachmentCount);
        // layouts for resolve attachments
        const auto& finalSubpassResourceStates =
            store.renderPasses[finalSubpassIdx]->subpassResourceStates[finalSubpassIdx];
        const uint32_t resolveAttachmentCount = firstSubpass.resolveAttachmentCount;
        for (uint32_t resIdx = 0U; resIdx < resolveAttachmentCount; ++resIdx) {
            const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[resIdx];
            firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
                finalSubpassResourceStates.layouts[resAttIdx];
            firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
        }
        if ((firstSubpass.depthResolveAttachmentCount > 0U) &&
            (firstSubpass.depthResolveAttachmentIndex < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT)) {
            const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[firstSubpass.depthResolveAttachmentIndex];
            firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
                finalSubpassResourceStates.layouts[resAttIdx];
            firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
        }

        // fix render command list indices
        for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
            store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
            store.renderPasses[idx]->subpassStartIndex = 0U;
            store.renderPasses[idx]->subpasses = firstRenderPass->subpasses;
            store.renderPasses[idx]->subpassResourceStates = firstRenderPass->subpassResourceStates;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_I("combine_multi_command_list_msaa_subpasses_enabled",
            "RENDER_VALIDATION: Combining multi-commandlist MSAA resolve subpasses");
#endif
    }
#endif
}
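// NOTE: (illustrative) after the patching above, all N stitched render passes share one
// RenderPassDesc: loadOps/initial layouts come from the first pass, storeOps/final
// layouts from the last pass, and only subpassStartIndex differs per render command list.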

ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout srcImageLayout = src.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
        PLUGIN_ASSERT(additionalImageState.layouts);
        srcImageLayout = additionalImageState.layouts[mipLevel];
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        srcImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

ResourceBarrier GetDstImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout dstImageLayout = dst.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        dstImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}
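// NOTE: (illustrative) mip selection in the helpers above: a valid dst.mip
// (< MAX_MIP_STATE_COUNT) narrows the barrier to that single mip level, otherwise the
// barrier covers all mip levels starting from src.mip; e.g. mipmap generation can blit
// one level at a time with single-mip barriers while the per-mip layouts are tracked.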

void ModifyAdditionalImageState(
    const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
    PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
#endif
    if (additionalStateRef.layouts) {
        if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
            (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
            additionalStateRef.layouts[res.mip] = res.imageLayout;
        } else {
            // set layout for all mips
            for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
                additionalStateRef.layouts[idx] = res.imageLayout;
            }
        }
    } else {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
#endif
    }
}
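// NOTE: (illustrative) images with additional state tracking keep one ImageLayout per
// mip level (up to MAX_MIP_STATE_COUNT); a single-mip update writes one slot, while
// GPU_IMAGE_ALL_MIP_LEVELS updates every slot.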

CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
    const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
{
    return {
        handle,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            srcImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        srcGpuQueue,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            dstImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        dstGpuQueue,
    };
}
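// NOTE: (illustrative) a queue ownership transfer needs matching release/acquire
// barriers: the identical CommandBarrier is recorded both at the end of the releasing
// queue's command list and at the start of the acquiring queue's command list
// (see PatchGpuResourceQueueTransfers below).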

void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
    array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
{
    for (const auto& transferRef : currNodeGpuResourceTransfers) {
        PLUGIN_ASSERT(transferRef.acquireNodeIdx < (uint32_t)frameRenderNodeContextData.size());

        auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
        const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
        GpuQueue releaseGpuQueue = acquireGpuQueue;

        if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
        }

        const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
            acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);

        // release ownership (NOTE: not done for previous frame)
        if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
            const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
        // acquire ownership
        {
            const uint32_t rcIndex = 0;
            const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
    }
}

bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
    const uint32_t customBarrierCount, const RenderHandle handle)
{
    bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
    if ((customBarrierCount > 0) && needsBarrier) {
        needsBarrier = (handledCustomBarriers.count(handle) == 0);
    }
    return needsBarrier;
}
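// NOTE: (illustrative) automatic barriers are created only for dynamic resources that
// were not already covered by user-provided custom barriers in this barrier point, e.g.
// 1. if (CheckForBarrierNeed(handledCustomBarriers, customBarrierCount, handle)) { /* track + barrier */ }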
} // namespace

RenderGraph::RenderGraph(GpuResourceManager& gpuResourceMgr) : gpuResourceMgr_(gpuResourceMgr) {}

void RenderGraph::BeginFrame()
{
    stateCache_.multiRenderPassStore.renderPasses.clear();
    stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
    stateCache_.multiRenderPassStore.firstBarrierPointIndex = ~0u;
    stateCache_.multiRenderPassStore.supportOpen = false;
    stateCache_.nodeCounter = 0u;
    stateCache_.checkForBackbufferDependency = false;
    stateCache_.usesSwapchainImage = false;
}

void RenderGraph::ProcessRenderNodeGraph(
    const bool checkBackbufferDependency, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
    stateCache_.checkForBackbufferDependency = checkBackbufferDependency;

    // NOTE: separate gpu buffers and gpu images due to larger structs, layers, mips in images
    // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
    // handles:
    // gpu images in descriptor sets, render passes, blits, and custom barriers
    // gpu buffers in descriptor sets, and custom barriers

    {
        // remove resources that will not be tracked anymore and release available slots
        const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
        for (const auto& handle : stateResetData.resources) {
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
            const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
            if ((handleType == RenderHandleType::GPU_IMAGE) &&
                (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
                if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
                    gpuImageTracking_[dataIdx] = {}; // reset
                    gpuImageAvailableIndices_.push_back(dataIdx);
                }
                gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
                if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
                    gpuBufferTracking_[dataIdx] = {}; // reset
                    gpuBufferAvailableIndices_.push_back(dataIdx);
                }
                gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            }
        }
    }

    gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
    gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
                  CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
        static uint64_t debugFrame = 0;
        debugFrame++;
        PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
    }
#endif

    // need to store some of the resources' frame state as undefined (i.e. reset on frame boundaries)
    ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);

    // store final state for the next frame
    StoreFinalBufferState();
    StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
}

RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
{
    return swapchainStates_;
}

void RenderGraph::ProcessRenderNodeGraphNodeStores(
    const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
{
    for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(graphStore);
        if (!graphStore) {
            continue;
        }

        for (uint32_t nodeIdx = 0; nodeIdx < (uint32_t)graphStore->renderNodeContextData.size(); ++nodeIdx) {
            auto& ref = graphStore->renderNodeContextData[nodeIdx];
            ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
            stateCache.usesSwapchainImage = false;                // reset

#if (RENDER_DEV_ENABLED == 1)
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
                PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
            }
#endif

            if (stateCache.multiRenderPassStore.supportOpen &&
                (stateCache.multiRenderPassStore.renderPasses.size() == 0)) {
                PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
                // NOTE: add more error handling and invalidate render command lists
            }
            stateCache.multiRenderPassStore.supportOpen = ref.renderCommandList->HasMultiRenderCommandListSubpasses();
            array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
            // go through commands that affect or need transitions and barriers
            ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);

            // needs backbuffer/swapchain wait
            if (stateCache.usesSwapchainImage) {
                ref.submitInfo.waitForSwapchainAcquireSignal = true;
            }

            // patch gpu resource queue transfers
            if (!currNodeGpuResourceTransfers_.empty()) {
                PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
                // clear for next use
                currNodeGpuResourceTransfers_.clear();
            }

            stateCache_.nodeCounter++;
        }
    }
}

void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
    const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
{
    for (uint32_t listIdx = 0; listIdx < (uint32_t)cmdListRef.size(); ++listIdx) {
        auto& cmdRef = cmdListRef[listIdx];

#if (RENDER_DEV_ENABLED == 1)
        if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
            DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
        }
#endif

        // most of the commands are handled within BarrierPoint
        switch (cmdRef.type) {
            case RenderCommandType::BARRIER_POINT:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::BEGIN_RENDER_PASS:
                RenderCommand(
                    nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::END_RENDER_PASS:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::NEXT_SUBPASS:
            case RenderCommandType::DRAW:
            case RenderCommandType::DRAW_INDIRECT:
            case RenderCommandType::DISPATCH:
            case RenderCommandType::DISPATCH_INDIRECT:
            case RenderCommandType::BIND_PIPELINE:
            case RenderCommandType::BIND_VERTEX_BUFFERS:
            case RenderCommandType::BIND_INDEX_BUFFER:
            case RenderCommandType::COPY_BUFFER:
            case RenderCommandType::COPY_BUFFER_IMAGE:
            case RenderCommandType::COPY_IMAGE:
            case RenderCommandType::BIND_DESCRIPTOR_SETS:
            case RenderCommandType::PUSH_CONSTANT:
            case RenderCommandType::BLIT_IMAGE:
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
            case RenderCommandType::CLEAR_COLOR_IMAGE:
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
            case RenderCommandType::DYNAMIC_STATE_SCISSOR:
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
            case RenderCommandType::DYNAMIC_STATE_STENCIL:
            case RenderCommandType::WRITE_TIMESTAMP:
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
            case RenderCommandType::UNDEFINED:
            default: {
                // nop
                break;
            }
        }
    } // end command for
}

void RenderGraph::StoreFinalBufferState()
{
    for (auto& ref : gpuBufferTracking_) {
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            // reset, but we do not reset the handle, because the gpuBufferTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
        }
        // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
        // frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}

void RenderGraph::StoreFinalImageState()
{
    swapchainStates_ = {}; // reset

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        PLUGIN_LOG_I("end_frame image_state:");
    }
#endif
    for (auto& ref : gpuImageTracking_) {
        // if the resource is not dynamic, we do not track it
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        // handle automatic presentation layout
        if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
            if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
                RenderCommandBeginRenderPass& beginRenderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
                PatchRenderPassFinalLayout(
                    ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
            }
            // NOTE: currently the automatic presentation layout is handled in the vulkan backend if not in a render pass
            // store final state for the backbuffer
            // currently we only store swapchains if they are really in use in this frame
            const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
            if (flags != 0) {
                swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
            }
        }
#if (RENDER_DEV_ENABLED == 1)
        // print before reset for next frame
        if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
            DebugPrintImageState(gpuResourceMgr_, ref);
        }
#endif
        // shallow resources are not tracked
        // they are always in undefined state at the beginning of the frame
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
            // reset, but we do not reset the handle, because the gpuImageTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
            if (addMips) {
                PLUGIN_ASSERT(!ref.additionalState.layouts);
                ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }

        // need to reset per frame variables for all images (so we do not try to patch from previous frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}
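// NOTE: (illustrative) resources flagged with IsResetOnFrameBorders start every frame in
// an undefined state; only the tracking slot (and a possible per-mip layout array) is
// kept alive across frames, and swapchain images additionally get their final layout
// patched to PRESENT_SRC above.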

void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
{
    // update layouts for attachments to gpu image state
    BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };

    PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);

    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) { // stitch render pass subpasses
        BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
    }

    const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();

    auto finalImageLayouts =
        array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));

    BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);

    for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
        const auto& subpassRef = rc.subpasses[subpassIdx];
        const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        if (subpassRef.depthAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(
                array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
                subpassResourceStatesRef, finalImageLayouts, stateCache);
            if (subpassRef.depthResolveAttachmentCount == 1) {
                BeginRenderPassUpdateSubpassImageStates(
                    array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
                    rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
            }
        }
        if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
                                                        subpassRef.fragmentShadingRateAttachmentCount),
                rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
        }
    }

    if (hasRenderPassDependency) { // stitch render pass subpasses
        if (rc.subpassStartIndex > 0) {
            // stitched to behave as a nextSubpass() and not beginRenderPass()
            rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
        }
        const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
        if (finalSubpass) {
            UpdateMultiRenderCommandListRenderPasses(stateCache.multiRenderPassStore);
            // multiRenderPassStore cleared in EndRenderPass
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
    }
#endif
}

void RenderGraph::BeginRenderPassHandleDependency(
    BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
{
    params.stateCache.multiRenderPassStore.renderPasses.push_back(&params.rc);
    // store the first begin render pass
    params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
        params.stateCache.multiRenderPassStore.renderPasses[0] };

    if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
        // the barrier point must be the previous command
        PLUGIN_ASSERT(commandListCommandIndex >= 1);
        const uint32_t prevCommandIndex = commandListCommandIndex - 1;

        const RenderCommandWithType& barrierPointCmdRef =
            nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
        PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
        PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));

        params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
        params.stateCache.multiRenderPassStore.firstBarrierPointIndex =
            static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc)->barrierPointIndex;
    }
}

void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
    array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
{
    auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
    const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
    auto& attachments = params.rc.renderPassDesc.attachments;
    auto& attachmentInputResourceStates = params.rc.inputResourceStates;

    for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
        const RenderHandle handle = attachmentHandles[attachmentIdx];
        // NOTE: invalidate invalid handle commands already in render command list
        if (!RenderHandleUtil::IsGpuImage(handle)) {
#ifdef _DEBUG
            PLUGIN_LOG_E("invalid handle in render node graph");
#endif
            continue;
        }
        auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
        ImageLayout imgLayout = stateRef.resource.imageLayout;

        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
        // image layout is undefined if automatic barriers have been disabled
        if (params.rc.enableAutomaticLayoutChanges) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
            if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
                if (stateRef.additionalState.layouts) {
                    imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
                } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
#endif
                }
            }

            initialImageLayouts[attachmentIdx] = imgLayout;
        }
        // undefined layout with load_op_load -> we modify to dont_care (and remove validation warning)
        if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
            (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
            // dont care (user needs to be sure what is wanted, i.e. in the first frame one should clear)
            attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
        }
        finalImageLayouts[attachmentIdx] = imgLayout;
        attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
        attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;

        // store render pass for final layout patching
        stateRef.prevRc = params.rpForCmdRef;
        stateRef.prevRenderNodeIndex = renderNodeIndex;

        // flag for backbuffer use
        if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
            params.stateCache.usesSwapchainImage = true;
        }
    }
}

void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attachmentIndices,
    const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
    array_view<ImageLayout> finalImageLayouts, StateCache& stateCache)
{
    for (const uint32_t attachmentIndex : attachmentIndices) {
        // NOTE: handle invalid commands already in render command list and invalidate draws etc.
        PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
        const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
        const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
        const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
        // NOTE: we should support non dynamicity and GENERAL

        finalImageLayouts[attachmentIndex] = refImgLayout;
        auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);

        ref.state = refState;
        ref.resource.handle = handle;
        ref.resource.imageLayout = refImgLayout;
        if (addMips) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
            const BindableImage image {
                handle,
                attachmentDesc.mipLevel,
                attachmentDesc.layer,
                refImgLayout,
                RenderHandle {},
            };
            ModifyAdditionalImageState(image, ref.additionalState);
        }
    }
}

void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    const RenderNodeContextData& nodeData, RenderCommandEndRenderPass& rc, StateCache& stateCache)
{
    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) {
        const bool finalSubpass = (rc.subpassCount == (uint32_t)stateCache.multiRenderPassStore.renderPasses.size());
        if (finalSubpass) {
            if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
                PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
                // NOTE: add more error handling and invalidate render command lists
            }
            rc.endType = RenderPassEndType::END_RENDER_PASS;
            stateCache.multiRenderPassStore.renderPasses.clear();
            stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
            stateCache.multiRenderPassStore.firstBarrierPointIndex = ~0u;
            stateCache.multiRenderPassStore.supportOpen = false;
        } else {
            rc.endType = RenderPassEndType::END_SUBPASS;
        }
    }
}
1031 
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
{
    // go through required descriptors for current upcoming event
    const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
    const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
    const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
    const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;

    parameterCachePools_.combinedBarriers.clear();
    parameterCachePools_.handledCustomBarriers.clear();
    ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
        rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
        nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
    // first check custom barriers
    if (parameters.customBarrierCount > 0) {
        HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
    }
    // then vertex/index buffer barriers in the barrier point before the render pass
    if (parameters.vertexInputBarrierCount > 0) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
    }
    if (parameters.indirectBufferBarrierCount > 0U) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
    }

    // at a barrier point, the next render command (the one the barriers are needed for) is known
    if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
        HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
        HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
        HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef); // NOTE: handles image to image
    } else {                                                                    // descriptor sets
        if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
            HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
        }
        const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
        const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
        const uint32_t descriptorSetHandleMaxIndex =
            Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
        const auto descriptorSetHandlesForBarriers =
            array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
                allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
        HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
    }

    if (!parameters.combinedBarriers.empty()) {
        // use the first render pass barrier point for the following subpasses
        // firstRenderPassBarrierList is null for the first subpass
        const bool renderPassHasDependency = stateCache.multiRenderPassStore.supportOpen;
        if (renderPassHasDependency && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
            PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
            stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
                rc.barrierPointIndex, parameters.combinedBarriers);
        } else {
            nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
    }
#endif
}

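// The two helpers below cache the post-barrier (destination) state of a resource
// so the next barrier point can use it as the source state. shaderStageFlags is
// reset; only access and pipeline stage flags matter for the tracked src state.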
inline void RenderGraph::UpdateBufferResourceState(
    RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

inline void RenderGraph::UpdateImageResourceState(
    RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

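// Merges user-recorded custom barriers into the combined barrier list and
// updates the tracked state of dynamic resources. If the recorded source state
// is undefined (src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM), the
// source access/stage/layout are filled in from the tracked state. Handled
// resources are inserted into params.handledCustomBarriers so the automatic
// barrier generation does not emit duplicates for them.
//
// A minimal sketch of a custom barrier as consumed here (only the fields this
// function reads; the flag and layout values are illustrative):
//
//   CommandBarrier cb {};
//   cb.resourceHandle = imageHandle; // GPU_BUFFER, GPU_IMAGE, or UNDEFINED
//   cb.src.optionalImageLayout = CORE_IMAGE_LAYOUT_MAX_ENUM; // auto-fill src from tracking
//   cb.dst.accessFlags = CORE_ACCESS_SHADER_READ_BIT;
//   cb.dst.pipelineStageFlags = CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
//   cb.dst.optionalImageLayout = CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;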
void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const CommandBarrier>& customBarrierListRef)
{
    params.handledCustomBarriers.reserve(params.customBarrierCount);
    PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
    for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
              end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
         begin != end; ++begin) {
        // add a copy and modify if needed
        auto& cb = params.combinedBarriers.emplace_back(*begin);

        // NOTE: undefined type is for non-resource memory/pipeline barriers
        const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
        const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
        PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
                      (type == RenderHandleType::GPU_IMAGE));
        if (type == RenderHandleType::GPU_BUFFER) {
            if (isDynamicTrack) {
                auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
                UpdateBufferResourceState(stateRef, params, cb);
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        } else if (type == RenderHandleType::GPU_IMAGE) {
            if (isDynamicTrack) {
                const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
                auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
                if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
                    uint32_t mipLevel = 0U;
                    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
                    ImageLayout srcImageLayout = stateRef.resource.imageLayout;
                    if (isAddMips) {
                        const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
                        const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
                        if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
                            (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
                            if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
                                mipLevel = dstMip;
                                mipCount = 1U;
                            } else {
                                mipLevel = srcMip;
                                // all mip levels
                            }
                            if (stateRef.additionalState.layouts) {
                                srcImageLayout = stateRef.additionalState.layouts[mipLevel];
                            } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                                PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
#endif
                            }
                        }
                    }
                    cb.src.accessFlags = stateRef.state.accessFlags;
                    cb.src.pipelineStageFlags =
                        stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                    cb.src.optionalImageLayout = srcImageLayout;
                    cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
                        PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
                }
                UpdateImageResourceState(stateRef, params, cb);
                stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
                if (isAddMips) {
                    const BindableImage image {
                        cb.resourceHandle,
                        cb.dst.optionalImageSubresourceRange.baseMipLevel,
                        cb.dst.optionalImageSubresourceRange.baseArrayLayer,
                        cb.dst.optionalImageLayout,
                        RenderHandle {},
                    };
                    ModifyAdditionalImageState(image, stateRef.additionalState);
                }
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        }
    }
}

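// Creates vertex/index buffer read barriers for the upcoming render pass. Note
// that, unlike the indirect argument variant below, this path does not consult
// handledCustomBarriers; UpdateStateAndCreateBarriersGpuBuffer itself drops the
// barrier when neither the access flags changed nor the previous access wrote.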
void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size()) {
            const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
            const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
            UpdateStateAndCreateBarriersGpuBuffer(
                resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
        }
    }
}

void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < (uint32_t)indirectBufferBarrierListRef.size());
        if (barrierIndex < (uint32_t)indirectBufferBarrierListRef.size()) {
            const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
            const bool needsArgsBarrier =
                CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
            if (needsArgsBarrier) {
                const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                    CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
                UpdateStateAndCreateBarriersGpuBuffer(
                    resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
            }
        }
    }
}

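// The Handle* helpers below peek at the render command that follows the barrier
// point (commandListCommandIndex + 1) and create transfer src/dst barriers for
// its resources, unless a custom barrier has already handled the resource.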
void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);

    const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);

    const bool needsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
    if (needsBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.handle;
        bRes.imageLayout = nextRc.imageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);

    const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.srcHandle;
        bRes.imageLayout = nextRc.srcImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.dstHandle;
        bRes.imageLayout = nextRc.dstImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);

    const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
                  (nextCmdRef.type == RenderCommandType::COPY_IMAGE));

    // NOTE: two different command types supported
    RenderHandle srcHandle;
    RenderHandle dstHandle;
    ImageSubresourceLayers srcImgLayers;
    ImageSubresourceLayers dstImgLayers;
    if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
        const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
        PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
        dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
    } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
        const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.imageCopy.srcSubresource;
        dstImgLayers = nextRc.imageCopy.dstSubresource;
    }

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
    if (needsSrcBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = srcHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = srcHandle;
            bRes.mip = srcImgLayers.mipLevel;
            bRes.layer = srcImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
    if (needsDstBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = dstHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = dstHandle;
            bRes.mip = dstImgLayers.mipLevel;
            bRes.layer = dstImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

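// Indirect dispatch reads its dispatch arguments from a GPU buffer: unless a
// custom barrier already covered the buffer, a barrier to
// CORE_ACCESS_INDIRECT_COMMAND_READ_BIT is created before the dispatch.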
void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);

    const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);

    const bool needsArgsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
    if (needsArgsBarrier) {
        const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
                CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

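// Walks every descriptor set bound for the upcoming draw/dispatch and creates
// barriers for the buffers and images they reference. For array bindings the
// first element is stored in the binding itself and elements 1..N-1 start at
// arrayOffset (see the index handling in the loops below).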
void RenderGraph::HandleDescriptorSets(ParameterCache& params,
    const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
    const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
{
    for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET);

        const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
        const auto& buffers = bindingResources.buffers;
        const auto& images = bindingResources.images;
        for (const auto& ref : buffers) {
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array binding elements which are bound through the first binding; they have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // the first descriptor is the binding itself; from index 1 onwards the array offset is used
                const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1];
                if (CheckForBarrierNeed(
                    params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
                }
            }
        }
        for (const auto& ref : images) {
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array binding elements which are bound through the first binding; they have descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // the first descriptor is the binding itself; from index 1 onwards the array offset is used
                const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1];
                if (CheckForBarrierNeed(
                    params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
                }
            }
        }
    } // end for
}

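// An image barrier is needed when the layout changes, the access flags change,
// or the previous access included a write (write -> read/write hazards). Input
// attachments are excluded as their transitions are expressed through render
// pass (subpass) dependencies rather than explicit barriers. A state change
// across queue types is recorded as a queue ownership transfer instead.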
void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
    const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
        return;
    }

    auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
    // NOTE: we previously patched the final render pass layouts here
    // ATM: we only patch the swapchain image if needed

    const GpuResourceState& prevState = ref.state;
    const BindableImage& prevImage = ref.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
    const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
                                                : GetSrcImageBarrier(prevState, prevImage);

    const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
    const bool accessFlagsChanged = (prevStateRb.accessFlags != state.accessFlags);
    const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS);
    const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
    // input attachments are handled with render passes and not with barriers
    if ((layoutChanged || accessFlagsChanged || writeTarget) && (!inputAttachment)) {
        if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
            (prevState.gpuQueue.type != state.gpuQueue.type)) {
            PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);

            PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
            currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
                res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
        } else {
            const ResourceBarrier dstImageBarrier =
                addMips ? GetDstImageBarrierMips(state, prevImage, res, ref.additionalState)
                        : GetDstImageBarrier(state, res);
            params.combinedBarriers.push_back(
                CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
        }

        ref.state = state;
        ref.resource = res;
        ref.prevRc = params.rcWithType;
        ref.prevRenderNodeIndex = params.renderNodeIndex;
        if (addMips) {
            ModifyAdditionalImageState(res, ref.additionalState);
        }
    }
}

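// The buffer variant: a barrier is needed when the access flags change or the
// previous access included a write; buffers have no layout to track.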
void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
    const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
        return;
    }

    // get the current state of the buffer
    auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
    const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
    if ((prevStateRb.accessFlags != dstState.accessFlags) || (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS)) {
        params.combinedBarriers.push_back(CommandBarrier {
            res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
    }

    // update the cached state to match the situation after the barrier
    srcStateRef.state = dstState;
    srcStateRef.resource = res;
    srcStateRef.prevRc = params.rcWithType;
    srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

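// The AddCommandBarrierAndUpdateStateCache* helpers below emit a single barrier
// for an explicitly given destination state. If the tracked state lives on a
// different GPU queue, a queue ownership transfer is recorded into
// currNodeGpuResourceTransfer instead of a barrier and is resolved later
// between the involved render nodes.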
void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer,
    const RenderCommandWithType& rcWithType, vector<CommandBarrier>& barriers,
    vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableBuffer srcBuffer = stateRef.resource;

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_BUFFER);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(
            RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
                ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
    } else {
        const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
        const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);

        barriers.push_back(CommandBarrier {
            newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newBuffer;
    stateRef.prevRc = rcWithType;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
}

void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
    vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    // newGpuResourceState has queue transfer image layout in old optionalImageLayout

    auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableImage srcImage = stateRef.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
            stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
    } else {
        const ResourceBarrier srcBarrier =
            addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
                    : GetSrcImageBarrier(srcState, srcImage);
        const ResourceBarrier dstBarrier =
            addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage, stateRef.additionalState)
                    : GetDstImageBarrier(newGpuResourceState, newImage);

        barriers.push_back(CommandBarrier {
            newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newImage;
    stateRef.prevRc = rcWithType;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
    if (addMips) {
        ModifyAdditionalImageState(newImage, stateRef.additionalState);
    }
}

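// GetBufferResourceStateRef / GetImageResourceStateRef map a handle's index
// part to a tracking slot that is lazily allocated from a free list. A minimal
// sketch of the same sparse-index + free-list pattern (hypothetical helper,
// not part of this file):
//
//   uint32_t AcquireTrackingSlot(vector<uint32_t>& sparseIndices, vector<uint32_t>& freeList,
//       size_t& poolSize, const uint32_t arrayIndex)
//   {
//       uint32_t idx = sparseIndices[arrayIndex];
//       if (idx == INVALID_TRACK_IDX) {
//           if (!freeList.empty()) { // reuse a released slot
//               idx = freeList.back();
//               freeList.pop_back();
//           } else { // grow the pool
//               idx = static_cast<uint32_t>(poolSize++);
//           }
//           sparseIndices[arrayIndex] = idx;
//       }
//       return idx;
//   }
//
// Handles whose index is out of range fall back to the default state objects.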
RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
    if (arrayIndex < gpuBufferDataIndices_.size()) {
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuBufferAvailableIndices_.empty()) {
                dataIdx = gpuBufferAvailableIndices_.back();
                gpuBufferAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
                gpuBufferTracking_.emplace_back();
            }
            gpuBufferDataIndices_[arrayIndex] = dataIdx;

            gpuBufferTracking_[dataIdx].resource.handle = handle;
            gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
        }
        return gpuBufferTracking_[dataIdx];
    }

    return defaultBufferState_;
}

RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
    if (arrayIndex < gpuImageDataIndices_.size()) {
        // NOTE: render pass attachments expected to be dynamic resources always
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuImageAvailableIndices_.empty()) {
                dataIdx = gpuImageAvailableIndices_.back();
                gpuImageAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
                gpuImageTracking_.emplace_back();
            }
            gpuImageDataIndices_[arrayIndex] = dataIdx;

            gpuImageTracking_[dataIdx].resource.handle = handle;
            gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
            if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
                (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
                gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }
        return gpuImageTracking_[dataIdx];
    }

    return defaultImageState_;
}
RENDER_END_NAMESPACE()