1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_graph.h"
17
18 #include <cinttypes>
19
20 #include <base/containers/array_view.h>
21 #include <base/containers/fixed_string.h>
22 #include <base/math/mathf.h>
23 #include <render/namespace.h>
24
25 #include "device/gpu_resource_cache.h"
26 #include "device/gpu_resource_handle_util.h"
27 #include "device/gpu_resource_manager.h"
28 #include "nodecontext/render_barrier_list.h"
29 #include "nodecontext/render_command_list.h"
30 #include "nodecontext/render_node_graph_node_store.h"
31 #include "util/log.h"
32
33 using namespace BASE_NS;
34
35 RENDER_BEGIN_NAMESPACE()
36 namespace {
37 constexpr uint32_t INVALID_TRACK_IDX { ~0u };
38
39 #if (RENDER_DEV_ENABLED == 1)
40 constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
41 constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
42 constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;
43
44 void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
45 {
46 switch (rc.type) {
47 case RenderCommandType::BARRIER_POINT: {
48 PLUGIN_LOG_I("rc: BarrierPoint");
49 break;
50 }
51 case RenderCommandType::DRAW: {
52 PLUGIN_LOG_I("rc: Draw");
53 break;
54 }
55 case RenderCommandType::DRAW_INDIRECT: {
56 PLUGIN_LOG_I("rc: DrawIndirect");
57 break;
58 }
59 case RenderCommandType::DISPATCH: {
60 PLUGIN_LOG_I("rc: Dispatch");
61 break;
62 }
63 case RenderCommandType::DISPATCH_INDIRECT: {
64 PLUGIN_LOG_I("rc: DispatchIndirect");
65 break;
66 }
67 case RenderCommandType::BIND_PIPELINE: {
68 PLUGIN_LOG_I("rc: BindPipeline");
69 break;
70 }
71 case RenderCommandType::BEGIN_RENDER_PASS: {
72 PLUGIN_LOG_I("rc: BeginRenderPass");
73 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
74 const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
75 for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
76 const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
77 PLUGIN_LOG_I(" attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
78 }
79 PLUGIN_LOG_I(" subpass count: %u, subpass start idx: %u",
80 (uint32_t)beginRenderPass.renderPassDesc.subpassCount, beginRenderPass.subpassStartIndex);
81 }
82 break;
83 }
84 case RenderCommandType::NEXT_SUBPASS: {
85 PLUGIN_LOG_I("rc: NextSubpass");
86 break;
87 }
88 case RenderCommandType::END_RENDER_PASS: {
89 PLUGIN_LOG_I("rc: EndRenderPass");
90 break;
91 }
92 case RenderCommandType::BIND_VERTEX_BUFFERS: {
93 PLUGIN_LOG_I("rc: BindVertexBuffers");
94 break;
95 }
96 case RenderCommandType::BIND_INDEX_BUFFER: {
97 PLUGIN_LOG_I("rc: BindIndexBuffer");
98 break;
99 }
100 case RenderCommandType::COPY_BUFFER: {
101 PLUGIN_LOG_I("rc: CopyBuffer");
102 break;
103 }
104 case RenderCommandType::COPY_BUFFER_IMAGE: {
105 PLUGIN_LOG_I("rc: CopyBufferImage");
106 break;
107 }
108 case RenderCommandType::BIND_DESCRIPTOR_SETS: {
109 PLUGIN_LOG_I("rc: BindDescriptorSets");
110 break;
111 }
112 case RenderCommandType::PUSH_CONSTANT: {
113 PLUGIN_LOG_I("rc: PushConstant");
114 break;
115 }
116 case RenderCommandType::BLIT_IMAGE: {
117 PLUGIN_LOG_I("rc: BlitImage");
118 break;
119 }
120 // dynamic states
121 case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
122 PLUGIN_LOG_I("rc: DynamicStateViewport");
123 break;
124 }
125 case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
126 PLUGIN_LOG_I("rc: DynamicStateScissor");
127 break;
128 }
129 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
130 PLUGIN_LOG_I("rc: DynamicStateLineWidth");
131 break;
132 }
133 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
134 PLUGIN_LOG_I("rc: DynamicStateDepthBias");
135 break;
136 }
137 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
138 PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
139 break;
140 }
141 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
142 PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
143 break;
144 }
145 case RenderCommandType::DYNAMIC_STATE_STENCIL: {
146 PLUGIN_LOG_I("rc: DynamicStateStencil");
147 break;
148 }
149 case RenderCommandType::WRITE_TIMESTAMP: {
150 PLUGIN_LOG_I("rc: WriteTimestamp");
151 break;
152 }
153 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
154 PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
155 break;
156 }
157 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
158 PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
159 break;
160 }
161 case RenderCommandType::UNDEFINED:
162 default: {
163 PLUGIN_ASSERT(false && "non-valid render command");
164 break;
165 }
166 }
167 }
168
169 void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
170 {
171 PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
172 for (const auto& ref : combinedBarriers) {
173 const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
174 if (type == RenderHandleType::GPU_BUFFER) {
175 PLUGIN_LOG_I("barrier buffer :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
176 ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
177 ref.dst.pipelineStageFlags);
178 } else {
179 PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
180 PLUGIN_LOG_I("barrier image :: handle:0x%" PRIx64
181 " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
182 ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
183 ref.dst.pipelineStageFlags, ref.src.optionalImageLayout, ref.dst.optionalImageLayout);
184 }
185 }
186 }
187
188 void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
189 {
190 PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
191 for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
192 const auto handle = rc.renderPassDesc.attachmentHandles[idx];
193 const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
194 const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
195 PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64 " name:%s, src_layout:%u dst_layout:%u (patched later)",
196 handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
197 }
198 }
199
200 void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
201 {
202 PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
203 const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
204 PLUGIN_LOG_I("image_state :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
205 resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
206 resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
207 RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
208 RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
209 // one could fetch and print vulkan handle here as well e.g.
210 // 1. const GpuImagePlatformDataVk& plat =
211 // 2. (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
212 // 3. PLUGIN_LOG_I("end_frame image :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
213 }
214 #endif // RENDER_DEV_ENABLED
215
216 static constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
217 CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
218 CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
219 CORE_ACCESS_MEMORY_WRITE_BIT;
220
221 void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
222 RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
223 {
224 const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
225 for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
226 if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
227 beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
228 storeState.resource.imageLayout = imageLayout;
229 }
230 }
231 };
232
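// Merges render passes recorded as subpasses in separate render command lists: load ops and
// initial layouts come from the first pass, store ops and final layouts from the last one,
// and the combined description is then copied back to every pass so that all command lists
// share the same RenderPassDesc, subpass descriptions, and image layouts.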
233 void UpdateMultiRenderCommandListRenderPasses(RenderGraph::MultiRenderPassStore& store)
234 {
235 const uint32_t renderPassCount = (uint32_t)store.renderPasses.size();
236 PLUGIN_ASSERT(renderPassCount > 1);
237
238 RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
239 PLUGIN_ASSERT(firstRenderPass);
240 PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
241 const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
242 PLUGIN_ASSERT(lastRenderPass);
243
244 const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;
245
246 // take attachment loads from the first one, and stores from the last one
247 // take initial layouts from the first one, and final layouts from the last one (could take the next layout)
248 // first store the correct render pass description in the first render pass and then copy it to the others
249 // resource states are copied from valid subpasses to the subpasses of the other render command lists
250 for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
251 const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
252 const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
253 const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
254 for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
255 if (fromRpIdx != toRpIdx) {
256 auto& toRenderPass = *(store.renderPasses[toRpIdx]);
257 auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
258 for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
259 toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
260 toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
261 }
262 }
263 }
264 }
265
266 for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
267 firstRenderPass->renderPassDesc.attachments[idx].storeOp =
268 lastRenderPass->renderPassDesc.attachments[idx].storeOp;
269 firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
270 lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;
271
272 firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
273 lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
274 }
275
276 // copy subpasses to first
277 for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
278 firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
279 }
280
281 // copy from first to following render passes
282 for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
283 // subpass start index is the only changing variable
284 const uint32_t subpassStartIndex = store.renderPasses[idx]->subpassStartIndex;
285 store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
286 store.renderPasses[idx]->subpassStartIndex = subpassStartIndex;
287
288 // image layouts need to match
289 store.renderPasses[idx]->imageLayouts = firstRenderPass->imageLayouts;
290 PLUGIN_ASSERT(store.renderPasses[idx]->subpasses.size() >= renderPassCount);
291 // copy all subpasses
292 if (!CloneData(store.renderPasses[idx]->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount,
293 firstRenderPass->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount)) {
294 PLUGIN_LOG_E("Copying of renderPasses failed.");
295 }
296 // copy input resource state
297 if (!CloneData(store.renderPasses[idx]->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount,
298 firstRenderPass->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount)) {
299 PLUGIN_LOG_E("Copying of renderPasses failed.");
300 }
301 // NOTE: subpassResourceStates are not copied to different render passes
302 }
303
304 #if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
305 // copy the final layouts and resolves to first render pass
306 const uint32_t finalSubpassIdx = renderPassCount - 1U;
307 if ((renderPassCount > 1U) && (firstRenderPass->subpasses[finalSubpassIdx].resolveAttachmentCount > 0U)) {
308 firstRenderPass->renderPassDesc.subpassCount = 1U;
309 firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), 1U };
310 firstRenderPass->subpassResourceStates = { firstRenderPass->subpassResourceStates.data(), 1U };
311 // copy resolve attachments from the final subpass
312 auto& firstSubpass = firstRenderPass->subpasses[0U];
313 const auto& finalSubpass = store.renderPasses[finalSubpassIdx]->subpasses[finalSubpassIdx];
314 firstSubpass.resolveAttachmentCount = finalSubpass.resolveAttachmentCount;
315 firstSubpass.depthResolveAttachmentCount = finalSubpass.depthResolveAttachmentCount;
316 firstSubpass.depthResolveAttachmentIndex = finalSubpass.depthResolveAttachmentIndex;
317 firstSubpass.depthResolveModeFlagBit = finalSubpass.depthResolveModeFlagBit;
318 CloneData(firstSubpass.resolveAttachmentIndices, sizeof(firstSubpass.resolveAttachmentIndices),
319 finalSubpass.resolveAttachmentIndices, sizeof(uint32_t) * firstSubpass.resolveAttachmentCount);
320 // layouts for resolve attachments
321 const auto& finalSubpassResourceStates =
322 store.renderPasses[finalSubpassIdx]->subpassResourceStates[finalSubpassIdx];
323 const uint32_t resolveAttachmentCount = firstSubpass.resolveAttachmentCount;
324 for (uint32_t resIdx = 0U; resIdx < resolveAttachmentCount; ++resIdx) {
325 const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[resIdx];
326 firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
327 finalSubpassResourceStates.layouts[resAttIdx];
328 firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
329 }
330 if ((firstSubpass.depthResolveAttachmentCount > 0U) &&
331 (firstSubpass.depthResolveAttachmentIndex < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT)) {
332 const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[firstSubpass.depthResolveAttachmentIndex];
333 firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
334 finalSubpassResourceStates.layouts[resAttIdx];
335 firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
336 }
337
338 // fix render command list indices
339 for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
340 store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
341 store.renderPasses[idx]->subpassStartIndex = 0U;
342 store.renderPasses[idx]->subpasses = firstRenderPass->subpasses;
343 store.renderPasses[idx]->subpassResourceStates = firstRenderPass->subpassResourceStates;
344 }
345 #if (RENDER_VALIDATION_ENABLED == 1)
346 PLUGIN_LOG_ONCE_I("combine_multi_command_list_msaa_subpasses_enabled",
347 "RENDER_VALIDATION: Combining multi-commandlist MSAA resolve subpasses");
348 #endif
349 }
350 #endif
351 }
352
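// The following helpers build the source/destination halves of a ResourceBarrier from the
// tracked GpuResourceState. TOP_OF_PIPE is OR'd into the source stage mask and BOTTOM_OF_PIPE
// into the destination stage mask so the resulting stage masks are never empty.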
353 ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
354 {
355 return {
356 state.accessFlags,
357 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
358 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
359 res.byteOffset,
360 res.byteSize,
361 };
362 }
363
364 ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
365 {
366 return {
367 state.accessFlags,
368 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
369 res.imageLayout,
370 0,
371 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
372 };
373 }
374
375 ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
376 const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
377 {
378 uint32_t mipLevel = 0U;
379 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
380 ImageLayout srcImageLayout = src.imageLayout;
381 if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
382 (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
383 if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
384 mipLevel = dst.mip;
385 mipCount = 1U;
386 } else {
387 mipLevel = src.mip;
388 // all mip levels
389 }
390 PLUGIN_ASSERT(additionalImageState.layouts);
391 srcImageLayout = additionalImageState.layouts[mipLevel];
392 }
393 return {
394 state.accessFlags,
395 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
396 srcImageLayout,
397 0,
398 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
399 { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
400 };
401 }
402
403 ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
404 {
405 return {
406 state.accessFlags,
407 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
408 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
409 res.byteOffset,
410 res.byteSize,
411 };
412 }
413
414 ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
415 {
416 return {
417 state.accessFlags,
418 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
419 res.imageLayout,
420 0,
421 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
422 };
423 }
424
425 ResourceBarrier GetDstImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
426 const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
427 {
428 uint32_t mipLevel = 0U;
429 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
430 ImageLayout dstImageLayout = dst.imageLayout;
431 if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
432 (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
433 if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
434 mipLevel = dst.mip;
435 mipCount = 1U;
436 } else {
437 mipLevel = src.mip;
438 // all mip levels
439 }
440 }
441 return {
442 state.accessFlags,
443 state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
444 dstImageLayout,
445 0,
446 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
447 { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
448 };
449 }
450
451 void ModifyAdditionalImageState(
452 const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
453 {
454 #if (RENDER_VALIDATION_ENABLED == 1)
455 // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
456 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
457 #endif
458 if (additionalStateRef.layouts) {
459 if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
460 (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
461 additionalStateRef.layouts[res.mip] = res.imageLayout;
462 } else {
463 // set layout for all mips
464 for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
465 additionalStateRef.layouts[idx] = res.imageLayout;
466 }
467 }
468 } else {
469 #if (RENDER_VALIDATION_ENABLED == 1)
470 PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
471 #endif
472 }
473 }
474
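// Builds a queue family ownership transfer barrier for the given resource: the source half
// releases on srcGpuQueue with srcImageLayout and the destination half acquires on dstGpuQueue
// with dstImageLayout.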
475 CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
476 const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
477 {
478 return {
479 handle,
480
481 ResourceBarrier {
482 0,
483 PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
484 srcImageLayout,
485 0,
486 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
487 ImageSubresourceRange {},
488 },
489 srcGpuQueue,
490
491 ResourceBarrier {
492 0,
493 PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
494 dstImageLayout,
495 0,
496 PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
497 ImageSubresourceRange {},
498 },
499 dstGpuQueue,
500 };
501 }
502
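// For every recorded queue transfer the same barrier is patched into two places: the last
// barrier point (GPU_QUEUE_TRANSFER_RELEASE) of the releasing node's command list and the
// first barrier point (GPU_QUEUE_TRANSFER_ACQUIRE) of the acquiring node's command list.
// The release side is skipped when the release node index is out of range (i.e. the resource
// was released on a previous frame).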
503 void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
504 array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
505 {
506 for (const auto& transferRef : currNodeGpuResourceTransfers) {
507 PLUGIN_ASSERT(transferRef.acquireNodeIdx < (uint32_t)frameRenderNodeContextData.size());
508
509 auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
510 const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
511 GpuQueue releaseGpuQueue = acquireGpuQueue;
512
513 if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
514 auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
515 releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
516 }
517
518 const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
519 acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);
520
521 // release ownership (NOTE: not done for previous frame)
522 if (transferRef.releaseNodeIdx < (uint32_t)frameRenderNodeContextData.size()) {
523 auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
524 const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
525 const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
526 PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
527 const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
528 PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
529
530 const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
531 releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });
532
533 // inform that we are patching valid barriers
534 releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
535 }
536 // acquire ownership
537 {
538 const uint32_t rcIndex = 0;
539 const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
540 PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
541 const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
542 PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
543
544 const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
545 acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });
546
547 // inform that we are patching valid barriers
548 acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
549 }
550 }
551 }
552
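// A barrier is only needed for dynamically tracked resources that were not already handled
// by a custom barrier in the current barrier point.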
553 bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
554 const uint32_t customBarrierCount, const RenderHandle handle)
555 {
556 bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
557 if ((customBarrierCount > 0) && needsBarrier) {
558 needsBarrier = (handledCustomBarriers.count(handle) > 0) ? false : true;
559 }
560 return needsBarrier;
561 }
562 } // namespace
563
564 RenderGraph::RenderGraph(GpuResourceManager& gpuResourceMgr) : gpuResourceMgr_(gpuResourceMgr) {}
565
566 void RenderGraph::BeginFrame()
567 {
568 stateCache_.multiRenderPassStore.renderPasses.clear();
569 stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
570 stateCache_.multiRenderPassStore.firstBarrierPointIndex = ~0u;
571 stateCache_.multiRenderPassStore.supportOpen = false;
572 stateCache_.nodeCounter = 0u;
573 stateCache_.checkForBackbufferDependency = false;
574 stateCache_.usesSwapchainImage = false;
575 }
576
577 void RenderGraph::ProcessRenderNodeGraph(
578 const bool checkBackbufferDependancy, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
579 {
580 stateCache_.checkForBackbufferDependency = checkBackbufferDependancy;
581
582 // NOTE: separate gpu buffers and gpu images due to larger structs, layers, mips in images
583 // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
584 // handles:
585 // gpu images in descriptor sets, render passes, blits, and custom barriers
586 // gpu buffers in descriptor sets, and custom barriers
587
588 {
589 // remove resources that will not be tracked anymore and release available slots
590 const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
591 for (const auto& handle : stateResetData.resources) {
592 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
593 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
594 if ((handleType == RenderHandleType::GPU_IMAGE) &&
595 (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
596 if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
597 PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
598 gpuImageTracking_[dataIdx] = {}; // reset
599 gpuImageAvailableIndices_.push_back(dataIdx);
600 }
601 gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
602 } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
603 if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
604 PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
605 gpuBufferTracking_[dataIdx] = {}; // reset
606 gpuBufferAvailableIndices_.push_back(dataIdx);
607 }
608 gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
609 }
610 }
611 }
612
613 gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
614 gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);
615
616 #if (RENDER_DEV_ENABLED == 1)
617 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
618 CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
619 static uint64_t debugFrame = 0;
620 debugFrame++;
621 PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
622 }
623 #endif
624
625 // some of the resources need their frame state stored as undefined (i.e. reset on frame boundaries)
626 ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);
627
628 // store final state for next frame
629 StoreFinalBufferState();
630 StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
631 }
632
633 RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
634 {
635 return swapchainStates_;
636 }
637
638 void RenderGraph::ProcessRenderNodeGraphNodeStores(
639 const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
640 {
641 for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
642 PLUGIN_ASSERT(graphStore);
643 if (!graphStore) {
644 continue;
645 }
646
647 for (uint32_t nodeIdx = 0; nodeIdx < (uint32_t)graphStore->renderNodeContextData.size(); ++nodeIdx) {
648 auto& ref = graphStore->renderNodeContextData[nodeIdx];
649 ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
650 stateCache.usesSwapchainImage = false; // reset
651
652 #if (RENDER_DEV_ENABLED == 1)
653 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
654 PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
655 }
656 #endif
657
658 if (stateCache.multiRenderPassStore.supportOpen &&
659 (stateCache.multiRenderPassStore.renderPasses.size() == 0)) {
660 PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
661 // NOTE: add more error handling and invalidate render command lists
662 }
663 stateCache.multiRenderPassStore.supportOpen = ref.renderCommandList->HasMultiRenderCommandListSubpasses();
664 array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
665 // go through commands that affect or need transitions and barriers
666 ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);
667
668 // needs backbuffer/swapchain wait
669 if (stateCache.usesSwapchainImage) {
670 ref.submitInfo.waitForSwapchainAcquireSignal = true;
671 }
672
673 // patch gpu resource queue transfers
674 if (!currNodeGpuResourceTransfers_.empty()) {
675 PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
676 // clear for next use
677 currNodeGpuResourceTransfers_.clear();
678 }
679
680 stateCache_.nodeCounter++;
681 }
682 }
683 }
684
685 void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
686 const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
687 {
688 for (uint32_t listIdx = 0; listIdx < (uint32_t)cmdListRef.size(); ++listIdx) {
689 auto& cmdRef = cmdListRef[listIdx];
690
691 #if (RENDER_DEV_ENABLED == 1)
692 if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
693 DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
694 }
695 #endif
696
697 // most of the commands are handled within BarrierPoint
698 switch (cmdRef.type) {
699 case RenderCommandType::BARRIER_POINT:
700 RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
701 break;
702
703 case RenderCommandType::BEGIN_RENDER_PASS:
704 RenderCommand(
705 nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
706 break;
707
708 case RenderCommandType::END_RENDER_PASS:
709 RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
710 break;
711
712 case RenderCommandType::NEXT_SUBPASS:
713 case RenderCommandType::DRAW:
714 case RenderCommandType::DRAW_INDIRECT:
715 case RenderCommandType::DISPATCH:
716 case RenderCommandType::DISPATCH_INDIRECT:
717 case RenderCommandType::BIND_PIPELINE:
718 case RenderCommandType::BIND_VERTEX_BUFFERS:
719 case RenderCommandType::BIND_INDEX_BUFFER:
720 case RenderCommandType::COPY_BUFFER:
721 case RenderCommandType::COPY_BUFFER_IMAGE:
722 case RenderCommandType::COPY_IMAGE:
723 case RenderCommandType::BIND_DESCRIPTOR_SETS:
724 case RenderCommandType::PUSH_CONSTANT:
725 case RenderCommandType::BLIT_IMAGE:
726 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
727 case RenderCommandType::CLEAR_COLOR_IMAGE:
728 case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
729 case RenderCommandType::DYNAMIC_STATE_SCISSOR:
730 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
731 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
732 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
733 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
734 case RenderCommandType::DYNAMIC_STATE_STENCIL:
735 case RenderCommandType::WRITE_TIMESTAMP:
736 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
737 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
738 case RenderCommandType::UNDEFINED:
739 default: {
740 // nop
741 break;
742 }
743 }
744 } // end command for
745 }
746
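// Stores the final buffer states for the next frame: non-dynamic resources are dropped from
// tracking, resources flagged to reset on frame borders are cleared back to an undefined state
// (only the handle is kept), and the per-frame patching data is always reset.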
747 void RenderGraph::StoreFinalBufferState()
748 {
749 for (auto& ref : gpuBufferTracking_) {
750 if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
751 ref = {};
752 continue;
753 }
754 if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
755 // reset, but we do not reset the handle, because the gpuBufferTracking_ element is not removed
756 const RenderHandle handle = ref.resource.handle;
757 ref = {};
758 ref.resource.handle = handle;
759 }
760 // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
761 // frames)
762 ref.prevRc = {};
763 ref.prevRenderNodeIndex = { ~0u };
764 }
765 }
766
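// Stores the final image states for the next frame. Swapchain images used during this frame
// are patched to PRESENT_SRC when their last use was a render pass and their final state is
// recorded for the backend; other images follow the same reset rules as buffers, with per-mip
// layout storage re-created for images with additional state tracking.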
767 void RenderGraph::StoreFinalImageState()
768 {
769 swapchainStates_ = {}; // reset
770
771 #if (RENDER_DEV_ENABLED == 1)
772 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
773 PLUGIN_LOG_I("end_frame image_state:");
774 }
775 #endif
776 for (auto& ref : gpuImageTracking_) {
777 // if resource is not dynamic, we do not track and care
778 if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
779 ref = {};
780 continue;
781 }
782 // handle automatic presentation layout
783 if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
784 if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
785 RenderCommandBeginRenderPass& beginRenderPass =
786 *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
787 PatchRenderPassFinalLayout(
788 ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
789 }
790 // NOTE: currently we handle automatic presentation layout in vulkan backend if not in render pass
791 // store final state for backbuffer
792 // currently we only store swapchains if they are actually in use in this frame
793 const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
794 if (flags != 0) {
795 swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
796 }
797 }
798 #if (RENDER_DEV_ENABLED == 1)
799 // print before reset for next frame
800 if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
801 DebugPrintImageState(gpuResourceMgr_, ref);
802 }
803 #endif
804 // shallow resources are not tracked
805 // they are always in undefined state at the beginning of the frame
806 if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
807 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
808 // reset, but we do not reset the handle, because the gpuImageTracking_ element is not removed
809 const RenderHandle handle = ref.resource.handle;
810 ref = {};
811 ref.resource.handle = handle;
812 if (addMips) {
813 PLUGIN_ASSERT(!ref.additionalState.layouts);
814 ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
815 }
816 }
817
818 // need to reset per frame variables for all images (so we do not try to patch from previous frames)
819 ref.prevRc = {};
820 ref.prevRenderNodeIndex = { ~0u };
821 }
822 }
823
824 void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
825 RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
826 {
827 // update layouts for attachments to gpu image state
828 BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };
829
830 PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);
831
832 const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
833 if (hasRenderPassDependency) { // stitch render pass subpasses
834 BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
835 }
836
837 const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();
838
839 auto finalImageLayouts =
840 array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));
841
842 BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);
843
844 for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
845 const auto& subpassRef = rc.subpasses[subpassIdx];
846 const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];
847
848 BeginRenderPassUpdateSubpassImageStates(
849 array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
850 subpassResourceStatesRef, finalImageLayouts, stateCache);
851
852 BeginRenderPassUpdateSubpassImageStates(
853 array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
854 subpassResourceStatesRef, finalImageLayouts, stateCache);
855
856 BeginRenderPassUpdateSubpassImageStates(
857 array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
858 subpassResourceStatesRef, finalImageLayouts, stateCache);
859
860 if (subpassRef.depthAttachmentCount == 1u) {
861 BeginRenderPassUpdateSubpassImageStates(
862 array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
863 subpassResourceStatesRef, finalImageLayouts, stateCache);
864 if (subpassRef.depthResolveAttachmentCount == 1) {
865 BeginRenderPassUpdateSubpassImageStates(
866 array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
867 rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
868 }
869 }
870 if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
871 BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
872 subpassRef.fragmentShadingRateAttachmentCount),
873 rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
874 }
875 }
876
877 if (hasRenderPassDependency) { // stitch render pass subpasses
878 if (rc.subpassStartIndex > 0) {
879 // stitched to behave as a nextSubpass() and not beginRenderPass()
880 rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
881 }
882 const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
883 if (finalSubpass) {
884 UpdateMultiRenderCommandListRenderPasses(stateCache.multiRenderPassStore);
885 // multiRenderPassStore cleared in EndRenderPass
886 }
887 }
888 #if (RENDER_DEV_ENABLED == 1)
889 if (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
890 DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
891 }
892 #endif
893 }
894
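// Book-keeping for multi render command list subpass stitching: the render pass is appended to
// multiRenderPassStore and, for the first subpass, the preceding barrier point and its barrier
// list are stored so that barriers recorded by later subpasses can be added to the first
// command list's barrier list.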
895 void RenderGraph::BeginRenderPassHandleDependency(
896 BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
897 {
898 params.stateCache.multiRenderPassStore.renderPasses.push_back(¶ms.rc);
899 // store the first begin render pass
900 params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
901 params.stateCache.multiRenderPassStore.renderPasses[0] };
902
903 if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
904 // barrier point must be previous command
905 PLUGIN_ASSERT(commandListCommandIndex >= 1);
906 const uint32_t prevCommandIndex = commandListCommandIndex - 1;
907
908 const RenderCommandWithType& barrierPointCmdRef =
909 nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
910 PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
911 PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));
912
913 params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
914 params.stateCache.multiRenderPassStore.firstBarrierPointIndex =
915 static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc)->barrierPointIndex;
916 }
917 }
918
919 void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
920 array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
921 {
922 auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
923 const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
924 auto& attachments = params.rc.renderPassDesc.attachments;
925 auto& attachmentInputResourceStates = params.rc.inputResourceStates;
926
927 for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
928 const RenderHandle handle = attachmentHandles[attachmentIdx];
929 // NOTE: invalidate invalid handle commands already in render command list
930 if (!RenderHandleUtil::IsGpuImage(handle)) {
931 #ifdef _DEBUG
932 PLUGIN_LOG_E("invalid handle in render node graph");
933 #endif
934 continue;
935 }
936 auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
937 ImageLayout imgLayout = stateRef.resource.imageLayout;
938
939 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
940 // image layout is undefined if automatic barriers have been disabled
941 if (params.rc.enableAutomaticLayoutChanges) {
942 const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
943 if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
944 if (stateRef.additionalState.layouts) {
945 imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
946 } else {
947 #if (RENDER_VALIDATION_ENABLED == 1)
948 PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
949 #endif
950 }
951 }
952
953 initialImageLayouts[attachmentIdx] = imgLayout;
954 }
955 // undefined layout with load_op_load -> we modify to dont_care (and remove validation warning)
956 if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
957 (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
958 // dont care (user needs to be sure what is wanted, i.e. in first frame one should clear)
959 attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
960 }
961 finalImageLayouts[attachmentIdx] = imgLayout;
962 attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
963 attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;
964
965 // store render pass for final layout patching
966 stateRef.prevRc = params.rpForCmdRef;
967 stateRef.prevRenderNodeIndex = renderNodeIndex;
968
969 // flag for backbuffer use
970 if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
971 params.stateCache.usesSwapchainImage = true;
972 }
973 }
974 }
975
976 void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attatchmentIndices,
977 const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
978 array_view<ImageLayout> finalImageLayouts, StateCache& stateCache)
979 {
980 for (const uint32_t attachmentIndex : attatchmentIndices) {
981 // NOTE: handle invalid commands already in render command list and invalidate draws etc.
982 PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
983 const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
984 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
985 const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
986 const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
987 // NOTE: we should support non dynamicity and GENERAL
988
989 finalImageLayouts[attachmentIndex] = refImgLayout;
990 auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
991 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
992
993 ref.state = refState;
994 ref.resource.handle = handle;
995 ref.resource.imageLayout = refImgLayout;
996 if (addMips) {
997 const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
998 const BindableImage image {
999 handle,
1000 attachmentDesc.mipLevel,
1001 attachmentDesc.layer,
1002 refImgLayout,
1003 RenderHandle {},
1004 };
1005 ModifyAdditionalImageState(image, ref.additionalState);
1006 }
1007 }
1008 }
1009
1010 void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
1011 const RenderNodeContextData& nodeData, RenderCommandEndRenderPass& rc, StateCache& stateCache)
1012 {
1013 const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
1014 if (hasRenderPassDependency) {
1015 const bool finalSubpass = (rc.subpassCount == (uint32_t)stateCache.multiRenderPassStore.renderPasses.size());
1016 if (finalSubpass) {
1017 if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
1018 PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
1019 // NOTE: add more error handling and invalidate render command lists
1020 }
1021 rc.endType = RenderPassEndType::END_RENDER_PASS;
1022 stateCache.multiRenderPassStore.renderPasses.clear();
1023 stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
1024 stateCache.multiRenderPassStore.firstBarrierPointIndex = ~0u;
1025 stateCache.multiRenderPassStore.supportOpen = false;
1026 } else {
1027 rc.endType = RenderPassEndType::END_SUBPASS;
1028 }
1029 }
1030 }
1031
1032 void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
1033 RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
1034 {
1035 // go through required descriptors for current upcoming event
1036 const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
1037 const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
1038 const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
1039 const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;
1040
1041 parameterCachePools_.combinedBarriers.clear();
1042 parameterCachePools_.handledCustomBarriers.clear();
1043 ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
1044 rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
1045 nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
1046 // first check custom barriers
1047 if (parameters.customBarrierCount > 0) {
1048 HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
1049 }
1050 // then vertex / index buffer barriers in the barrier point before render pass
1051 if (parameters.vertexInputBarrierCount > 0) {
1052 PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
1053 HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
1054 nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
1055 }
1056 if (parameters.indirectBufferBarrierCount > 0U) {
1057 PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
1058 HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
1059 nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
1060 }
1061
1062 // at the barrier point, the next render command (the one the barrier is needed for) is known
1063 if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
1064 HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
1065 } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
1066 HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
1067 } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
1068 HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
1069 } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
1070 HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
1071 } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
1072 HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef); // NOTE: handles image to image
1073 } else { // descriptor sets
1074 if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
1075 HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
1076 }
1077 const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
1078 const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
1079 const uint32_t descriptorSetHandleMaxIndex =
1080 Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
1081 const auto descriptorSetHandlesForBarriers =
1082 array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
1083 allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
1084 HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
1085 }
1086
1087 if (!parameters.combinedBarriers.empty()) {
1088 // use first render pass barrier point with following subpasses
1089 // firstRenderPassBarrierList is still null when the first subpass's barrier point is handled
1090 const bool renderPassHasDependancy = stateCache.multiRenderPassStore.supportOpen;
1091 if (renderPassHasDependancy && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
1092 PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
1093 stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
1094 rc.barrierPointIndex, parameters.combinedBarriers);
1095 } else {
1096 nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
1097 }
1098 }
1099 #if (RENDER_DEV_ENABLED == 1)
1100 if (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
1101 DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
1102 }
1103 #endif
1104 }
1105
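// These helpers record the destination side of a handled barrier (access flags, pipeline
// stages, queue) as the resource's new tracked state, together with the command and render
// node that produced it, so later barrier points can use it as the source state.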
1106 inline void RenderGraph::UpdateBufferResourceState(
1107 RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
1108 {
1109 stateRef.resource.handle = cb.resourceHandle;
1110 stateRef.state.shaderStageFlags = 0;
1111 stateRef.state.accessFlags = cb.dst.accessFlags;
1112 stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
1113 stateRef.state.gpuQueue = params.gpuQueue;
1114 stateRef.prevRc = params.rcWithType;
1115 stateRef.prevRenderNodeIndex = params.renderNodeIndex;
1116 }
1117
1118 inline void RenderGraph::UpdateImageResourceState(
1119 RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
1120 {
1121 stateRef.resource.handle = cb.resourceHandle;
1122 stateRef.state.shaderStageFlags = 0;
1123 stateRef.state.accessFlags = cb.dst.accessFlags;
1124 stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
1125 stateRef.state.gpuQueue = params.gpuQueue;
1126 stateRef.prevRc = params.rcWithType;
1127 stateRef.prevRenderNodeIndex = params.renderNodeIndex;
1128 }
1129
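// Copies the user-provided custom barriers into the combined barrier list. For dynamically
// tracked resources the tracked state is updated from the barrier's destination side, and when
// the source image layout is left as CORE_IMAGE_LAYOUT_MAX_ENUM the source side is filled in
// from the currently tracked state (including per-mip layouts).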
1130 void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1131 const array_view<const CommandBarrier>& customBarrierListRef)
1132 {
1133 params.handledCustomBarriers.reserve(params.customBarrierCount);
1134 PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
1135 for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
1136 end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
1137 begin != end; ++begin) {
1138 // add a copy and modify if needed
1139 auto& cb = params.combinedBarriers.emplace_back(*begin);
1140
1141 // NOTE: undefined type is for non-resource memory/pipeline barriers
1142 const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
1143 const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
1144 PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
1145 (type == RenderHandleType::GPU_IMAGE));
1146 if (type == RenderHandleType::GPU_BUFFER) {
1147 if (isDynamicTrack) {
1148 auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
1149 UpdateBufferResourceState(stateRef, params, cb);
1150 }
1151 params.handledCustomBarriers[cb.resourceHandle] = 0;
1152 } else if (type == RenderHandleType::GPU_IMAGE) {
1153 if (isDynamicTrack) {
1154 const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
1155 auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
1156 if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
1157 uint32_t mipLevel = 0U;
1158 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
1159 ImageLayout srcImageLayout = stateRef.resource.imageLayout;
1160 if (isAddMips) {
1161 const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
1162 const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
1163 if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
1164 (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
1165 if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
1166 mipLevel = dstMip;
1167 mipCount = 1U;
1168 } else {
1169 mipLevel = srcMip;
1170 // all mip levels
1171 }
1172 if (stateRef.additionalState.layouts) {
1173 srcImageLayout = stateRef.additionalState.layouts[mipLevel];
1174 } else {
1175 #if (RENDER_VALIDATION_ENABLED == 1)
1176 PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
1177 #endif
1178 }
1179 }
1180 }
1181 cb.src.accessFlags = stateRef.state.accessFlags;
1182 cb.src.pipelineStageFlags =
1183 stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1184 cb.src.optionalImageLayout = srcImageLayout;
1185 cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
1186 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
1187 }
1188 UpdateImageResourceState(stateRef, params, cb);
1189 stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
1190 if (isAddMips) {
1191 const BindableImage image {
1192 cb.resourceHandle,
1193 cb.dst.optionalImageSubresourceRange.baseMipLevel,
1194 cb.dst.optionalImageSubresourceRange.baseArrayLayer,
1195 cb.dst.optionalImageLayout,
1196 RenderHandle {},
1197 };
1198 ModifyAdditionalImageState(image, stateRef.additionalState);
1199 }
1200 }
1201 params.handledCustomBarriers[cb.resourceHandle] = 0;
1202 }
1203 }
1204 }
1205
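// Adds barriers for the vertex and index buffers recorded for the node; each buffer range is
// transitioned to vertex input read state (index read / vertex attribute read).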
1206 void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1207 const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
1208 {
1209 for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
1210 const uint32_t barrierIndex = barrierIndexBegin + idx;
1211 PLUGIN_ASSERT(barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size());
1212 if (barrierIndex < (uint32_t)vertexInputBufferBarrierListRef.size()) {
1213 const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
1214 const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
1215 CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
1216 CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
1217 UpdateStateAndCreateBarriersGpuBuffer(
1218 resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
1219 }
1220 }
1221 }
1222
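// Adds barriers for indirect argument buffers used inside render passes; buffers already
// covered by custom barriers are skipped.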
1223 void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1224 const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
1225 {
1226 for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
1227 const uint32_t barrierIndex = barrierIndexBegin + idx;
1228 PLUGIN_ASSERT(barrierIndex < (uint32_t)indirectBufferBarrierListRef.size());
1229 if (barrierIndex < (uint32_t)indirectBufferBarrierListRef.size()) {
1230 const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
1231 const bool needsArgsBarrier =
1232 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
1233 if (needsArgsBarrier) {
1234 const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
1235 CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
1236 UpdateStateAndCreateBarriersGpuBuffer(
1237 resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
1238 }
1239 }
1240 }
1241 }
1242
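// Looks at the next command (ClearColorImage) and transitions the target image to transfer
// write state unless a custom barrier already covered it.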
1243 void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1244 const array_view<const RenderCommandWithType>& cmdListRef)
1245 {
1246 const uint32_t nextListIdx = commandListCommandIndex + 1;
1247 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1248 const auto& nextCmdRef = cmdListRef[nextListIdx];
1249 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);
1250
1251 const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);
1252
1253 const bool needsBarrier =
1254 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
1255 if (needsBarrier) {
1256 BindableImage bRes = {};
1257 bRes.handle = nextRc.handle;
1258 bRes.imageLayout = nextRc.imageLayout;
1259 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1260 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1261 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1262 }
1263 }
1264
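// Looks at the next command (BlitImage) and transitions the source image to transfer read and
// the destination image to transfer write state unless custom barriers already covered them.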
1265 void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1266 const array_view<const RenderCommandWithType>& cmdListRef)
1267 {
1268 const uint32_t nextListIdx = commandListCommandIndex + 1;
1269 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1270 const auto& nextCmdRef = cmdListRef[nextListIdx];
1271 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);
1272
1273 const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);
1274
1275 const bool needsSrcBarrier =
1276 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
1277 if (needsSrcBarrier) {
1278 BindableImage bRes = {};
1279 bRes.handle = nextRc.srcHandle;
1280 bRes.imageLayout = nextRc.srcImageLayout;
1281 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1282 GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1283 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1284 }
1285
1286 const bool needsDstBarrier =
1287 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
1288 if (needsDstBarrier) {
1289 BindableImage bRes = {};
1290 bRes.handle = nextRc.dstHandle;
1291 bRes.imageLayout = nextRc.dstImageLayout;
1292 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1293 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1294 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1295 }
1296 }
1297
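// Looks at the next command (CopyBuffer) and creates transfer read / transfer write barriers
// for the source and destination buffer ranges of the copy unless custom barriers covered them.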
1298 void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
1299 const array_view<const RenderCommandWithType>& cmdListRef)
1300 {
1301 const uint32_t nextListIdx = commandListCommandIndex + 1;
1302 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1303 const auto& nextCmdRef = cmdListRef[nextListIdx];
1304 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);
1305
1306 const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);
1307
1308 const bool needsSrcBarrier =
1309 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
1310 if (needsSrcBarrier) {
1311 const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
1312 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1313 GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1314 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1315 }
1316
1317 const bool needsDstBarrier =
1318 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
1319 if (needsDstBarrier) {
1320 const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
1321 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1322 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1323 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1324 }
1325 }
1326
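// Looks at the next command (CopyBufferImage or CopyImage) and creates transfer barriers for
// the source and destination resources; either side of the copy may be a buffer or an image.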
1327 void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1328 const array_view<const RenderCommandWithType>& cmdListRef)
1329 {
1330 const uint32_t nextListIdx = commandListCommandIndex + 1;
1331 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1332 const auto& nextCmdRef = cmdListRef[nextListIdx];
1333 PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
1334 (nextCmdRef.type == RenderCommandType::COPY_IMAGE));
1335
1336 // NOTE: two different command types supported
1337 RenderHandle srcHandle;
1338 RenderHandle dstHandle;
1339 ImageSubresourceLayers srcImgLayers;
1340 ImageSubresourceLayers dstImgLayers;
1341 if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
1342 const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
1343 PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1344 srcHandle = nextRc.srcHandle;
1345 dstHandle = nextRc.dstHandle;
1346 srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
1347 dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
1348 } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
1349 const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
1350 srcHandle = nextRc.srcHandle;
1351 dstHandle = nextRc.dstHandle;
1352 srcImgLayers = nextRc.imageCopy.srcSubresource;
1353 dstImgLayers = nextRc.imageCopy.dstSubresource;
1354 }
1355
1356 const bool needsSrcBarrier =
1357 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
1358 if (needsSrcBarrier) {
1359 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
1360 PLUGIN_UNUSED(handleType);
1361 PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
1362 if (handleType == RenderHandleType::GPU_BUFFER) {
1363 BindableBuffer bRes;
1364 bRes.handle = srcHandle;
1365 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1366 GpuResourceState {
1367 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1368 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1369 } else {
1370 BindableImage bRes;
1371 bRes.handle = srcHandle;
1372 bRes.mip = srcImgLayers.mipLevel;
1373 bRes.layer = srcImgLayers.baseArrayLayer;
1374 bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1375 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1376 GpuResourceState {
1377 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1378 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1379 }
1380 }
1381
1382 const bool needsDstBarrier =
1383 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
1384 if (needsDstBarrier) {
1385 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
1386 PLUGIN_UNUSED(handleType);
1387 PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
1388 if (handleType == RenderHandleType::GPU_BUFFER) {
1389 BindableBuffer bRes;
1390 bRes.handle = dstHandle;
1391 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1392 GpuResourceState {
1393 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1394 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1395 } else {
1396 BindableImage bRes;
1397 bRes.handle = dstHandle;
1398 bRes.mip = dstImgLayers.mipLevel;
1399 bRes.layer = dstImgLayers.baseArrayLayer;
1400 bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1401 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1402 GpuResourceState {
1403 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1404 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1405 }
1406 }
1407 }
1408
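// Looks at the next command (DispatchIndirect) and transitions the argument buffer to indirect
// command read state unless a custom barrier already covered it.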
1409 void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
1410 const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
1411 {
1412 const uint32_t nextListIdx = commandListCommandIndex + 1;
1413 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1414 const auto& nextCmdRef = cmdListRef[nextListIdx];
1415 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);
1416
1417 const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);
1418
1419 const bool needsArgsBarrier =
1420 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
1421 if (needsArgsBarrier) {
1422 const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
1423 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1424 GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
1425 CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
1426 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1427 }
1428 }
1429
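// Walks the buffer and image bindings of every descriptor set used for barriers and creates the
// needed barriers for resources not already covered by custom barriers. Array bindings
// (descriptorCount > 1) are expanded through the descriptor set's array offset entries.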
1430 void RenderGraph::HandleDescriptorSets(ParameterCache& params,
1431 const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
1432 const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
1433 {
1434 for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
1435 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET);
1436
1437 const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
1438 const auto& buffers = bindingResources.buffers;
1439 const auto& images = bindingResources.images;
1440 for (const auto& ref : buffers) {
1441 const uint32_t descriptorCount = ref.binding.descriptorCount;
1442             // skip array bindings that are bound from the first index; their descriptorCount is 0
1443 if (descriptorCount == 0) {
1444 continue;
1445 }
1446 const uint32_t arrayOffset = ref.arrayOffset;
1447 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1448 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1449                 // index 0 is the binding itself; from index 1 onwards the array offset entries are used
1450 const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1];
1451                 if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
1452 UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
1453 }
1454 }
1455 }
1456 for (const auto& ref : images) {
1457 const uint32_t descriptorCount = ref.binding.descriptorCount;
1458             // skip array bindings that are bound from the first index; their descriptorCount is 0
1459 if (descriptorCount == 0) {
1460 continue;
1461 }
1462 const uint32_t arrayOffset = ref.arrayOffset;
1463 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
1464 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1465                 // index 0 is the binding itself; from index 1 onwards the array offset entries are used
1466 const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1];
1467 if (CheckForBarrierNeed(
1468 params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
1469 UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
1470 }
1471 }
1472 }
1473 } // end for
1474 }
1475
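// Compares the tracked image state against the newly required state. A barrier (or a
// cross-queue transfer entry when the queue changes) is appended when the layout or access
// flags change or when the previous access included a write; input attachment reads are left
// to render pass dependencies.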
1476 void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
1477 const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
1478 {
1479 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
1480 if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
1481 return;
1482 }
1483
1484 auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
1485     // NOTE: the final render pass layouts were previously patched here
1486     // ATM: only the swapchain image is patched if needed
1487
1488 const GpuResourceState& prevState = ref.state;
1489 const BindableImage& prevImage = ref.resource;
1490 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
1491 const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
1492 : GetSrcImageBarrier(prevState, prevImage);
1493
1494 const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
1495 const bool accessFlagsChanged = (prevStateRb.accessFlags != state.accessFlags);
1496 const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS);
1497 const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
1498 // input attachments are handled with render passes and not with barriers
1499 if ((layoutChanged || accessFlagsChanged || writeTarget) && (!inputAttachment)) {
1500 if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1501 (prevState.gpuQueue.type != state.gpuQueue.type)) {
1502 PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1503
1504 PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
1505 currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
1506 res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
1507 } else {
1508 const ResourceBarrier dstImageBarrier =
1509 addMips ? GetDstImageBarrierMips(state, prevImage, res, ref.additionalState)
1510 : GetDstImageBarrier(state, res);
1511 params.combinedBarriers.push_back(
1512 CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
1513 }
1514
1515 ref.state = state;
1516 ref.resource = res;
1517 ref.prevRc = params.rcWithType;
1518 ref.prevRenderNodeIndex = params.renderNodeIndex;
1519 if (addMips) {
1520 ModifyAdditionalImageState(res, ref.additionalState);
1521 }
1522 }
1523 }
1524
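// Compares the tracked buffer state against the newly required state and appends a barrier
// when the access flags change or the previous access included a write; the cached state is
// always advanced to the destination state.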
1525 void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
1526 const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
1527 {
1528 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
1529 if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
1530 return;
1531 }
1532
1533 // get the current state of the buffer
1534 auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
1535 const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
1536 if ((prevStateRb.accessFlags != dstState.accessFlags) || (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS)) {
1537 params.combinedBarriers.push_back(CommandBarrier {
1538 res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
1539 }
1540
1541 // update the cached state to match the situation after the barrier
1542 srcStateRef.state = dstState;
1543 srcStateRef.resource = res;
1544 srcStateRef.prevRc = params.rcWithType;
1545 srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
1546 }
1547
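// Records a barrier for the given buffer (or a cross-queue transfer entry when the resource
// moves between queue types) and updates the cached tracking state.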
1548 void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
1549 const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer,
1550 const RenderCommandWithType& rcWithType, vector<CommandBarrier>& barriers,
1551 vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
1552 {
1553 auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
1554 const GpuResourceState srcState = stateRef.state;
1555 const BindableBuffer srcBuffer = stateRef.resource;
1556
1557 if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1558 (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
1559 PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1560         PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_BUFFER);
1561 PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
1562 currNodeGpuResourceTransfer.push_back(
1563 RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
1564 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
1565 } else {
1566 const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
1567 const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);
1568
1569 barriers.push_back(CommandBarrier {
1570 newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
1571 }
1572
1573 stateRef.state = newGpuResourceState;
1574 stateRef.resource = newBuffer;
1575 stateRef.prevRc = rcWithType;
1576 stateRef.prevRenderNodeIndex = renderNodeIndex;
1577 }
1578
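// Records a barrier for the given image (or a cross-queue transfer entry when the resource
// moves between queue types) and updates the cached tracking state, including the per mip
// layouts of resources with additional state.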
1579 void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
1580 const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
1581 vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
1582 {
1583 // newGpuResourceState has queue transfer image layout in old optionalImageLayout
1584
1585 auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
1586 const GpuResourceState srcState = stateRef.state;
1587 const BindableImage srcImage = stateRef.resource;
1588 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);
1589
1590 if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1591 (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
1592 PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1593 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
1594 PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
1595 currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
1596 stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
1597 } else {
1598 const ResourceBarrier srcBarrier =
1599 addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
1600 : GetSrcImageBarrier(srcState, srcImage);
1601 const ResourceBarrier dstBarrier =
1602 addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage, stateRef.additionalState)
1603 : GetDstImageBarrier(newGpuResourceState, newImage);
1604
1605 barriers.push_back(CommandBarrier {
1606 newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
1607 }
1608
1609 stateRef.state = newGpuResourceState;
1610 stateRef.resource = newImage;
1611 stateRef.prevRc = rcWithType;
1612 stateRef.prevRenderNodeIndex = renderNodeIndex;
1613 if (addMips) {
1614 ModifyAdditionalImageState(newImage, stateRef.additionalState);
1615 }
1616 }
1617
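// Returns the tracking entry for a dynamic buffer, allocating a slot on first use (free-list
// slots are reused). Out-of-range handles fall back to defaultBufferState_.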
1618 RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
1619 const RenderHandle handle, const GpuQueue& queue)
1620 {
1621     // NOTE: do not call with a resource that is not dynamically tracked
1622 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
1623 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
1624 if (arrayIndex < gpuBufferDataIndices_.size()) {
1625 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
1626 uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
1627 if (dataIdx == INVALID_TRACK_IDX) {
1628 if (!gpuBufferAvailableIndices_.empty()) {
1629 dataIdx = gpuBufferAvailableIndices_.back();
1630 gpuBufferAvailableIndices_.pop_back();
1631 } else {
1632 dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
1633 gpuBufferTracking_.emplace_back();
1634 }
1635 gpuBufferDataIndices_[arrayIndex] = dataIdx;
1636
1637 gpuBufferTracking_[dataIdx].resource.handle = handle;
1638 gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
1639 }
1640 return gpuBufferTracking_[dataIdx];
1641 }
1642
1643 return defaultBufferState_;
1644 }
1645
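// Returns the tracking entry for a dynamic image, allocating a slot on first use and creating
// per mip layout storage for resources with additional state. Out-of-range handles fall back
// to defaultImageState_.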
1646 RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
1647 const RenderHandle handle, const GpuQueue& queue)
1648 {
1649     // NOTE: do not call with a resource that is not dynamically tracked
1650 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
1651 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
1652 if (arrayIndex < gpuImageDataIndices_.size()) {
1653         // NOTE: render pass attachments are always expected to be dynamic resources
1654 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
1655 uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
1656 if (dataIdx == INVALID_TRACK_IDX) {
1657 if (!gpuImageAvailableIndices_.empty()) {
1658 dataIdx = gpuImageAvailableIndices_.back();
1659 gpuImageAvailableIndices_.pop_back();
1660 } else {
1661 dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
1662 gpuImageTracking_.emplace_back();
1663 }
1664 gpuImageDataIndices_[arrayIndex] = dataIdx;
1665
1666 gpuImageTracking_[dataIdx].resource.handle = handle;
1667 gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
1668 if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
1669 (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
1670 gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
1671 }
1672 }
1673 return gpuImageTracking_[dataIdx];
1674 }
1675
1676 return defaultImageState_;
1677 }
1678 RENDER_END_NAMESPACE()
1679