1/*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "render_bloom.h"
17
18#include <base/containers/fixed_string.h>
19#include <base/containers/unordered_map.h>
20#include <base/math/vector.h>
21#include <render/datastore/intf_render_data_store_manager.h>
22#include <render/datastore/intf_render_data_store_pod.h>
23#include <render/datastore/render_data_store_render_pods.h>
24#include <render/device/intf_gpu_resource_manager.h>
25#include <render/device/intf_shader_manager.h>
26#include <render/namespace.h>
27#include <render/nodecontext/intf_node_context_descriptor_set_manager.h>
28#include <render/nodecontext/intf_node_context_pso_manager.h>
29#include <render/nodecontext/intf_pipeline_descriptor_set_binder.h>
30#include <render/nodecontext/intf_render_command_list.h>
31#include <render/nodecontext/intf_render_node_context_manager.h>
32#include <render/nodecontext/intf_render_node_util.h>
33
34#include "util/log.h"
35
36// shaders
37#include <render/shaders/common/render_post_process_structs_common.h>
38
39using namespace BASE_NS;
40
41RENDER_BEGIN_NAMESPACE()
42namespace {
43constexpr DynamicStateEnum DYNAMIC_STATES[] = { CORE_DYNAMIC_STATE_ENUM_VIEWPORT, CORE_DYNAMIC_STATE_ENUM_SCISSOR };
44}
45
46void RenderBloom::Init(IRenderNodeContextManager& renderNodeContextMgr, const BloomInfo& bloomInfo)
47{
48    bloomInfo_ = bloomInfo;
49
50    // NOTE: target counts etc. should probably be resized based on configuration
51    CreatePsos(renderNodeContextMgr);
52
53    auto& gpuResourceMgr = renderNodeContextMgr.GetGpuResourceManager();
54    samplerHandle_ = gpuResourceMgr.Create(samplerHandle_,
55        GpuSamplerDesc {
56            Filter::CORE_FILTER_LINEAR,                                  // magFilter
57            Filter::CORE_FILTER_LINEAR,                                  // minFilter
58            Filter::CORE_FILTER_LINEAR,                                  // mipMapMode
59            SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeU
60            SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeV
61            SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeW
62        });
63}
64
65void RenderBloom::PreExecute(IRenderNodeContextManager& renderNodeContextMgr, const BloomInfo& bloomInfo,
66    const PostProcessConfiguration& ppConfig)
67{
68    bloomInfo_ = bloomInfo;
69
70    const GpuImageDesc& imgDesc =
71        renderNodeContextMgr.GetGpuResourceManager().GetImageDescriptor(bloomInfo_.input.handle);
72    uint32_t sizeDenom = 1u;
73    if (ppConfig.bloomConfiguration.bloomQualityType == BloomConfiguration::QUALITY_TYPE_LOW) {
74        sizeDenom = 2u;
75    }
76    CreateTargets(renderNodeContextMgr, Math::UVec2(imgDesc.width, imgDesc.height) / sizeDenom);
77}
78
79void RenderBloom::Execute(IRenderNodeContextManager& renderNodeContextMgr, IRenderCommandList& cmdList,
80    const PostProcessConfiguration& ppConfig)
81{
82    bloomEnabled_ = false;
83    BloomConfiguration bloomConfiguration;
84    if (ppConfig.enableFlags & PostProcessConfiguration::ENABLE_BLOOM_BIT) {
85        bloomConfiguration.thresholdHard = ppConfig.bloomConfiguration.thresholdHard;
86        bloomConfiguration.thresholdSoft = ppConfig.bloomConfiguration.thresholdSoft;
87        bloomConfiguration.amountCoefficient = ppConfig.bloomConfiguration.amountCoefficient;
88        bloomConfiguration.dirtMaskCoefficient = ppConfig.bloomConfiguration.dirtMaskCoefficient;
89
90        bloomEnabled_ = true;
91    }
92
93    const auto bloomQualityType = ppConfig.bloomConfiguration.bloomQualityType;
94    PLUGIN_ASSERT(bloomQualityType < CORE_BLOOM_QUALITY_COUNT);
95    if (bloomInfo_.useCompute) {
96        psos_.downscale = psos_.downscaleHandlesCompute[bloomQualityType].regular;
97        psos_.downscaleAndThreshold = psos_.downscaleHandlesCompute[bloomQualityType].threshold;
98    } else {
99        psos_.downscale = psos_.downscaleHandles[bloomQualityType].regular;
100        psos_.downscaleAndThreshold = psos_.downscaleHandles[bloomQualityType].threshold;
101    }
102
103    if (!bloomEnabled_) {
104        bloomConfiguration.amountCoefficient = 0.0f;
105    }
106
107    bloomParameters_ = Math::Vec4(
108        // .x = thresholdHard, luma values below this won't bloom
109        bloomConfiguration.thresholdHard,
110        // .y = thresholdSoft, luma values from this value to hard threshold will reduce bloom input from 1.0 -> 0.0
111        // i.e. this creates softer threshold for bloom
112        bloomConfiguration.thresholdSoft,
113        // .z = amountCoefficient, will multiply the colors from the bloom textures when combined with original color
114        // target
115        bloomConfiguration.amountCoefficient,
116        // .w = -will multiply the dirt mask effect
117        bloomConfiguration.dirtMaskCoefficient);
118
119    const bool validBinders = binders_.globalSet0.get() != nullptr;
120    if (validBinders) {
121        if (bloomInfo_.useCompute) {
122            ComputeBloom(renderNodeContextMgr, cmdList);
123        } else {
124            GraphicsBloom(renderNodeContextMgr, cmdList);
125        }
126    }
127}
128
129DescriptorCounts RenderBloom::GetDescriptorCounts() const
130{
131    // NOTE: when added support for various bloom target counts, might need to be calculated for max
132    return DescriptorCounts { {
133        { CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 32u },
134        { CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 32u },
135        { CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE, 32u },
136        { CORE_DESCRIPTOR_TYPE_SAMPLER, 24u },
137        { CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2u },
138    } };
139}
140
141RenderHandle RenderBloom::GetFinalTarget() const
142{
143    if (RenderHandleUtil::IsValid(bloomInfo_.output.handle)) {
144        return bloomInfo_.output.handle;
145    } else {
146        // output tex1 on compute and tex2 on graphics
147        return bloomInfo_.useCompute ? (targets_.tex1[0u].GetHandle()) : (targets_.tex2[0u].GetHandle());
148    }
149}
150
151void RenderBloom::UpdateGlobalSet(IRenderCommandList& cmdList)
152{
153    auto& binder = *binders_.globalSet0;
154    binder.ClearBindings();
155    uint32_t binding = 0u;
156    binder.BindBuffer(binding++, bloomInfo_.globalUbo, 0);
157    binder.BindBuffer(binding++, bloomInfo_.globalUbo, sizeof(GlobalPostProcessStruct));
158    cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
159}
160
161void RenderBloom::ComputeBloom(IRenderNodeContextManager& renderNodeContextMgr, IRenderCommandList& cmdList)
162{
163    constexpr PushConstant pc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
164        sizeof(LocalPostProcessPushConstantStruct) };
165
166    UpdateGlobalSet(cmdList);
167    if (bloomEnabled_) {
168        ComputeDownscaleAndThreshold(pc, cmdList);
169        ComputeDownscale(pc, cmdList);
170        ComputeUpscale(pc, cmdList);
171    }
172    // needs to be done even when bloom is disabled if node is in use
173    if (RenderHandleUtil::IsValid(bloomInfo_.output.handle)) {
174        ComputeCombine(pc, cmdList);
175    }
176}
177
178void RenderBloom::ComputeDownscaleAndThreshold(const PushConstant& pc, IRenderCommandList& cmdList)
179{
180    RenderHandle sets[2u] {};
181    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
182    {
183        auto& binder = *binders_.downscaleAndThreshold;
184        sets[1u] = binder.GetDescriptorSetHandle();
185        binder.ClearBindings();
186
187        uint32_t binding = 0;
188        binder.BindImage(binding++, { targets_.tex1[0].GetHandle() });
189        binder.BindImage(binding++, { bloomInfo_.input });
190        binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
191
192        cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
193    }
194
195    cmdList.BindPipeline(psos_.downscaleAndThreshold);
196    const ShaderThreadGroup tgs = psos_.downscaleAndThresholdTGS;
197
198    // bind all sets
199    cmdList.BindDescriptorSets(0, sets);
200
201    const auto targetSize = targets_.tex1Size[0];
202
203    LocalPostProcessPushConstantStruct uPc;
204    uPc.factor = bloomParameters_;
205    uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
206        1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
207
208    cmdList.PushConstantData(pc, arrayviewU8(uPc));
209
210    cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
211}
212
213void RenderBloom::ComputeDownscale(const PushConstant& pc, IRenderCommandList& cmdList)
214{
215    cmdList.BindPipeline(psos_.downscale);
216    const ShaderThreadGroup tgs = psos_.downscaleTGS;
217
218    RenderHandle sets[2u] {};
219    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
220    for (size_t i = 1; i < targets_.tex1.size(); ++i) {
221        {
222            auto& binder = *binders_.downscale[i];
223            sets[1u] = binder.GetDescriptorSetHandle();
224            binder.ClearBindings();
225
226            uint32_t binding = 0;
227            binder.BindImage(binding++, { targets_.tex1[i].GetHandle() });
228            binder.BindImage(binding++, { targets_.tex1[i - 1].GetHandle() });
229            binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
230
231            cmdList.UpdateDescriptorSet(
232                binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
233        }
234        cmdList.BindDescriptorSets(0u, sets);
235
236        const auto targetSize = targets_.tex1Size[i];
237
238        LocalPostProcessPushConstantStruct uPc;
239        uPc.factor = bloomParameters_;
240        uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
241            1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
242        cmdList.PushConstantData(pc, arrayviewU8(uPc));
243
244        cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
245    }
246}
247
248void RenderBloom::ComputeUpscale(const PushConstant& pc, IRenderCommandList& cmdList)
249{
250    cmdList.BindPipeline(psos_.upscale);
251    const ShaderThreadGroup tgs = psos_.upscaleTGS;
252
253    RenderHandle sets[2u] {};
254    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
255
256    for (size_t i = targets_.tex1.size() - 1; i != 0; --i) {
257        {
258            auto& binder = *binders_.upscale[i];
259            sets[1u] = binder.GetDescriptorSetHandle();
260            binder.ClearBindings();
261
262            binder.BindImage(0u, { targets_.tex1[i - 1].GetHandle() });
263            binder.BindImage(1u, { targets_.tex1[i].GetHandle() });
264            binder.BindSampler(2u, { samplerHandle_.GetHandle() });
265
266            cmdList.UpdateDescriptorSet(
267                binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
268        }
269        cmdList.BindDescriptorSets(0u, sets);
270
271        const auto targetSize = targets_.tex1Size[i - 1];
272
273        LocalPostProcessPushConstantStruct uPc;
274        uPc.factor = bloomParameters_;
275        uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
276            1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
277        cmdList.PushConstantData(pc, arrayviewU8(uPc));
278
279        cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
280    }
281}
282
283void RenderBloom::ComputeCombine(const PushConstant& pc, IRenderCommandList& cmdList)
284{
285    cmdList.BindPipeline(psos_.combine);
286    const ShaderThreadGroup tgs = psos_.combineTGS;
287
288    RenderHandle sets[2u] {};
289    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
290    {
291        auto& binder = *binders_.combine;
292        sets[1u] = binder.GetDescriptorSetHandle();
293        binder.ClearBindings();
294        // bind resources to set 1
295        uint32_t binding = 0;
296        binder.BindImage(binding++, { bloomInfo_.output });
297        binder.BindImage(binding++, { bloomInfo_.input });
298        binder.BindImage(binding++, { targets_.tex1[0].GetHandle() });
299        binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
300
301        // update the descriptor set bindings for set 1
302        cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
303    }
304
305    cmdList.BindDescriptorSets(0u, sets);
306
307    const auto targetSize = baseSize_;
308
309    LocalPostProcessPushConstantStruct uPc;
310    uPc.factor = bloomParameters_;
311    uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
312        1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
313    cmdList.PushConstantData(pc, arrayviewU8(uPc));
314
315    cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
316}
317
318void RenderBloom::GraphicsBloom(IRenderNodeContextManager& renderNodeContextMgr, IRenderCommandList& cmdList)
319{
320    RenderPass renderPass;
321    renderPass.renderPassDesc.attachmentCount = 1;
322    renderPass.renderPassDesc.subpassCount = 1;
323    renderPass.renderPassDesc.attachments[0].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
324    renderPass.renderPassDesc.attachments[0].storeOp = AttachmentStoreOp::CORE_ATTACHMENT_STORE_OP_STORE;
325
326    RenderPassSubpassDesc& subpassDesc = renderPass.subpassDesc;
327    subpassDesc.colorAttachmentCount = 1;
328    subpassDesc.colorAttachmentIndices[0] = 0;
329
330    constexpr PushConstant pc { ShaderStageFlagBits::CORE_SHADER_STAGE_FRAGMENT_BIT,
331        sizeof(LocalPostProcessPushConstantStruct) };
332
333    UpdateGlobalSet(cmdList);
334    if (bloomEnabled_) {
335        RenderDownscaleAndThreshold(renderPass, pc, cmdList);
336        RenderDownscale(renderPass, pc, cmdList);
337        RenderUpscale(renderPass, pc, cmdList);
338    }
339    // combine (needs to be done even when bloom is disabled if node is in use
340    if (RenderHandleUtil::IsValid(bloomInfo_.output.handle)) {
341        RenderCombine(renderPass, pc, cmdList);
342    }
343}
344
345void RenderBloom::RenderDownscaleAndThreshold(
346    RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
347{
348    const auto targetSize = targets_.tex1Size[0];
349    const ViewportDesc viewportDesc { 0, 0, static_cast<float>(targetSize.x), static_cast<float>(targetSize.y) };
350    const ScissorDesc scissorDesc = { 0, 0, targetSize.x, targetSize.y };
351
352    renderPass.renderPassDesc.attachmentHandles[0] = targets_.tex1[0].GetHandle();
353    renderPass.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
354    cmdList.BeginRenderPass(renderPass.renderPassDesc, 0, renderPass.subpassDesc);
355
356    cmdList.SetDynamicStateViewport(viewportDesc);
357    cmdList.SetDynamicStateScissor(scissorDesc);
358    cmdList.BindPipeline(psos_.downscaleAndThreshold);
359
360    RenderHandle sets[2u] {};
361    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
362    {
363        auto& binder = *binders_.downscaleAndThreshold;
364        sets[1u] = binder.GetDescriptorSetHandle();
365        binder.ClearBindings();
366
367        binder.BindImage(0u, { bloomInfo_.input });
368        binder.BindSampler(1u, { samplerHandle_.GetHandle() });
369        cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
370    }
371    cmdList.BindDescriptorSets(0u, sets);
372
373    LocalPostProcessPushConstantStruct uPc;
374    uPc.factor = bloomParameters_;
375    uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
376        1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
377
378    cmdList.PushConstantData(pc, arrayviewU8(uPc));
379    cmdList.Draw(3u, 1u, 0u, 0u);
380    cmdList.EndRenderPass();
381}
382
383void RenderBloom::RenderDownscale(RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
384{
385    LocalPostProcessPushConstantStruct uPc;
386    uPc.factor = bloomParameters_;
387
388    RenderHandle sets[2u] {};
389    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
390    for (size_t idx = 1; idx < targets_.tex1.size(); ++idx) {
391        const auto targetSize = targets_.tex1Size[idx];
392        const ViewportDesc viewportDesc { 0, 0, static_cast<float>(targetSize.x), static_cast<float>(targetSize.y) };
393        const ScissorDesc scissorDesc = { 0, 0, targetSize.x, targetSize.y };
394
395        renderPass.renderPassDesc.attachmentHandles[0] = targets_.tex1[idx].GetHandle();
396        renderPass.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
397        cmdList.BeginRenderPass(renderPass.renderPassDesc, 0, renderPass.subpassDesc);
398
399        cmdList.SetDynamicStateViewport(viewportDesc);
400        cmdList.SetDynamicStateScissor(scissorDesc);
401
402        cmdList.BindPipeline(psos_.downscale);
403
404        {
405            auto& binder = *binders_.downscale[idx];
406            sets[1u] = binder.GetDescriptorSetHandle();
407            binder.ClearBindings();
408            binder.BindImage(0u, { targets_.tex1[idx - 1].GetHandle() });
409            binder.BindSampler(1u, { samplerHandle_.GetHandle() });
410            cmdList.UpdateDescriptorSet(
411                binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
412        }
413        cmdList.BindDescriptorSets(0u, sets);
414
415        uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
416            1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
417
418        cmdList.PushConstantData(pc, arrayviewU8(uPc));
419        cmdList.Draw(3u, 1u, 0u, 0u);
420        cmdList.EndRenderPass();
421    }
422}
423
424void RenderBloom::RenderUpscale(RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
425{
426    RenderPass renderPassUpscale = renderPass;
427    renderPassUpscale.subpassDesc.inputAttachmentCount = 1;
428    renderPassUpscale.subpassDesc.inputAttachmentIndices[0] = 0;
429    renderPassUpscale.renderPassDesc.attachments[0].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
430    renderPassUpscale.renderPassDesc.attachments[0].storeOp = AttachmentStoreOp::CORE_ATTACHMENT_STORE_OP_STORE;
431
432    RenderHandle sets[2u] {};
433    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
434    PLUGIN_ASSERT(targets_.tex1.size() == targets_.tex2.size());
435    RenderHandle input;
436    if (targets_.tex1.size() >= 1) {
437        input = targets_.tex1[targets_.tex1.size() - 1].GetHandle();
438    }
439    for (size_t idx = targets_.tex1.size() - 1; idx != 0; --idx) {
440        const auto targetSize = targets_.tex1Size[idx - 1];
441        const ViewportDesc viewportDesc { 0, 0, static_cast<float>(targetSize.x), static_cast<float>(targetSize.y) };
442        const ScissorDesc scissorDesc = { 0, 0, targetSize.x, targetSize.y };
443
444        // tex2 as output
445        renderPassUpscale.renderPassDesc.attachmentHandles[0] = targets_.tex2[idx - 1].GetHandle();
446        renderPassUpscale.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
447        cmdList.BeginRenderPass(renderPassUpscale.renderPassDesc, 0, renderPassUpscale.subpassDesc);
448
449        cmdList.SetDynamicStateViewport(viewportDesc);
450        cmdList.SetDynamicStateScissor(scissorDesc);
451
452        cmdList.BindPipeline(psos_.upscale);
453
454        {
455            auto& binder = *binders_.upscale[idx];
456            sets[1u] = binder.GetDescriptorSetHandle();
457            binder.ClearBindings();
458
459            uint32_t binding = 0;
460            binder.BindImage(binding++, { input });
461            binder.BindImage(binding++, { targets_.tex1[idx - 1].GetHandle() });
462            binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
463            cmdList.UpdateDescriptorSet(
464                binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
465        }
466        cmdList.BindDescriptorSets(0u, sets);
467        LocalPostProcessPushConstantStruct uPc;
468        uPc.factor = bloomParameters_;
469        uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
470            1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
471
472        cmdList.PushConstantData(pc, arrayviewU8(uPc));
473        cmdList.Draw(3u, 1u, 0u, 0u);
474        cmdList.EndRenderPass();
475
476        // next pass input
477        input = renderPassUpscale.renderPassDesc.attachmentHandles[0];
478    }
479}
480
481void RenderBloom::RenderCombine(RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
482{
483    const auto targetSize = baseSize_;
484
485    renderPass.renderPassDesc.attachmentHandles[0] = bloomInfo_.output.handle;
486    renderPass.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
487    cmdList.BeginRenderPass(renderPass.renderPassDesc, 0, renderPass.subpassDesc);
488
489    cmdList.SetDynamicStateViewport(baseViewportDesc_);
490    cmdList.SetDynamicStateScissor(baseScissorDesc_);
491
492    cmdList.BindPipeline(psos_.combine);
493
494    RenderHandle sets[2u] {};
495    sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
496    {
497        auto& binder = *binders_.combine;
498        sets[1u] = binder.GetDescriptorSetHandle();
499        binder.ClearBindings();
500
501        uint32_t binding = 0;
502        binder.BindImage(binding++, { bloomInfo_.input });
503        // tex2 handle has the final result
504        binder.BindImage(binding++, { targets_.tex2[0].GetHandle() });
505        binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
506
507        cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
508    }
509    cmdList.BindDescriptorSets(0u, sets);
510
511    LocalPostProcessPushConstantStruct uPc;
512    uPc.factor = bloomParameters_;
513    uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
514        1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
515
516    cmdList.PushConstantData(pc, arrayviewU8(uPc));
517    cmdList.Draw(3u, 1u, 0u, 0u);
518    cmdList.EndRenderPass();
519}
520
521void RenderBloom::CreateTargets(IRenderNodeContextManager& renderNodeContextMgr, const Math::UVec2 baseSize)
522{
523    if (baseSize.x != baseSize_.x || baseSize.y != baseSize_.y) {
524        baseSize_ = baseSize;
525
526        format_ = Format::BASE_FORMAT_B10G11R11_UFLOAT_PACK32;
527        ImageUsageFlags usageFlags = CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT |
528                                     CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
529
530        if (bloomInfo_.useCompute) {
531            format_ = Format::BASE_FORMAT_R16G16B16A16_SFLOAT; // used due to GLES
532            usageFlags = CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT;
533        } else {
534            baseViewportDesc_ = { 0.0f, 0.0f, static_cast<float>(baseSize.x), static_cast<float>(baseSize.y), 0.0f,
535                1.0f };
536            baseScissorDesc_ = { 0, 0, baseSize.x, baseSize.y };
537        }
538
539        // create target image
540        const Math::UVec2 startTargetSize = baseSize_;
541        GpuImageDesc desc {
542            ImageType::CORE_IMAGE_TYPE_2D,
543            ImageViewType::CORE_IMAGE_VIEW_TYPE_2D,
544            format_,
545            ImageTiling::CORE_IMAGE_TILING_OPTIMAL,
546            usageFlags,
547            MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
548            0,
549            EngineImageCreationFlagBits::CORE_ENGINE_IMAGE_CREATION_DYNAMIC_BARRIERS |
550                EngineImageCreationFlagBits::CORE_ENGINE_IMAGE_CREATION_RESET_STATE_ON_FRAME_BORDERS,
551            startTargetSize.x,
552            startTargetSize.y,
553            1u,
554            1u,
555            1u,
556            SampleCountFlagBits::CORE_SAMPLE_COUNT_1_BIT,
557            {},
558        };
559
560        auto& gpuResourceMgr = renderNodeContextMgr.GetGpuResourceManager();
561#if (RENDER_VALIDATION_ENABLED == 1)
562        const string_view nodeName = renderNodeContextMgr.GetName();
563#endif
564        for (size_t idx = 0; idx < targets_.tex1.size(); ++idx) {
565            // every bloom target is half the size of the original/ previous bloom target
566            desc.width /= 2u;
567            desc.height /= 2u;
568            desc.width = (desc.width >= 1u) ? desc.width : 1u;
569            desc.height = (desc.height >= 1u) ? desc.height : 1u;
570            targets_.tex1Size[idx] = Math::UVec2(desc.width, desc.height);
571#if (RENDER_VALIDATION_ENABLED == 1)
572            const auto baseTargetName = nodeName + "_Bloom_" + to_string(idx);
573            targets_.tex1[idx] = gpuResourceMgr.Create(baseTargetName + "_A", desc);
574            if (!bloomInfo_.useCompute) {
575                targets_.tex2[idx] = gpuResourceMgr.Create(baseTargetName + "_B", desc);
576            }
577#else
578            targets_.tex1[idx] = gpuResourceMgr.Create(targets_.tex1[idx], desc);
579            if (!bloomInfo_.useCompute) {
580                targets_.tex2[idx] = gpuResourceMgr.Create(targets_.tex2[idx], desc);
581            }
582#endif
583        }
584    }
585}
586
587void RenderBloom::CreatePsos(IRenderNodeContextManager& renderNodeContextMgr)
588{
589    if (bloomInfo_.useCompute) {
590        CreateComputePsos(renderNodeContextMgr);
591    } else {
592        CreateRenderPsos(renderNodeContextMgr);
593    }
594}
595
596void RenderBloom::CreateComputePsos(IRenderNodeContextManager& renderNodeContextMgr)
597{
598    const auto& shaderMgr = renderNodeContextMgr.GetShaderManager();
599    INodeContextPsoManager& psoMgr = renderNodeContextMgr.GetPsoManager();
600    INodeContextDescriptorSetManager& dSetMgr = renderNodeContextMgr.GetDescriptorSetManager();
601
602    constexpr BASE_NS::pair<BloomConfiguration::BloomQualityType, uint32_t> configurations[] = {
603        { BloomConfiguration::BloomQualityType::QUALITY_TYPE_LOW, RenderBloom::CORE_BLOOM_QUALITY_LOW },
604        { BloomConfiguration::BloomQualityType::QUALITY_TYPE_NORMAL, RenderBloom::CORE_BLOOM_QUALITY_NORMAL },
605        { BloomConfiguration::BloomQualityType::QUALITY_TYPE_HIGH, RenderBloom::CORE_BLOOM_QUALITY_HIGH }
606    };
607    for (const auto& configuration : configurations) {
608        {
609            auto shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale.shader");
610            const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
611            ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
612            const ShaderSpecializationConstantDataView specDataView {
613                { specializations.constants.data(), specializations.constants.size() },
614                { &configuration.second, 1u },
615            };
616
617            psos_.downscaleHandlesCompute[configuration.first].regular =
618                psoMgr.GetComputePsoHandle(shader, pl, specDataView);
619        }
620        {
621            auto shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale_threshold.shader");
622            const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
623
624            ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
625            const ShaderSpecializationConstantDataView specDataView {
626                { specializations.constants.data(), specializations.constants.size() },
627                { &configuration.second, 1u },
628            };
629            psos_.downscaleHandlesCompute[configuration.first].threshold =
630                psoMgr.GetComputePsoHandle(shader, pl, specDataView);
631        }
632    }
633
634    constexpr uint32_t globalSet = 0u;
635    constexpr uint32_t localSetIdx = 1u;
636    // the first one creates the global set as well
637    {
638        const RenderHandle shaderHandle =
639            shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale_threshold.shader");
640        const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
641        psos_.downscaleAndThreshold = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
642        psos_.downscaleAndThresholdTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
643
644        const auto& gBinds = pl.descriptorSetLayouts[globalSet].bindings;
645        binders_.globalSet0 = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(gBinds), gBinds);
646
647        const auto& lBinds = pl.descriptorSetLayouts[localSetIdx].bindings;
648        binders_.downscaleAndThreshold = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(lBinds), lBinds);
649    }
650    {
651        const RenderHandle shaderHandle =
652            shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale.shader");
653        const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
654        psos_.downscale = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
655        psos_.downscaleTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
656
657        PLUGIN_ASSERT(binders_.downscale.size() >= TARGET_COUNT);
658        const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
659        for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
660            binders_.downscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
661        }
662    }
663    {
664        const RenderHandle shaderHandle =
665            shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_upscale.shader");
666        const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
667        psos_.upscale = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
668        psos_.upscaleTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
669
670        PLUGIN_ASSERT(binders_.upscale.size() >= TARGET_COUNT);
671        const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
672        for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
673            binders_.upscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
674        }
675    }
676    {
677        const RenderHandle shaderHandle =
678            shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_combine.shader");
679        const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
680        psos_.combine = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
681        psos_.combineTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
682
683        const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
684        binders_.combine = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
685    }
686}
687
688std::pair<RenderHandle, const PipelineLayout&> RenderBloom::CreateAndReflectRenderPso(
689    IRenderNodeContextManager& renderNodeContextMgr, const string_view shader, const RenderPass& renderPass)
690{
691    const auto& shaderMgr = renderNodeContextMgr.GetShaderManager();
692    const RenderHandle shaderHandle = shaderMgr.GetShaderHandle(shader.data());
693    const RenderHandle graphicsStateHandle = shaderMgr.GetGraphicsStateHandleByShaderHandle(shaderHandle);
694    const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
695
696    auto& psoMgr = renderNodeContextMgr.GetPsoManager();
697    const RenderHandle pso = psoMgr.GetGraphicsPsoHandle(
698        shaderHandle, graphicsStateHandle, pl, {}, {}, { DYNAMIC_STATES, countof(DYNAMIC_STATES) });
699    return { pso, pl };
700}
701
702void RenderBloom::CreateRenderPsos(IRenderNodeContextManager& renderNodeContextMgr)
703{
704    RenderPass renderPass;
705    renderPass.renderPassDesc.attachmentCount = 1;
706    renderPass.renderPassDesc.attachmentHandles[0] = bloomInfo_.input.handle;
707    renderPass.renderPassDesc.renderArea = { 0, 0, baseSize_.x, baseSize_.y };
708    renderPass.renderPassDesc.subpassCount = 1;
709    renderPass.renderPassDesc.attachments[0].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
710    renderPass.renderPassDesc.attachments[0].storeOp = AttachmentStoreOp::CORE_ATTACHMENT_STORE_OP_STORE;
711
712    RenderPassSubpassDesc subpassDesc = renderPass.subpassDesc;
713    subpassDesc.colorAttachmentCount = 1;
714    subpassDesc.colorAttachmentIndices[0] = 0;
715
716    constexpr BASE_NS::pair<BloomConfiguration::BloomQualityType, uint32_t> configurations[] = {
717        { BloomConfiguration::BloomQualityType::QUALITY_TYPE_LOW, RenderBloom::CORE_BLOOM_QUALITY_LOW },
718        { BloomConfiguration::BloomQualityType::QUALITY_TYPE_NORMAL, RenderBloom::CORE_BLOOM_QUALITY_NORMAL },
719        { BloomConfiguration::BloomQualityType::QUALITY_TYPE_HIGH, RenderBloom::CORE_BLOOM_QUALITY_HIGH }
720    };
721
722    const IRenderNodeShaderManager& shaderMgr = renderNodeContextMgr.GetShaderManager();
723    INodeContextPsoManager& psoMgr = renderNodeContextMgr.GetPsoManager();
724
725    for (const auto& configuration : configurations) {
726        {
727            auto shader = shaderMgr.GetShaderHandle("rendershaders://shader/bloom_downscale.shader");
728            const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
729            ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
730            const ShaderSpecializationConstantDataView specDataView {
731                { specializations.constants.data(), specializations.constants.size() },
732                { &configuration.second, 1u },
733            };
734            const RenderHandle graphicsState = shaderMgr.GetGraphicsStateHandleByShaderHandle(shader);
735            psos_.downscaleHandles[configuration.first].regular = psoMgr.GetGraphicsPsoHandle(
736                shader, graphicsState, pl, {}, specDataView, { DYNAMIC_STATES, countof(DYNAMIC_STATES) });
737        }
738
739        {
740            auto shader = shaderMgr.GetShaderHandle("rendershaders://shader/bloom_downscale_threshold.shader");
741            const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
742            ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
743            const ShaderSpecializationConstantDataView specDataView {
744                { specializations.constants.data(), specializations.constants.size() },
745                { &configuration.second, 1u },
746            };
747            const RenderHandle graphicsState = shaderMgr.GetGraphicsStateHandleByShaderHandle(shader);
748            psos_.downscaleHandles[configuration.first].threshold = psoMgr.GetGraphicsPsoHandle(
749                shader, graphicsState, pl, {}, specDataView, { DYNAMIC_STATES, countof(DYNAMIC_STATES) });
750        }
751    }
752
753    INodeContextDescriptorSetManager& dSetMgr = renderNodeContextMgr.GetDescriptorSetManager();
754    constexpr uint32_t globalSet = 0u;
755    constexpr uint32_t localSet = 1u;
756    // the first one creates the global set as well
757    {
758        const auto [pso, pipelineLayout] = CreateAndReflectRenderPso(
759            renderNodeContextMgr, "rendershaders://shader/bloom_downscale_threshold.shader", renderPass);
760        psos_.downscaleAndThreshold = pso;
761
762        const auto& gBinds = pipelineLayout.descriptorSetLayouts[globalSet].bindings;
763        binders_.globalSet0 = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(gBinds), gBinds);
764
765        const auto& lBinds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
766        binders_.downscaleAndThreshold = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(lBinds), lBinds);
767    }
768    {
769        const auto [pso, pipelineLayout] = CreateAndReflectRenderPso(
770            renderNodeContextMgr, "rendershaders://shader/bloom_downscale.shader", renderPass);
771        psos_.downscale = pso;
772        const auto& binds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
773        for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
774            binders_.downscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
775        }
776    }
777    {
778        const auto [pso, pipelineLayout] =
779            CreateAndReflectRenderPso(renderNodeContextMgr, "rendershaders://shader/bloom_upscale.shader", renderPass);
780        psos_.upscale = pso;
781        const auto& binds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
782        for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
783            binders_.upscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
784        }
785    }
786    {
787        const auto [pso, pipelineLayout] =
788            CreateAndReflectRenderPso(renderNodeContextMgr, "rendershaders://shader/bloom_combine.shader", renderPass);
789        psos_.combine = pso;
790        const auto& binds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
791        binders_.combine = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
792    }
793}
794RENDER_END_NAMESPACE()
795