1/*
2 * Copyright 2020 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/gpu/d3d/GrD3DResourceProvider.h"
9
10#include "include/gpu/GrContextOptions.h"
11#include "include/gpu/GrDirectContext.h"
12#include "include/private/SkOpts_spi.h"
13#include "src/gpu/GrDirectContextPriv.h"
14#include "src/gpu/d3d/GrD3DBuffer.h"
15#include "src/gpu/d3d/GrD3DCommandList.h"
16#include "src/gpu/d3d/GrD3DGpu.h"
17#include "src/gpu/d3d/GrD3DPipelineState.h"
18#include "src/gpu/d3d/GrD3DPipelineStateBuilder.h"
19#include "src/gpu/d3d/GrD3DRenderTarget.h"
20
21GrD3DResourceProvider::GrD3DResourceProvider(GrD3DGpu* gpu)
22        : fGpu(gpu)
23        , fCpuDescriptorManager(gpu)
24        , fDescriptorTableManager(gpu)
25        , fPipelineStateCache(new PipelineStateCache(gpu))
26        , fShaderResourceDescriptorTableCache(gpu)
27        , fSamplerDescriptorTableCache(gpu) {
28}
29
30void GrD3DResourceProvider::destroyResources() {
31    fSamplers.reset();
32
33    fPipelineStateCache->release();
34}
35
36std::unique_ptr<GrD3DDirectCommandList> GrD3DResourceProvider::findOrCreateDirectCommandList() {
37    if (fAvailableDirectCommandLists.count()) {
38        std::unique_ptr<GrD3DDirectCommandList> list =
39                std::move(fAvailableDirectCommandLists.back());
40        fAvailableDirectCommandLists.pop_back();
41        return list;
42    }
43    return GrD3DDirectCommandList::Make(fGpu);
44}
45
46void GrD3DResourceProvider::recycleDirectCommandList(
47        std::unique_ptr<GrD3DDirectCommandList> commandList) {
48    commandList->reset();
49    fAvailableDirectCommandLists.push_back(std::move(commandList));
50}
51
52sk_sp<GrD3DRootSignature> GrD3DResourceProvider::findOrCreateRootSignature(int numTextureSamplers,
53                                                                           int numUAVs) {
54    for (int i = 0; i < fRootSignatures.count(); ++i) {
55        if (fRootSignatures[i]->isCompatible(numTextureSamplers, numUAVs)) {
56            return fRootSignatures[i];
57        }
58    }
59
60    auto rootSig = GrD3DRootSignature::Make(fGpu, numTextureSamplers, numUAVs);
61    if (!rootSig) {
62        return nullptr;
63    }
64    fRootSignatures.push_back(rootSig);
65    return rootSig;
66}
67
68sk_sp<GrD3DCommandSignature> GrD3DResourceProvider::findOrCreateCommandSignature(
69        GrD3DCommandSignature::ForIndexed indexed, unsigned int slot) {
70    for (int i = 0; i < fCommandSignatures.count(); ++i) {
71        if (fCommandSignatures[i]->isCompatible(indexed, slot)) {
72            return fCommandSignatures[i];
73        }
74    }
75
76    auto commandSig = GrD3DCommandSignature::Make(fGpu, indexed, slot);
77    if (!commandSig) {
78        return nullptr;
79    }
80    fCommandSignatures.push_back(commandSig);
81    return commandSig;
82}
83
84GrD3DDescriptorHeap::CPUHandle GrD3DResourceProvider::createRenderTargetView(
85        ID3D12Resource* textureResource) {
86    return fCpuDescriptorManager.createRenderTargetView(fGpu, textureResource);
87}
88
89void GrD3DResourceProvider::recycleRenderTargetView(
90        const GrD3DDescriptorHeap::CPUHandle& rtvDescriptor) {
91    fCpuDescriptorManager.recycleRenderTargetView(rtvDescriptor);
92}
93
94GrD3DDescriptorHeap::CPUHandle GrD3DResourceProvider::createDepthStencilView(
95        ID3D12Resource* textureResource) {
96    return fCpuDescriptorManager.createDepthStencilView(fGpu, textureResource);
97}
98
99void GrD3DResourceProvider::recycleDepthStencilView(
100        const GrD3DDescriptorHeap::CPUHandle& dsvDescriptor) {
101    fCpuDescriptorManager.recycleDepthStencilView(dsvDescriptor);
102}
103
104GrD3DDescriptorHeap::CPUHandle GrD3DResourceProvider::createConstantBufferView(
105        ID3D12Resource* bufferResource, size_t offset, size_t size) {
106    return fCpuDescriptorManager.createConstantBufferView(fGpu, bufferResource, offset, size);
107}
108
109GrD3DDescriptorHeap::CPUHandle GrD3DResourceProvider::createShaderResourceView(
110        ID3D12Resource* resource, unsigned int highestMip, unsigned int mipLevels) {
111    return fCpuDescriptorManager.createShaderResourceView(fGpu, resource, highestMip, mipLevels);
112}
113
114GrD3DDescriptorHeap::CPUHandle GrD3DResourceProvider::createUnorderedAccessView(
115        ID3D12Resource* resource, unsigned int mipSlice) {
116    return fCpuDescriptorManager.createUnorderedAccessView(fGpu, resource, mipSlice);
117}
118
119void GrD3DResourceProvider::recycleShaderView(
120        const GrD3DDescriptorHeap::CPUHandle& view) {
121    fCpuDescriptorManager.recycleShaderView(view);
122}
123
124static D3D12_TEXTURE_ADDRESS_MODE wrap_mode_to_d3d_address_mode(GrSamplerState::WrapMode wrapMode) {
125    switch (wrapMode) {
126    case GrSamplerState::WrapMode::kClamp:
127        return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
128    case GrSamplerState::WrapMode::kRepeat:
129        return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
130    case GrSamplerState::WrapMode::kMirrorRepeat:
131        return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
132    case GrSamplerState::WrapMode::kClampToBorder:
133        return D3D12_TEXTURE_ADDRESS_MODE_BORDER;
134    }
135    SK_ABORT("Unknown wrap mode.");
136}
137
138static D3D12_FILTER d3d_filter(GrSamplerState sampler) {
139    switch (sampler.mipmapMode()) {
140        // When the mode is kNone we disable filtering using maxLOD.
141        case GrSamplerState::MipmapMode::kNone:
142        case GrSamplerState::MipmapMode::kNearest:
143            switch (sampler.filter()) {
144                case GrSamplerState::Filter::kNearest: return D3D12_FILTER_MIN_MAG_MIP_POINT;
145                case GrSamplerState::Filter::kLinear:  return D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
146            }
147            SkUNREACHABLE;
148        case GrSamplerState::MipmapMode::kLinear:
149            switch (sampler.filter()) {
150                case GrSamplerState::Filter::kNearest: return D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR;
151                case GrSamplerState::Filter::kLinear:  return D3D12_FILTER_MIN_MAG_MIP_LINEAR;
152            }
153            SkUNREACHABLE;
154    }
155    SkUNREACHABLE;
156}
157
158D3D12_CPU_DESCRIPTOR_HANDLE GrD3DResourceProvider::findOrCreateCompatibleSampler(
159        const GrSamplerState& params) {
160    uint32_t key = params.asIndex();
161    D3D12_CPU_DESCRIPTOR_HANDLE* samplerPtr = fSamplers.find(key);
162    if (samplerPtr) {
163        return *samplerPtr;
164    }
165
166    D3D12_FILTER filter = d3d_filter(params);
167    // We disable MIP filtering using maxLOD. Otherwise, we want the max LOD to be unbounded.
168    float maxLOD = params.mipmapped() == GrMipmapped::kYes ? std::numeric_limits<float>::max()
169                                                           : 0.f;
170    D3D12_TEXTURE_ADDRESS_MODE addressModeU = wrap_mode_to_d3d_address_mode(params.wrapModeX());
171    D3D12_TEXTURE_ADDRESS_MODE addressModeV = wrap_mode_to_d3d_address_mode(params.wrapModeY());
172
173    D3D12_CPU_DESCRIPTOR_HANDLE sampler =
174            fCpuDescriptorManager.createSampler(
175            fGpu, filter, maxLOD, addressModeU, addressModeV).fHandle;
176    fSamplers.set(key, sampler);
177    return sampler;
178}
179
180sk_sp<GrD3DDescriptorTable> GrD3DResourceProvider::findOrCreateShaderViewTable(
181    const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& shaderViews) {
182
183    auto createFunc = [this](GrD3DGpu* gpu, unsigned int numDesc) {
184        return this->fDescriptorTableManager.createShaderViewTable(gpu, numDesc);
185    };
186    return fShaderResourceDescriptorTableCache.findOrCreateDescTable(shaderViews, createFunc);
187}
188
189sk_sp<GrD3DDescriptorTable> GrD3DResourceProvider::findOrCreateSamplerTable(
190        const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& samplers) {
191    auto createFunc = [this](GrD3DGpu* gpu, unsigned int numDesc) {
192        return this->fDescriptorTableManager.createSamplerTable(gpu, numDesc);
193    };
194    return fShaderResourceDescriptorTableCache.findOrCreateDescTable(samplers, createFunc);
195}
196
197GrD3DPipelineState* GrD3DResourceProvider::findOrCreateCompatiblePipelineState(
198        GrD3DRenderTarget* rt, const GrProgramInfo& info) {
199    return fPipelineStateCache->refPipelineState(rt, info);
200}
201
202sk_sp<GrD3DPipeline> GrD3DResourceProvider::findOrCreateMipmapPipeline() {
203    if (!fMipmapPipeline) {
204        // Note: filtering for non-even widths and heights samples at the 0.25 and 0.75
205        // locations and averages the result. As the initial samples are bilerped this is
206        // approximately a triangle filter. We should look into doing a better kernel but
207        // this should hold us for now.
208        const char* shader =
209            "SamplerState textureSampler : register(s0, space1);\n"
210            "Texture2D<float4> inputTexture : register(t1, space1);\n"
211            "RWTexture2D<float4> outUAV : register(u2, space1);\n"
212            "\n"
213            "cbuffer UniformBuffer : register(b0, space0) {\n"
214            "    float2 inverseDims;\n"
215            "    uint mipLevel;\n"
216            "    uint sampleMode;\n"
217            "}\n"
218            "\n"
219            "[numthreads(8, 8, 1)]\n"
220            "void main(uint groupIndex : SV_GroupIndex, uint3 threadID : SV_DispatchThreadID) {\n"
221            "    float2 uv = inverseDims * (threadID.xy + 0.5);\n"
222            "    float4 mipVal;\n"
223            "    switch (sampleMode) {\n"
224            "        case 0: {\n"
225            "            mipVal = inputTexture.SampleLevel(textureSampler, uv, mipLevel);\n"
226            "            break;\n"
227            "        }\n"
228            "        case 1: {\n"
229            "            float2 uvdiff = inverseDims * 0.25;\n"
230            "            mipVal = inputTexture.SampleLevel(textureSampler, uv-uvdiff, mipLevel);\n"
231            "            mipVal += inputTexture.SampleLevel(textureSampler, uv+uvdiff, mipLevel);\n"
232            "            uvdiff.y = -uvdiff.y;\n"
233            "            mipVal += inputTexture.SampleLevel(textureSampler, uv-uvdiff, mipLevel);\n"
234            "            mipVal += inputTexture.SampleLevel(textureSampler, uv+uvdiff, mipLevel);\n"
235            "            mipVal *= 0.25;\n"
236            "            break;\n"
237            "        }\n"
238            "        case 2: {\n"
239            "            float2 uvdiff = float2(inverseDims.x * 0.25, 0);\n"
240            "            mipVal = inputTexture.SampleLevel(textureSampler, uv-uvdiff, mipLevel);\n"
241            "            mipVal += inputTexture.SampleLevel(textureSampler, uv+uvdiff, mipLevel);\n"
242            "            mipVal *= 0.5;\n"
243            "            break;\n"
244            "        }\n"
245            "        case 3: {\n"
246            "            float2 uvdiff = float2(0, inverseDims.y * 0.25);\n"
247            "            mipVal = inputTexture.SampleLevel(textureSampler, uv-uvdiff, mipLevel);\n"
248            "            mipVal += inputTexture.SampleLevel(textureSampler, uv+uvdiff, mipLevel);\n"
249            "            mipVal *= 0.5;\n"
250            "            break;\n"
251            "        }\n"
252            "    }\n"
253            "\n"
254            "    outUAV[threadID.xy] = mipVal;\n"
255            "}\n";
256
257        sk_sp<GrD3DRootSignature> rootSig = this->findOrCreateRootSignature(1, 1);
258
259        fMipmapPipeline =
260                GrD3DPipelineStateBuilder::MakeComputePipeline(fGpu, rootSig.get(), shader);
261    }
262
263    return fMipmapPipeline;
264}
265
266D3D12_GPU_VIRTUAL_ADDRESS GrD3DResourceProvider::uploadConstantData(void* data, size_t size) {
267    // constant size has to be aligned to 256
268    constexpr int kConstantAlignment = 256;
269
270    // upload the data
271    size_t paddedSize = SkAlignTo(size, kConstantAlignment);
272    GrRingBuffer::Slice slice = fGpu->uniformsRingBuffer()->suballocate(paddedSize);
273    char* destPtr = static_cast<char*>(slice.fBuffer->map()) + slice.fOffset;
274    memcpy(destPtr, data, size);
275
276    // create the associated constant buffer view descriptor
277    GrD3DBuffer* d3dBuffer = static_cast<GrD3DBuffer*>(slice.fBuffer);
278    D3D12_GPU_VIRTUAL_ADDRESS gpuAddress = d3dBuffer->d3dResource()->GetGPUVirtualAddress();
279    return gpuAddress + slice.fOffset;
280}
281
282void GrD3DResourceProvider::prepForSubmit() {
283    fDescriptorTableManager.prepForSubmit(fGpu);
284    // Any heap memory used for these will be returned when the command buffer finishes,
285    // so we have to invalidate all entries.
286    fShaderResourceDescriptorTableCache.release();
287    fSamplerDescriptorTableCache.release();
288}
289
290////////////////////////////////////////////////////////////////////////////////////////////////
291
#ifdef GR_PIPELINE_STATE_CACHE_STATS
// When true, the pipeline state cache prints its request/miss statistics on destruction.
static const bool c_DisplayMtlPipelineCache = false;
#endif
296
297struct GrD3DResourceProvider::PipelineStateCache::Entry {
298    Entry(GrD3DGpu* gpu, std::unique_ptr<GrD3DPipelineState> pipelineState)
299            : fGpu(gpu), fPipelineState(std::move(pipelineState)) {}
300
301    GrD3DGpu* fGpu;
302    std::unique_ptr<GrD3DPipelineState> fPipelineState;
303};
304
305GrD3DResourceProvider::PipelineStateCache::PipelineStateCache(GrD3DGpu* gpu)
306        : fMap(gpu->getContext()->priv().options().fRuntimeProgramCacheSize)
307        , fGpu(gpu)
308#ifdef GR_PIPELINE_STATE_CACHE_STATS
309        , fTotalRequests(0)
310        , fCacheMisses(0)
311#endif
312{
313}
314
315GrD3DResourceProvider::PipelineStateCache::~PipelineStateCache() {
316    // dump stats
317#ifdef GR_PIPELINE_STATE_CACHE_STATS
318    if (c_DisplayMtlPipelineCache) {
319        SkDebugf("--- Pipeline State Cache ---\n");
320        SkDebugf("Total requests: %d\n", fTotalRequests);
321        SkDebugf("Cache misses: %d\n", fCacheMisses);
322        SkDebugf("Cache miss %%: %f\n",
323                 (fTotalRequests > 0) ? 100.f * fCacheMisses / fTotalRequests : 0.f);
324        SkDebugf("---------------------\n");
325    }
326#endif
327}
328
329void GrD3DResourceProvider::PipelineStateCache::release() {
330    fMap.reset();
331}
332
333GrD3DPipelineState* GrD3DResourceProvider::PipelineStateCache::refPipelineState(
334        GrD3DRenderTarget* renderTarget, const GrProgramInfo& programInfo) {
335#ifdef GR_PIPELINE_STATE_CACHE_STATS
336    ++fTotalRequests;
337#endif
338
339    const GrCaps* caps = fGpu->caps();
340
341    GrProgramDesc desc = caps->makeDesc(renderTarget, programInfo);
342    if (!desc.isValid()) {
343        GrCapsDebugf(fGpu->caps(), "Failed to build mtl program descriptor!\n");
344        return nullptr;
345    }
346
347    std::unique_ptr<Entry>* entry = fMap.find(desc);
348    if (!entry) {
349#ifdef GR_PIPELINE_STATE_CACHE_STATS
350        ++fCacheMisses;
351#endif
352        std::unique_ptr<GrD3DPipelineState> pipelineState =
353                GrD3DPipelineStateBuilder::MakePipelineState(fGpu, renderTarget, desc, programInfo);
354        if (!pipelineState) {
355            return nullptr;
356        }
357        entry = fMap.insert(desc, std::unique_ptr<Entry>(
358                new Entry(fGpu, std::move(pipelineState))));
359        return ((*entry)->fPipelineState).get();
360    }
361    return ((*entry)->fPipelineState).get();
362}
363
364void GrD3DResourceProvider::PipelineStateCache::markPipelineStateUniformsDirty() {
365    fMap.foreach ([](const GrProgramDesc*, std::unique_ptr<Entry>* entry) {
366        (*entry)->fPipelineState->markUniformsDirty();
367    });
368}
369
370////////////////////////////////////////////////////////////////////////////////////////////////
371
372void GrD3DResourceProvider::DescriptorTableCache::release() {
373    fMap.reset();
374}
375
376sk_sp<GrD3DDescriptorTable> GrD3DResourceProvider::DescriptorTableCache::findOrCreateDescTable(
377        const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& cpuDescriptors,
378        std::function<sk_sp<GrD3DDescriptorTable>(GrD3DGpu*, unsigned int numDesc)> createFunc) {
379    sk_sp<GrD3DDescriptorTable>* entry = fMap.find(cpuDescriptors);
380    if (entry) {
381        return *entry;
382    }
383
384    unsigned int numDescriptors = cpuDescriptors.size();
385    SkASSERT(numDescriptors <= kRangeSizesCount);
386    sk_sp<GrD3DDescriptorTable> descTable = createFunc(fGpu, numDescriptors);
387    fGpu->device()->CopyDescriptors(1, descTable->baseCpuDescriptorPtr(), &numDescriptors,
388                                    numDescriptors, cpuDescriptors.data(), fRangeSizes,
389                                    descTable->type());
390    entry = fMap.insert(cpuDescriptors, std::move(descTable));
391    return *entry;
392}
393