/*
 * Copyright 2012 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h"

#include "src/core/SkGpuBlurUtils.h"
#include "src/gpu/GrTexture.h"
#include "src/gpu/GrTextureProxy.h"
#include "src/gpu/effects/GrTextureEffect.h"
#include "src/gpu/glsl/GrGLSLFragmentShaderBuilder.h"
#include "src/gpu/glsl/GrGLSLProgramDataManager.h"
#include "src/gpu/glsl/GrGLSLUniformHandler.h"
#include "src/sksl/dsl/priv/DSLFPs.h"

// For brevity
using UniformHandle = GrGLSLProgramDataManager::UniformHandle;
using Direction = GrGaussianConvolutionFragmentProcessor::Direction;

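// Program implementation: emitCode() generates the SkSL for the 1D convolution and onSetData()
// uploads the kernel weights, sample offsets, direction increment, and (when the loop length is
// variable) the kernel width as uniforms.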
class GrGaussianConvolutionFragmentProcessor::Impl : public ProgramImpl {
public:
    void emitCode(EmitArgs&) override;

private:
    void onSetData(const GrGLSLProgramDataManager&, const GrFragmentProcessor&) override;

    UniformHandle fKernelUni;
    UniformHandle fOffsetsUni;
    UniformHandle fKernelWidthUni;
    UniformHandle fIncrementUni;
};

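// How the generated shader iterates over the kernel taps. Pre-1.30 GLSL cannot index a uniform
// array with a non-constant (and lacks the bitwise ops used below), so the loop is fully unrolled.
// Otherwise the loop bound is either baked into the shader (one variation per kernel width) or
// read from a uniform so a single shader handles all widths.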
enum class LoopType {
    kUnrolled,
    kFixedLength,
    kVariableLength,
};

static LoopType loop_type(const GrShaderCaps& caps) {
    // This checks that bitwise integer operations and array indexing by non-consts are allowed.
    if (caps.generation() < k130_GrGLSLGeneration) {
        return LoopType::kUnrolled;
    }
    // If we're in reduced shader mode and we can have a loop then use a uniform to limit the
    // number of iterations so we don't need a code variation for each width.
    return caps.reducedShaderMode() ? LoopType::kVariableLength : LoopType::kFixedLength;
}

void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) {
    const GrGaussianConvolutionFragmentProcessor& ce =
            args.fFp.cast<GrGaussianConvolutionFragmentProcessor>();

    using namespace SkSL::dsl;
    StartFragmentProcessor(this, &args);
    GlobalVar increment(kUniform_Modifier, kHalf2_Type, "Increment");
    Declare(increment);
    fIncrementUni = VarUniformHandle(increment);

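    // LinearKernelWidth() returns the number of bilinearly-filtered taps for this radius; because
    // each linear sample blends two adjacent texels, this is fewer than the full 2*radius+1 texels
    // the kernel covers.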
    int width = SkGpuBlurUtils::LinearKernelWidth(ce.fRadius);

    LoopType loopType = loop_type(*args.fShaderCaps);

    int arrayCount;
    if (loopType == LoopType::kVariableLength) {
        // Size the kernel uniform for the maximum width.
        arrayCount = (SkGpuBlurUtils::LinearKernelWidth(kMaxKernelRadius) + 3) / 4;
    } else {
        arrayCount = (width + 3) / 4;
        SkASSERT(4 * arrayCount >= width);
    }

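    // The kernel weights and sample offsets are packed four-to-a-half4 and declared as half4
    // array uniforms; element i is addressed as [i / 4][i & 3] in the loop below.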
    GlobalVar kernel(kUniform_Modifier, Array(kHalf4_Type, arrayCount), "Kernel");
    Declare(kernel);
    fKernelUni = VarUniformHandle(kernel);

    GlobalVar offsets(kUniform_Modifier, Array(kHalf4_Type, arrayCount), "Offsets");
    Declare(offsets);
    fOffsetsUni = VarUniformHandle(offsets);

    Var color(kHalf4_Type, "color", Half4(0));
    Declare(color);

    Var coord(kFloat2_Type, "coord", sk_SampleCoord());
    Declare(coord);

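    // Each tap samples the child at coord + offset * increment (increment is a unit step along
    // the blur axis, see onSetData) and accumulates it scaled by the corresponding kernel weight.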
    switch (loopType) {
        case LoopType::kUnrolled:
            for (int i = 0; i < width; i++) {
                color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) *
                         kernel[i / 4][i & 0x3];
            }
            break;
        case LoopType::kFixedLength: {
            Var i(kInt_Type, "i", 0);
            For(Declare(i), i < width, i++,
                color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) *
                         kernel[i / 4][i & 0x3]);
            break;
        }
        case LoopType::kVariableLength: {
            GlobalVar kernelWidth(kUniform_Modifier, kInt_Type, "kernelWidth");
            Declare(kernelWidth);
            fKernelWidthUni = VarUniformHandle(kernelWidth);
            Var i(kInt_Type, "i", 0);
            For(Declare(i), i < kernelWidth, i++,
                color += SampleChild(/*index=*/0, coord + offsets[i / 4][i & 3] * increment) *
                         kernel[i / 4][i & 0x3]);
            break;
        }
    }

    Return(color);
    EndFragmentProcessor();
}

void GrGaussianConvolutionFragmentProcessor::Impl::onSetData(const GrGLSLProgramDataManager& pdman,
                                                             const GrFragmentProcessor& processor) {
    const auto& conv = processor.cast<GrGaussianConvolutionFragmentProcessor>();

    float increment[2] = {};
    increment[static_cast<int>(conv.fDirection)] = 1;
    pdman.set2fv(fIncrementUni, 1, increment);

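    // Re-derive the packed array count and upload the kernel weights and offsets (computed by
    // Compute1DLinearGaussianKernel in the constructor) as the half4 array uniforms.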
    int width = SkGpuBlurUtils::LinearKernelWidth(conv.fRadius);
    int arrayCount = (width + 3)/4;
    SkDEBUGCODE(size_t arraySize = 4*arrayCount;)
    SkASSERT(arraySize >= static_cast<size_t>(width));
    SkASSERT(arraySize <= SK_ARRAY_COUNT(GrGaussianConvolutionFragmentProcessor::fKernel));
    pdman.set4fv(fKernelUni, arrayCount, conv.fKernel);
    pdman.set4fv(fOffsetsUni, arrayCount, conv.fOffsets);
    if (fKernelWidthUni.isValid()) {
        pdman.set1i(fKernelWidthUni, width);
    }
}

///////////////////////////////////////////////////////////////////////////////

std::unique_ptr<GrFragmentProcessor> GrGaussianConvolutionFragmentProcessor::Make(
        GrSurfaceProxyView view,
        SkAlphaType alphaType,
        Direction dir,
        int halfWidth,
        float gaussianSigma,
        GrSamplerState::WrapMode wm,
        const SkIRect& subset,
        const SkIRect* pixelDomain,
        const GrCaps& caps) {
    std::unique_ptr<GrFragmentProcessor> child;
    bool is_zero_sigma = SkGpuBlurUtils::IsEffectivelyZeroSigma(gaussianSigma);
    // If there will be no blur shader (zero sigma), sample with nearest filtering to preserve
    // existing behavior; the linear blur itself requires linear filtering.
    GrSamplerState::Filter filter = is_zero_sigma ?
        GrSamplerState::Filter::kNearest : GrSamplerState::Filter::kLinear;
    GrSamplerState sampler(wm, filter);
    if (is_zero_sigma) {
        halfWidth = 0;
    }
    // It's pretty common to blur a subset of an input texture. In reduced shader mode we always
    // apply the wrap mode in the shader.
    bool alwaysUseShaderTileMode = caps.reducedShaderMode();
    if (pixelDomain && !alwaysUseShaderTileMode) {
        // Inset because we expect to be invoked at pixel centers.
        SkRect domain = SkRect::Make(*pixelDomain).makeInset(0.5f, 0.5f);
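        // The convolution reads up to halfWidth texels to either side of each sample coordinate,
        // so widen the known domain along the blur axis by that amount.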
        switch (dir) {
            case Direction::kX: domain.outset(halfWidth, 0); break;
            case Direction::kY: domain.outset(0, halfWidth); break;
        }
        child = GrTextureEffect::MakeSubset(std::move(view),
                                            alphaType,
                                            SkMatrix::I(),
                                            sampler,
                                            SkRect::Make(subset),
                                            domain,
                                            caps,
                                            GrTextureEffect::kDefaultBorder);
    } else {
        child = GrTextureEffect::MakeSubset(std::move(view),
                                            alphaType,
                                            SkMatrix::I(),
                                            sampler,
                                            SkRect::Make(subset),
                                            caps,
                                            GrTextureEffect::kDefaultBorder,
                                            alwaysUseShaderTileMode);
    }

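    // An effectively zero-sigma blur is a no-op; return the wrapped texture effect directly.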
    if (is_zero_sigma) {
        return child;
    }
    return std::unique_ptr<GrFragmentProcessor>(new GrGaussianConvolutionFragmentProcessor(
            std::move(child), dir, halfWidth, gaussianSigma));
}

GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor(
        std::unique_ptr<GrFragmentProcessor> child,
        Direction direction,
        int radius,
        float gaussianSigma)
        : INHERITED(kGrGaussianConvolutionFragmentProcessor_ClassID,
                    ProcessorOptimizationFlags(child.get()))
        , fRadius(radius)
        , fDirection(direction) {
    this->registerChild(std::move(child), SkSL::SampleUsage::Explicit());
    SkASSERT(radius <= kMaxKernelRadius);
    SkGpuBlurUtils::Compute1DLinearGaussianKernel(fKernel, fOffsets, gaussianSigma, fRadius);
    this->setUsesSampleCoordsDirectly();
}

GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor(
        const GrGaussianConvolutionFragmentProcessor& that)
        : INHERITED(that)
        , fRadius(that.fRadius)
        , fDirection(that.fDirection) {
    memcpy(fKernel, that.fKernel, SkGpuBlurUtils::LinearKernelWidth(fRadius) * sizeof(float));
    memcpy(fOffsets, that.fOffsets, SkGpuBlurUtils::LinearKernelWidth(fRadius) * sizeof(float));
}

SkString GrGaussianConvolutionFragmentProcessor::getShaderDfxInfo() const {
    SkString format;
    format.printf("ShaderDfx_GrGaussianConvolution_%d", fRadius);
    return format;
}

void GrGaussianConvolutionFragmentProcessor::onAddToKey(const GrShaderCaps& shaderCaps,
                                                        GrProcessorKeyBuilder* b) const {
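    // With a variable-length loop the generated shader is identical for every radius, so the
    // radius only needs to go into the key for the unrolled and fixed-length variants.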
    if (loop_type(shaderCaps) != LoopType::kVariableLength) {
        b->add32(fRadius);
    }
}

std::unique_ptr<GrFragmentProcessor::ProgramImpl>
GrGaussianConvolutionFragmentProcessor::onMakeProgramImpl() const {
    return std::make_unique<Impl>();
}

bool GrGaussianConvolutionFragmentProcessor::onIsEqual(const GrFragmentProcessor& sBase) const {
    const auto& that = sBase.cast<GrGaussianConvolutionFragmentProcessor>();
    return fRadius == that.fRadius && fDirection == that.fDirection &&
           std::equal(fKernel, fKernel + SkGpuBlurUtils::LinearKernelWidth(fRadius), that.fKernel) &&
           std::equal(fOffsets, fOffsets + SkGpuBlurUtils::LinearKernelWidth(fRadius), that.fOffsets);
}

///////////////////////////////////////////////////////////////////////////////

GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrGaussianConvolutionFragmentProcessor);

#if GR_TEST_UTILS
std::unique_ptr<GrFragmentProcessor> GrGaussianConvolutionFragmentProcessor::TestCreate(
        GrProcessorTestData* d) {
    auto [view, ct, at] = d->randomView();

    Direction dir = d->fRandom->nextBool() ? Direction::kY : Direction::kX;
    SkIRect subset{
            static_cast<int>(d->fRandom->nextRangeU(0, view.width()  - 1)),
            static_cast<int>(d->fRandom->nextRangeU(0, view.height() - 1)),
            static_cast<int>(d->fRandom->nextRangeU(0, view.width()  - 1)),
            static_cast<int>(d->fRandom->nextRangeU(0, view.height() - 1)),
    };
    subset.sort();

    auto wm = static_cast<GrSamplerState::WrapMode>(
            d->fRandom->nextULessThan(GrSamplerState::kWrapModeCount));
    int radius = d->fRandom->nextRangeU(1, kMaxKernelRadius);
    float sigma = radius / 3.f;
    SkIRect temp;
    SkIRect* domain = nullptr;
    if (d->fRandom->nextBool()) {
        temp = {
                static_cast<int>(d->fRandom->nextRangeU(0, view.width()  - 1)),
                static_cast<int>(d->fRandom->nextRangeU(0, view.height() - 1)),
                static_cast<int>(d->fRandom->nextRangeU(0, view.width()  - 1)),
                static_cast<int>(d->fRandom->nextRangeU(0, view.height() - 1)),
        };
        temp.sort();
        domain = &temp;
    }

    return GrGaussianConvolutionFragmentProcessor::Make(std::move(view), at, dir, radius, sigma, wm,
                                                        subset, domain, *d->caps());
}
#endif