1/*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
#include "src/gpu/effects/GrMatrixConvolutionEffect.h"

#include "include/private/SkHalf.h"
#include "src/gpu/GrDirectContextPriv.h"
#include "src/gpu/GrProxyProvider.h"
#include "src/gpu/GrRecordingContextPriv.h"
#include "src/gpu/GrTexture.h"
#include "src/gpu/GrTextureProxy.h"
#include "src/gpu/GrThreadSafeCache.h"
#include "src/gpu/SkGr.h"
#include "src/gpu/effects/GrTextureEffect.h"
#include "src/gpu/glsl/GrGLSLFragmentShaderBuilder.h"
#include "src/gpu/glsl/GrGLSLProgramDataManager.h"
#include "src/gpu/glsl/GrGLSLUniformHandler.h"

#include <cstring>
21
22class GrMatrixConvolutionEffect::Impl : public ProgramImpl {
23public:
24    void emitCode(EmitArgs&) override;
25
26private:
27    void onSetData(const GrGLSLProgramDataManager&, const GrFragmentProcessor&) override;
28
29    typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
30
31    void emitKernelBlock(EmitArgs&, SkIPoint);
32
33    UniformHandle               fKernelUni;
34    UniformHandle               fKernelOffsetUni;
35    UniformHandle               fGainUni;
36    UniformHandle               fBiasUni;
37    UniformHandle               fKernelBiasUni;
38
39    using INHERITED = ProgramImpl;
40};
41
42GrMatrixConvolutionEffect::KernelWrapper::MakeResult
43GrMatrixConvolutionEffect::KernelWrapper::Make(GrRecordingContext* rContext,
44                                               SkISize size,
45                                               const GrCaps& caps,
46                                               const SkScalar* values) {
47    if (!rContext || !values || size.isEmpty()) {
48        return {};
49    }
50
51    const int length = size.area();
52    // Small kernel -> just fill the array.
53    KernelWrapper result(size);
54    if (length <= kMaxUniformSize) {
55        for (int i = 0; i < length; i++) {
56            result.fArray[i] = SkScalarToFloat(values[i]);
57        }
58        return {result, nullptr};
59    }
60
61    BiasAndGain& scalableSampler = result.fBiasAndGain;
62    bool useA16 =
63        rContext->defaultBackendFormat(kA16_float_SkColorType, GrRenderable::kNo).isValid();
64    SkScalar min = values[0];
65    if (!useA16) {
66        // Determine min and max values to figure out inner gain & bias.
67        SkScalar max = values[0];
68        for (int i = 1; i < length; i++) {
69            if (values[i] < min) {
70                min = values[i];
71            }
72            if (values[i] > max) {
73                max = values[i];
74            }
75        }
76        // Treat near-0 gain (i.e. box blur) as 1, and let the kernelBias
77        // move everything up to the final value.
78        const SkScalar computedGain = max - min;
79        scalableSampler.fGain =
80            SkScalarNearlyZero(computedGain) ? 1.0f : SkScalarToFloat(computedGain);
81        // Inner bias is pre-inner-gain so we divide that out.
82        scalableSampler.fBias = SkScalarToFloat(min) / scalableSampler.fGain;
83    }
84
85    // TODO: Pick cache or dont-cache based on observed perf.
86    static constexpr bool kCacheKernelTexture = true;
87
88    GrUniqueKey key;
89    if (kCacheKernelTexture) {
90        static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
91        GrUniqueKey::Builder builder(&key, kDomain, length, "Matrix Convolution Kernel");
92        // Texture cache key is the exact content of the kernel.
93        static_assert(sizeof(float) == 4);
94        for (int i = 0; i < length; i++) {
95            builder[i] = *(const uint32_t*)&values[i];
96        }
97        builder.finish();
98    }
99
100    // Find or create a texture.
101    auto threadSafeCache = rContext->priv().threadSafeCache();
102
103    SkColorType colorType = useA16 ? kA16_float_SkColorType : kAlpha_8_SkColorType;
104
105    GrSurfaceProxyView view;
106    if (kCacheKernelTexture && (view = threadSafeCache->find(key))) {
107        SkASSERT(view.origin() == kTopLeft_GrSurfaceOrigin);
108        auto kernelFP = GrTextureEffect::Make(std::move(view), kUnknown_SkAlphaType);
109        return {result, std::move(kernelFP)};
110    }
111
112    SkBitmap bm;
113    auto info = SkImageInfo::Make({length, 1}, colorType, kPremul_SkAlphaType, nullptr);
114    if (!bm.tryAllocPixels(info)) {
115        return {};
116    }
117    for (int i = 0; i < length; i++) {
118        if (useA16) {
119            *bm.getAddr16(i, 0) = SkFloatToHalf(values[i]);
120        } else {
121            *bm.getAddr8(i, 0) =
122                SkScalarRoundToInt((values[i] - min) / scalableSampler.fGain * 255);
123        }
124    }
125    bm.setImmutable();
126
127    view = std::get<0>(GrMakeUncachedBitmapProxyView(rContext, bm));
128    if (!view) {
129        return {};
130    }
131
132    if (kCacheKernelTexture) {
133        view = threadSafeCache->add(key, view);
134    }
135
136    SkASSERT(view.origin() == kTopLeft_GrSurfaceOrigin);
137    auto kernelFP = GrTextureEffect::Make(std::move(view), kUnknown_SkAlphaType);
138    return {result, std::move(kernelFP)};
139}
140
141bool GrMatrixConvolutionEffect::KernelWrapper::operator==(const KernelWrapper& k) const {
142    if (fSize != k.fSize) {
143        return false;
144    } else if (this->isSampled()) {
145        return fBiasAndGain == k.fBiasAndGain;
146    } else {
147        return std::equal(fArray.begin(), fArray.begin() + fSize.area(), k.fArray.begin());
148    }
149}
150
151bool GrMatrixConvolutionEffect::KernelWrapper::BiasAndGain::operator==(
152                                                                const BiasAndGain& k) const {
153    return fGain == k.fGain && fBias == k.fBias;
154}
155
156// For sampled kernels, emit a for loop that does all the kernel accumulation.
157// For uniform kernels, emit a single iteration. Function is called repeatedly in a for loop.
158// loc is ignored for sampled kernels.
159void GrMatrixConvolutionEffect::Impl::emitKernelBlock(EmitArgs& args, SkIPoint loc) {
160    const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
161    GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
162    GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
163    int kernelWidth = mce.fKernel.size().width();
164    int kernelHeight = mce.fKernel.size().height();
165    int kernelArea = kernelWidth * kernelHeight;
166
167    if (mce.fKernel.isSampled()) {
168        fragBuilder->codeAppendf("for (int i = 0; i < %d; ++i)", (int)kernelArea);
169    }
170
171    GrGLSLShaderBuilder::ShaderBlock block(fragBuilder);
172
173    fragBuilder->codeAppend("half k;");
174    fragBuilder->codeAppend("half2 sourceOffset;");
175    if (mce.fKernel.isSampled()) {
176        const char* kernelBias = uniformHandler->getUniformCStr(fKernelBiasUni);
177        SkString kernelSample = this->invokeChild(1, args, "float2(float(i) + 0.5, 0.5)");
178        fragBuilder->codeAppendf("k = %s.w + %s;", kernelSample.c_str(), kernelBias);
179        fragBuilder->codeAppendf("sourceOffset.y = floor(half(i) / %d);", kernelWidth);
180        fragBuilder->codeAppendf("sourceOffset.x = half(i) - sourceOffset.y * %d;", kernelWidth);
181    } else {
182        fragBuilder->codeAppendf("sourceOffset = half2(%d, %d);", loc.x(), loc.y());
183        int offset = loc.y() * kernelWidth + loc.x();
184        const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
185        fragBuilder->codeAppendf("k = %s[%d][%d];", kernel, offset / 4, offset & 0x3);
186    }
187
188    auto sample = this->invokeChild(0, args, "coord + sourceOffset");
189    fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
190    if (!mce.fConvolveAlpha) {
191        fragBuilder->codeAppend("c = unpremul(c);");
192        fragBuilder->codeAppend("c.rgb = saturate(c.rgb);");
193    }
194    fragBuilder->codeAppend("sum += c * k;");
195}
196
197void GrMatrixConvolutionEffect::Impl::emitCode(EmitArgs& args) {
198    const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
199
200    int kernelWidth = mce.fKernel.size().width();
201    int kernelHeight = mce.fKernel.size().height();
202
203    int arrayCount = (kernelWidth * kernelHeight + 3) / 4;
204    SkASSERT(4 * arrayCount >= kernelWidth * kernelHeight);
205
206    GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
207    if (mce.fKernel.isSampled()) {
208        fKernelBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag,
209                                                    kHalf_GrSLType, "KernelBias");
210    } else {
211        fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag,
212                                                     kHalf4_GrSLType, "Kernel", arrayCount);
213    }
214    fKernelOffsetUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf2_GrSLType,
215                                                  "KernelOffset");
216    fGainUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Gain");
217    fBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Bias");
218
219    const char* kernelOffset = uniformHandler->getUniformCStr(fKernelOffsetUni);
220    const char* gain = uniformHandler->getUniformCStr(fGainUni);
221    const char* bias = uniformHandler->getUniformCStr(fBiasUni);
222
223    GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
224    fragBuilder->codeAppend("half4 sum = half4(0);");
225    fragBuilder->codeAppendf("float2 coord = %s - %s;", args.fSampleCoord, kernelOffset);
226
227    if (mce.fKernel.isSampled()) {
228        this->emitKernelBlock(args, {});
229    } else {
230        for (int x = 0; x < kernelWidth; ++x) {
231            for (int y = 0; y < kernelHeight; ++y) {
232                this->emitKernelBlock(args, SkIPoint::Make(x, y));
233            }
234        }
235    }
236
237    fragBuilder->codeAppendf("half4 color;");
238    if (mce.fConvolveAlpha) {
239        fragBuilder->codeAppendf("color = sum * %s + %s;", gain, bias);
240        fragBuilder->codeAppendf("color.a = saturate(color.a);");
241        fragBuilder->codeAppendf("color.rgb = clamp(color.rgb, 0.0, color.a);");
242    } else {
243        auto sample = this->invokeChild(0, args);
244        fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
245        fragBuilder->codeAppendf("color.a = c.a;");
246        fragBuilder->codeAppendf("color.rgb = saturate(sum.rgb * %s + %s);", gain, bias);
247        fragBuilder->codeAppendf("color.rgb *= color.a;");
248    }
249    fragBuilder->codeAppendf("return color;");
250}
251
252void GrMatrixConvolutionEffect::Impl::onSetData(const GrGLSLProgramDataManager& pdman,
253                                                const GrFragmentProcessor& processor) {
254    const GrMatrixConvolutionEffect& conv = processor.cast<GrMatrixConvolutionEffect>();
255    pdman.set2f(fKernelOffsetUni, conv.fKernelOffset.fX, conv.fKernelOffset.fY);
256    float totalGain = conv.fGain;
257    if (conv.fKernel.isSampled()) {
258        totalGain *= conv.fKernel.biasAndGain().fGain;
259        pdman.set1f(fKernelBiasUni, conv.fKernel.biasAndGain().fBias);
260    } else {
261        int kernelCount = conv.fKernel.size().area();
262        int arrayCount = (kernelCount + 3) / 4;
263        SkASSERT(4 * arrayCount >= kernelCount);
264        pdman.set4fv(fKernelUni, arrayCount, conv.fKernel.array().data());
265    }
266    pdman.set1f(fBiasUni, conv.fBias);
267    pdman.set1f(fGainUni, totalGain);
268}
269
270GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,
271                                                     const KernelWrapper& kernel,
272                                                     std::unique_ptr<GrFragmentProcessor> kernelFP,
273                                                     SkScalar gain,
274                                                     SkScalar bias,
275                                                     const SkIPoint& kernelOffset,
276                                                     bool convolveAlpha)
277        // To advertise either the modulation or opaqueness optimizations we'd have to examine the
278        // parameters.
279        : INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
280        , fKernel(kernel)
281        , fGain(SkScalarToFloat(gain))
282        , fBias(SkScalarToFloat(bias) / 255.0f)
283        , fConvolveAlpha(convolveAlpha) {
284    this->registerChild(std::move(child), SkSL::SampleUsage::Explicit());
285    this->registerChild(std::move(kernelFP), SkSL::SampleUsage::Explicit());
286    fKernelOffset = {static_cast<float>(kernelOffset.x()),
287                     static_cast<float>(kernelOffset.y())};
288    this->setUsesSampleCoordsDirectly();
289}
290
291GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(const GrMatrixConvolutionEffect& that)
292        : INHERITED(that)
293        , fKernel(that.fKernel)
294        , fGain(that.fGain)
295        , fBias(that.fBias)
296        , fKernelOffset(that.fKernelOffset)
297        , fConvolveAlpha(that.fConvolveAlpha) {}
298
299std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::clone() const {
300    return std::unique_ptr<GrFragmentProcessor>(new GrMatrixConvolutionEffect(*this));
301}
302
303SkString GrMatrixConvolutionEffect::getShaderDfxInfo() const
304{
305    SkString format;
306    format.printf("ShaderDfx_GrMatrixConvolutionEffect_%d_%d_%d",
307        fKernel.size().width(), fKernel.size().height(), fConvolveAlpha);
308    return format;
309}
310
311void GrMatrixConvolutionEffect::onAddToKey(const GrShaderCaps& caps,
312                                           GrProcessorKeyBuilder* b) const {
313    SkASSERT(this->fKernel.size().width() <= 0x7FFF && this->fKernel.size().height() <= 0xFFFF);
314    uint32_t key = this->fKernel.size().width() << 16 | this->fKernel.size().height();
315    key |= fConvolveAlpha ? 1U << 31 : 0;
316    b->add32(key);
317}
318
319std::unique_ptr<GrFragmentProcessor::ProgramImpl>
320GrMatrixConvolutionEffect::onMakeProgramImpl() const {
321    return std::make_unique<Impl>();
322}
323
324bool GrMatrixConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
325    const GrMatrixConvolutionEffect& s = sBase.cast<GrMatrixConvolutionEffect>();
326    return fKernel == s.fKernel             &&
327           fGain == s.fGain                 &&
328           fBias == s.fBias                 &&
329           fKernelOffset == s.fKernelOffset &&
330           fConvolveAlpha == s.fConvolveAlpha;
331}
332
333std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrRecordingContext* context,
334                                                                     GrSurfaceProxyView srcView,
335                                                                     const SkIRect& srcBounds,
336                                                                     const SkISize& kernelSize,
337                                                                     const SkScalar* kernel,
338                                                                     SkScalar gain,
339                                                                     SkScalar bias,
340                                                                     const SkIPoint& kernelOffset,
341                                                                     GrSamplerState::WrapMode wm,
342                                                                     bool convolveAlpha,
343                                                                     const GrCaps& caps) {
344    auto [kernelWrapper, kernelFP] = KernelWrapper::Make(context, kernelSize, caps, kernel);
345    if (!kernelWrapper.isValid()) {
346        return nullptr;
347    }
348    GrSamplerState sampler(wm, GrSamplerState::Filter::kNearest);
349    auto child = GrTextureEffect::MakeSubset(std::move(srcView), kPremul_SkAlphaType, SkMatrix::I(),
350                                             sampler, SkRect::Make(srcBounds), caps);
351    return std::unique_ptr<GrFragmentProcessor>(
352            new GrMatrixConvolutionEffect(std::move(child), kernelWrapper, std::move(kernelFP),
353                                          gain, bias, kernelOffset, convolveAlpha));
354}
355
356GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrMatrixConvolutionEffect);
357
358#if GR_TEST_UTILS
359std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::TestCreate(GrProcessorTestData* d) {
360    auto [view, ct, at] = d->randomView();
361
362    static constexpr size_t kMaxTestKernelSize = 2 * kMaxUniformSize;
363    int width = d->fRandom->nextRangeU(1, kMaxTestKernelSize);
364    int height = d->fRandom->nextRangeU(1, kMaxTestKernelSize / width);
365    SkISize kernelSize = SkISize::Make(width, height);
366    std::unique_ptr<SkScalar[]> kernel(new SkScalar[width * height]);
367    for (int i = 0; i < width * height; i++) {
368        kernel.get()[i] = d->fRandom->nextSScalar1();
369    }
370    SkScalar gain = d->fRandom->nextSScalar1();
371    SkScalar bias = d->fRandom->nextSScalar1();
372
373    uint32_t kernalOffsetX = d->fRandom->nextRangeU(0, kernelSize.width());
374    uint32_t kernalOffsetY = d->fRandom->nextRangeU(0, kernelSize.height());
375    SkIPoint kernelOffset = SkIPoint::Make(kernalOffsetX, kernalOffsetY);
376
377    uint32_t boundsX = d->fRandom->nextRangeU(0, view.width());
378    uint32_t boundsY = d->fRandom->nextRangeU(0, view.height());
379    uint32_t boundsW = d->fRandom->nextRangeU(0, view.width());
380    uint32_t boundsH = d->fRandom->nextRangeU(0, view.height());
381    SkIRect bounds = SkIRect::MakeXYWH(boundsX, boundsY, boundsW, boundsH);
382
383    auto wm = static_cast<GrSamplerState::WrapMode>(
384            d->fRandom->nextULessThan(GrSamplerState::kWrapModeCount));
385    bool convolveAlpha = d->fRandom->nextBool();
386    return GrMatrixConvolutionEffect::Make(d->context(),
387                                           std::move(view),
388                                           bounds,
389                                           kernelSize,
390                                           kernel.get(),
391                                           gain,
392                                           bias,
393                                           kernelOffset,
394                                           wm,
395                                           convolveAlpha,
396                                           *d->caps());
397}
398#endif
399