1/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/core/SkGpuBlurUtils.h"
9
10#include "include/core/SkBitmap.h"
11#include "include/core/SkRect.h"
12#include "src/core/SkMathPriv.h"
13
14#if SK_SUPPORT_GPU
15#include "include/gpu/GrRecordingContext.h"
16#include "src/gpu/GrCaps.h"
17#include "src/gpu/GrRecordingContextPriv.h"
18#include "src/gpu/SkGr.h"
19#include "src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h"
20#include "src/gpu/effects/GrMatrixConvolutionEffect.h"
21#include "src/gpu/effects/GrTextureEffect.h"
22
23#if SK_GPU_V1
24#include "src/gpu/v1/SurfaceDrawContext_v1.h"
25
26using Direction = GrGaussianConvolutionFragmentProcessor::Direction;
27
28static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
29                                       SkScalar sigmaX, SkScalar sigmaY) {
30    const float twoSigmaSqrdX = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaX));
31    const float twoSigmaSqrdY = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaY));
32
33    // SkGpuBlurUtils::GaussianBlur() should have detected the cases where a 2D blur
34    // degenerates to a 1D on X or Y, or to the identity.
35    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaX) &&
36             !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaY));
37    SkASSERT(!SkScalarNearlyZero(twoSigmaSqrdX) && !SkScalarNearlyZero(twoSigmaSqrdY));
38
39    const float sigmaXDenom = 1.0f / twoSigmaSqrdX;
40    const float sigmaYDenom = 1.0f / twoSigmaSqrdY;
41    const int xRadius = width / 2;
42    const int yRadius = height / 2;
43
44    float sum = 0.0f;
45    for (int x = 0; x < width; x++) {
46        float xTerm = static_cast<float>(x - xRadius);
47        xTerm = xTerm * xTerm * sigmaXDenom;
48        for (int y = 0; y < height; y++) {
49            float yTerm = static_cast<float>(y - yRadius);
50            float xyTerm = sk_float_exp(-(xTerm + yTerm * yTerm * sigmaYDenom));
51            // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
52            // is dropped here, since we renormalize the kernel below.
53            kernel[y * width + x] = xyTerm;
54            sum += xyTerm;
55        }
56    }
57    // Normalize the kernel
58    float scale = 1.0f / sum;
59    for (int i = 0; i < width * height; ++i) {
60        kernel[i] *= scale;
61    }
62}
63
64/**
65 * Draws 'dstRect' into 'surfaceFillContext' evaluating a 1D Gaussian over 'srcView'. The src rect
66 * is 'dstRect' offset by 'dstToSrcOffset'. 'mode' and 'bounds' are applied to the src coords.
67 */
68static void convolve_gaussian_1d(skgpu::SurfaceFillContext* sfc,
69                                 GrSurfaceProxyView srcView,
70                                 const SkIRect srcSubset,
71                                 SkIVector dstToSrcOffset,
72                                 const SkIRect& dstRect,
73                                 SkAlphaType srcAlphaType,
74                                 Direction direction,
75                                 int radius,
76                                 float sigma,
77                                 SkTileMode mode) {
78    SkASSERT(radius && !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma));
79    auto wm = SkTileModeToWrapMode(mode);
80    auto srcRect = dstRect.makeOffset(dstToSrcOffset);
81    // NOTE: This could just be GrMatrixConvolutionEffect with one of the dimensions set to 1
82    // and the appropriate kernel already computed, but there's value in keeping the shader simpler.
83    // TODO(michaelludwig): Is this true? If not, is the shader key simplicity worth it two have
84    // two convolution effects?
85    std::unique_ptr<GrFragmentProcessor> conv =
86            GrGaussianConvolutionFragmentProcessor::Make(std::move(srcView),
87                                                         srcAlphaType,
88                                                         direction,
89                                                         radius,
90                                                         sigma,
91                                                         wm,
92                                                         srcSubset,
93                                                         &srcRect,
94                                                         *sfc->caps());
95    sfc->fillRectToRectWithFP(srcRect, dstRect, std::move(conv));
96}
97
98static std::unique_ptr<skgpu::v1::SurfaceDrawContext> convolve_gaussian_2d(
99        GrRecordingContext* rContext,
100        GrSurfaceProxyView srcView,
101        GrColorType srcColorType,
102        const SkIRect& srcBounds,
103        const SkIRect& dstBounds,
104        int radiusX,
105        int radiusY,
106        SkScalar sigmaX,
107        SkScalar sigmaY,
108        SkTileMode mode,
109        sk_sp<SkColorSpace> finalCS,
110        SkBackingFit dstFit) {
111    SkASSERT(radiusX && radiusY);
112    SkASSERT(!SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaX) &&
113             !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigmaY));
114    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
115    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
116    auto sdc = skgpu::v1::SurfaceDrawContext::Make(
117            rContext, srcColorType, std::move(finalCS), dstFit, dstBounds.size(), SkSurfaceProps(),
118            1, GrMipmapped::kNo, srcView.proxy()->isProtected(), srcView.origin());
119    if (!sdc) {
120        return nullptr;
121    }
122
123    SkISize size = SkISize::Make(SkGpuBlurUtils::KernelWidth(radiusX),
124                                 SkGpuBlurUtils::KernelWidth(radiusY));
125    SkIPoint kernelOffset = SkIPoint::Make(radiusX, radiusY);
126    GrPaint paint;
127    auto wm = SkTileModeToWrapMode(mode);
128
129    // GaussianBlur() should have downsampled the request until we can handle the 2D blur with
130    // just a uniform array.
131    SkASSERT(size.area() <= GrMatrixConvolutionEffect::kMaxUniformSize);
132    float kernel[GrMatrixConvolutionEffect::kMaxUniformSize];
133    fill_in_2D_gaussian_kernel(kernel, size.width(), size.height(), sigmaX, sigmaY);
134    auto conv = GrMatrixConvolutionEffect::Make(rContext, std::move(srcView), srcBounds,
135                                                size, kernel, 1.0f, 0.0f, kernelOffset, wm, true,
136                                                *sdc->caps());
137
138    paint.setColorFragmentProcessor(std::move(conv));
139    paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
140
141    // 'dstBounds' is actually in 'srcView' proxy space. It represents the blurred area from src
142    // space that we want to capture in the new RTC at {0, 0}. Hence, we use its size as the rect to
143    // draw and it directly as the local rect.
144    sdc->fillRectToRect(nullptr, std::move(paint), GrAA::kNo, SkMatrix::I(),
145                        SkRect::Make(dstBounds.size()), SkRect::Make(dstBounds));
146
147    return sdc;
148}
149
150static std::unique_ptr<skgpu::v1::SurfaceDrawContext> convolve_gaussian(
151        GrRecordingContext* rContext,
152        GrSurfaceProxyView srcView,
153        GrColorType srcColorType,
154        SkAlphaType srcAlphaType,
155        SkIRect srcBounds,
156        SkIRect dstBounds,
157        Direction direction,
158        int radius,
159        float sigma,
160        SkTileMode mode,
161        sk_sp<SkColorSpace> finalCS,
162        SkBackingFit fit) {
163    using namespace SkGpuBlurUtils;
164    SkASSERT(radius > 0 && !SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma));
165    // Logically we're creating an infinite blur of 'srcBounds' of 'srcView' with 'mode' tiling
166    // and then capturing the 'dstBounds' portion in a new RTC where the top left of 'dstBounds' is
167    // at {0, 0} in the new RTC.
168    //
169    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
170    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
171    auto dstSDC = skgpu::v1::SurfaceDrawContext::Make(
172            rContext, srcColorType, std::move(finalCS), fit, dstBounds.size(), SkSurfaceProps(), 1,
173            GrMipmapped::kNo, srcView.proxy()->isProtected(), srcView.origin());
174    if (!dstSDC) {
175        return nullptr;
176    }
177    // This represents the translation from 'dstSurfaceDrawContext' coords to 'srcView' coords.
178    auto rtcToSrcOffset = dstBounds.topLeft();
179
180    auto srcBackingBounds = SkIRect::MakeSize(srcView.proxy()->backingStoreDimensions());
181    // We've implemented splitting the dst bounds up into areas that do and do not need to
182    // use shader based tiling but only for some modes...
183    bool canSplit = mode == SkTileMode::kDecal || mode == SkTileMode::kClamp;
184    // ...but it's not worth doing the splitting if we'll get HW tiling instead of shader tiling.
185    bool canHWTile =
186            srcBounds.contains(srcBackingBounds)         &&
187            !rContext->priv().caps()->reducedShaderMode() && // this mode always uses shader tiling
188            !(mode == SkTileMode::kDecal && !rContext->priv().caps()->clampToBorderSupport());
189    if (!canSplit || canHWTile) {
190        auto dstRect = SkIRect::MakeSize(dstBounds.size());
191        convolve_gaussian_1d(dstSDC.get(), std::move(srcView), srcBounds,
192                             rtcToSrcOffset, dstRect, srcAlphaType, direction, radius, sigma, mode);
193        return dstSDC;
194    }
195
196    // 'left' and 'right' are the sub rects of 'srcBounds' where 'mode' must be enforced.
197    // 'mid' is the area where we can ignore the mode because the kernel does not reach to the
198    // edge of 'srcBounds'.
199    SkIRect mid, left, right;
200    // 'top' and 'bottom' are areas of 'dstBounds' that are entirely above/below 'srcBounds'.
201    // These are areas that we can simply clear in the dst in kDecal mode. If 'srcBounds'
202    // straddles the top edge of 'dstBounds' then 'top' will be inverted and we will skip
203    // processing for the rect. Similar for 'bottom'. The positional/directional labels above refer
204    // to the Direction::kX case and one should think of these as 'left' and 'right' for
205    // Direction::kY.
206    SkIRect top, bottom;
207    if (Direction::kX == direction) {
208        top    = {dstBounds.left(), dstBounds.top()   , dstBounds.right(), srcBounds.top()   };
209        bottom = {dstBounds.left(), srcBounds.bottom(), dstBounds.right(), dstBounds.bottom()};
210
211        // Inset for sub-rect of 'srcBounds' where the x-dir kernel doesn't reach the edges, clipped
212        // vertically to dstBounds.
213        int midA = std::max(srcBounds.top()   , dstBounds.top()   );
214        int midB = std::min(srcBounds.bottom(), dstBounds.bottom());
215        mid = {srcBounds.left() + radius, midA, srcBounds.right() - radius, midB};
216        if (mid.isEmpty()) {
217            // There is no middle where the bounds can be ignored. Make the left span the whole
218            // width of dst and we will not draw mid or right.
219            left = {dstBounds.left(), mid.top(), dstBounds.right(), mid.bottom()};
220        } else {
221            left  = {dstBounds.left(), mid.top(), mid.left()       , mid.bottom()};
222            right = {mid.right(),      mid.top(), dstBounds.right(), mid.bottom()};
223        }
224    } else {
225        // This is the same as the x direction code if you turn your head 90 degrees CCW. Swap x and
226        // y and swap top/bottom with left/right.
227        top    = {dstBounds.left(),  dstBounds.top(), srcBounds.left() , dstBounds.bottom()};
228        bottom = {srcBounds.right(), dstBounds.top(), dstBounds.right(), dstBounds.bottom()};
229
230        int midA = std::max(srcBounds.left() , dstBounds.left() );
231        int midB = std::min(srcBounds.right(), dstBounds.right());
232        mid = {midA, srcBounds.top() + radius, midB, srcBounds.bottom() - radius};
233
234        if (mid.isEmpty()) {
235            left = {mid.left(), dstBounds.top(), mid.right(), dstBounds.bottom()};
236        } else {
237            left  = {mid.left(), dstBounds.top(), mid.right(), mid.top()         };
238            right = {mid.left(), mid.bottom()   , mid.right(), dstBounds.bottom()};
239        }
240    }
241
242    auto convolve = [&](SkIRect rect) {
243        // Transform rect into the render target's coord system.
244        rect.offset(-rtcToSrcOffset);
245        convolve_gaussian_1d(dstSDC.get(), srcView, srcBounds, rtcToSrcOffset, rect,
246                             srcAlphaType, direction, radius, sigma, mode);
247    };
248    auto clear = [&](SkIRect rect) {
249        // Transform rect into the render target's coord system.
250        rect.offset(-rtcToSrcOffset);
251        dstSDC->clearAtLeast(rect, SK_PMColor4fTRANSPARENT);
252    };
253
254    // Doing mid separately will cause two draws to occur (left and right batch together). At
255    // small sizes of mid it is worse to issue more draws than to just execute the slightly
256    // more complicated shader that implements the tile mode across mid. This threshold is
257    // very arbitrary right now. It is believed that a 21x44 mid on a Moto G4 is a significant
258    // regression compared to doing one draw but it has not been locally evaluated or tuned.
259    // The optimal cutoff is likely to vary by GPU.
260    if (!mid.isEmpty() && mid.width()*mid.height() < 256*256) {
261        left.join(mid);
262        left.join(right);
263        mid = SkIRect::MakeEmpty();
264        right = SkIRect::MakeEmpty();
265        // It's unknown whether for kDecal it'd be better to expand the draw rather than a draw and
266        // up to two clears.
267        if (mode == SkTileMode::kClamp) {
268            left.join(top);
269            left.join(bottom);
270            top = SkIRect::MakeEmpty();
271            bottom = SkIRect::MakeEmpty();
272        }
273    }
274
275    if (!top.isEmpty()) {
276        if (mode == SkTileMode::kDecal) {
277            clear(top);
278        } else {
279            convolve(top);
280        }
281    }
282
283    if (!bottom.isEmpty()) {
284        if (mode == SkTileMode::kDecal) {
285            clear(bottom);
286        } else {
287            convolve(bottom);
288        }
289    }
290
291    if (mid.isEmpty()) {
292        convolve(left);
293    } else {
294        convolve(left);
295        convolve(right);
296        convolve(mid);
297    }
298    return dstSDC;
299}
300
301// Expand the contents of 'src' to fit in 'dstSize'. At this point, we are expanding an intermediate
302// image, so there's no need to account for a proxy offset from the original input.
303static std::unique_ptr<skgpu::v1::SurfaceDrawContext> reexpand(
304        GrRecordingContext* rContext,
305        std::unique_ptr<skgpu::SurfaceContext> src,
306        const SkRect& srcBounds,
307        SkISize dstSize,
308        sk_sp<SkColorSpace> colorSpace,
309        SkBackingFit fit) {
310    GrSurfaceProxyView srcView = src->readSurfaceView();
311    if (!srcView.asTextureProxy()) {
312        return nullptr;
313    }
314
315    GrColorType srcColorType = src->colorInfo().colorType();
316    SkAlphaType srcAlphaType = src->colorInfo().alphaType();
317
318    src.reset(); // no longer needed
319
320    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
321    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
322    auto dstSDC = skgpu::v1::SurfaceDrawContext::Make(
323            rContext, srcColorType, std::move(colorSpace), fit, dstSize, SkSurfaceProps(), 1,
324            GrMipmapped::kNo, srcView.proxy()->isProtected(), srcView.origin());
325    if (!dstSDC) {
326        return nullptr;
327    }
328
329    GrPaint paint;
330    auto fp = GrTextureEffect::MakeSubset(std::move(srcView), srcAlphaType, SkMatrix::I(),
331                                          GrSamplerState::Filter::kLinear, srcBounds, srcBounds,
332                                          *rContext->priv().caps());
333    paint.setColorFragmentProcessor(std::move(fp));
334    paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
335
336    dstSDC->fillRectToRect(nullptr, std::move(paint), GrAA::kNo, SkMatrix::I(),
337                           SkRect::Make(dstSize), srcBounds);
338
339    return dstSDC;
340}
341
342static std::unique_ptr<skgpu::v1::SurfaceDrawContext> two_pass_gaussian(
343        GrRecordingContext* rContext,
344        GrSurfaceProxyView srcView,
345        GrColorType srcColorType,
346        SkAlphaType srcAlphaType,
347        sk_sp<SkColorSpace> colorSpace,
348        SkIRect srcBounds,
349        SkIRect dstBounds,
350        float sigmaX,
351        float sigmaY,
352        int radiusX,
353        int radiusY,
354        SkTileMode mode,
355        SkBackingFit fit) {
356    SkASSERT(radiusX || radiusY);
357    std::unique_ptr<skgpu::v1::SurfaceDrawContext> dstSDC;
358    if (radiusX > 0) {
359        SkBackingFit xFit = radiusY > 0 ? SkBackingFit::kApprox : fit;
360        // Expand the dstBounds vertically to produce necessary content for the y-pass. Then we will
361        // clip these in a tile-mode dependent way to ensure the tile-mode gets implemented
362        // correctly. However, if we're not going to do a y-pass then we must use the original
363        // dstBounds without clipping to produce the correct output size.
364        SkIRect xPassDstBounds = dstBounds;
365        if (radiusY) {
366            xPassDstBounds.outset(0, radiusY);
367            if (mode == SkTileMode::kRepeat || mode == SkTileMode::kMirror) {
368                int srcH = srcBounds.height();
369                int srcTop = srcBounds.top();
370                if (mode == SkTileMode::kMirror) {
371                    srcTop -= srcH;
372                    srcH *= 2;
373                }
374
375                float floatH = srcH;
376                // First row above the dst rect where we should restart the tile mode.
377                int n = sk_float_floor2int_no_saturate((xPassDstBounds.top() - srcTop)/floatH);
378                int topClip = srcTop + n*srcH;
379
380                // First row above below the dst rect where we should restart the tile mode.
381                n = sk_float_ceil2int_no_saturate(
382                        (xPassDstBounds.bottom() - srcBounds.bottom())/floatH);
383                int bottomClip = srcBounds.bottom() + n*srcH;
384
385                xPassDstBounds.fTop    = std::max(xPassDstBounds.top(),    topClip);
386                xPassDstBounds.fBottom = std::min(xPassDstBounds.bottom(), bottomClip);
387            } else {
388                if (xPassDstBounds.fBottom <= srcBounds.top()) {
389                    if (mode == SkTileMode::kDecal) {
390                        return nullptr;
391                    }
392                    xPassDstBounds.fTop = srcBounds.top();
393                    xPassDstBounds.fBottom = xPassDstBounds.fTop + 1;
394                } else if (xPassDstBounds.fTop >= srcBounds.bottom()) {
395                    if (mode == SkTileMode::kDecal) {
396                        return nullptr;
397                    }
398                    xPassDstBounds.fBottom = srcBounds.bottom();
399                    xPassDstBounds.fTop = xPassDstBounds.fBottom - 1;
400                } else {
401                    xPassDstBounds.fTop    = std::max(xPassDstBounds.fTop,    srcBounds.top());
402                    xPassDstBounds.fBottom = std::min(xPassDstBounds.fBottom, srcBounds.bottom());
403                }
404                int leftSrcEdge  = srcBounds.fLeft  - radiusX ;
405                int rightSrcEdge = srcBounds.fRight + radiusX;
406                if (mode == SkTileMode::kClamp) {
407                    // In clamp the column just outside the src bounds has the same value as the
408                    // column just inside, unlike decal.
409                    leftSrcEdge  += 1;
410                    rightSrcEdge -= 1;
411                }
412                if (xPassDstBounds.fRight <= leftSrcEdge) {
413                    if (mode == SkTileMode::kDecal) {
414                        return nullptr;
415                    }
416                    xPassDstBounds.fLeft = xPassDstBounds.fRight - 1;
417                } else {
418                    xPassDstBounds.fLeft = std::max(xPassDstBounds.fLeft, leftSrcEdge);
419                }
420                if (xPassDstBounds.fLeft >= rightSrcEdge) {
421                    if (mode == SkTileMode::kDecal) {
422                        return nullptr;
423                    }
424                    xPassDstBounds.fRight = xPassDstBounds.fLeft + 1;
425                } else {
426                    xPassDstBounds.fRight = std::min(xPassDstBounds.fRight, rightSrcEdge);
427                }
428            }
429        }
430        dstSDC = convolve_gaussian(
431                rContext, std::move(srcView), srcColorType, srcAlphaType, srcBounds, xPassDstBounds,
432                Direction::kX, radiusX, sigmaX, mode, colorSpace, xFit);
433        if (!dstSDC) {
434            return nullptr;
435        }
436        srcView = dstSDC->readSurfaceView();
437        SkIVector newDstBoundsOffset = dstBounds.topLeft() - xPassDstBounds.topLeft();
438        dstBounds = SkIRect::MakeSize(dstBounds.size()).makeOffset(newDstBoundsOffset);
439        srcBounds = SkIRect::MakeSize(xPassDstBounds.size());
440    }
441
442    if (!radiusY) {
443        return dstSDC;
444    }
445
446    return convolve_gaussian(rContext, std::move(srcView), srcColorType, srcAlphaType, srcBounds,
447                             dstBounds, Direction::kY, radiusY, sigmaY, mode, colorSpace, fit);
448}
449#endif // SK_GPU_V1
450
451namespace SkGpuBlurUtils {
452
453#if SK_GPU_V1
454std::unique_ptr<skgpu::v1::SurfaceDrawContext> GaussianBlur(GrRecordingContext* rContext,
455                                                            GrSurfaceProxyView srcView,
456                                                            GrColorType srcColorType,
457                                                            SkAlphaType srcAlphaType,
458                                                            sk_sp<SkColorSpace> colorSpace,
459                                                            SkIRect dstBounds,
460                                                            SkIRect srcBounds,
461                                                            float sigmaX,
462                                                            float sigmaY,
463                                                            SkTileMode mode,
464                                                            SkBackingFit fit) {
465    SkASSERT(rContext);
466    TRACE_EVENT2("skia.gpu", "GaussianBlur", "sigmaX", sigmaX, "sigmaY", sigmaY);
467
468    if (!srcView.asTextureProxy()) {
469        return nullptr;
470    }
471
472    int maxRenderTargetSize = rContext->priv().caps()->maxRenderTargetSize();
473    if (dstBounds.width() > maxRenderTargetSize || dstBounds.height() > maxRenderTargetSize) {
474        return nullptr;
475    }
476
477    int radiusX = SigmaRadius(sigmaX);
478    int radiusY = SigmaRadius(sigmaY);
479    // Attempt to reduce the srcBounds in order to detect that we can set the sigmas to zero or
480    // to reduce the amount of work to rescale the source if sigmas are large. TODO: Could consider
481    // how to minimize the required source bounds for repeat/mirror modes.
482    if (mode == SkTileMode::kClamp || mode == SkTileMode::kDecal) {
483        SkIRect reach = dstBounds.makeOutset(radiusX, radiusY);
484        SkIRect intersection;
485        if (!intersection.intersect(reach, srcBounds)) {
486            if (mode == SkTileMode::kDecal) {
487                return nullptr;
488            } else {
489                if (reach.fLeft >= srcBounds.fRight) {
490                    srcBounds.fLeft = srcBounds.fRight - 1;
491                } else if (reach.fRight <= srcBounds.fLeft) {
492                    srcBounds.fRight = srcBounds.fLeft + 1;
493                }
494                if (reach.fTop >= srcBounds.fBottom) {
495                    srcBounds.fTop = srcBounds.fBottom - 1;
496                } else if (reach.fBottom <= srcBounds.fTop) {
497                    srcBounds.fBottom = srcBounds.fTop + 1;
498                }
499            }
500        } else {
501            srcBounds = intersection;
502        }
503    }
504
505    if (mode != SkTileMode::kDecal) {
506        // All non-decal tile modes are equivalent for one pixel width/height src and amount to a
507        // single color value repeated at each column/row. Applying the normalized kernel to that
508        // column/row yields that same color. So no blurring is necessary.
509        if (srcBounds.width() == 1) {
510            sigmaX = 0.f;
511            radiusX = 0;
512        }
513        if (srcBounds.height() == 1) {
514            sigmaY = 0.f;
515            radiusY = 0;
516        }
517    }
518
    // If we determined that there is no blurring necessary in either direction then just do a
    // draw that applies the tile mode.
521    if (!radiusX && !radiusY) {
522        // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
523        // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
524        auto result = skgpu::v1::SurfaceDrawContext::Make(rContext,
525                                                          srcColorType,
526                                                          std::move(colorSpace),
527                                                          fit,
528                                                          dstBounds.size(),
529                                                          SkSurfaceProps(),
530                                                          1,
531                                                          GrMipmapped::kNo,
532                                                          srcView.proxy()->isProtected(),
533                                                          srcView.origin());
534        if (!result) {
535            return nullptr;
536        }
537        GrSamplerState sampler(SkTileModeToWrapMode(mode), GrSamplerState::Filter::kNearest);
538        auto fp = GrTextureEffect::MakeSubset(std::move(srcView),
539                                              srcAlphaType,
540                                              SkMatrix::I(),
541                                              sampler,
542                                              SkRect::Make(srcBounds),
543                                              SkRect::Make(dstBounds),
544                                              *rContext->priv().caps());
545        result->fillRectToRectWithFP(dstBounds, SkIRect::MakeSize(dstBounds.size()), std::move(fp));
546        return result;
547    }
548
549    if (sigmaX <= kMaxSigma && sigmaY <= kMaxSigma) {
550        SkASSERT(radiusX <= GrGaussianConvolutionFragmentProcessor::kMaxKernelRadius);
551        SkASSERT(radiusY <= GrGaussianConvolutionFragmentProcessor::kMaxKernelRadius);
552        // For really small blurs (certainly no wider than 5x5 on desktop GPUs) it is faster to just
553        // launch a single non separable kernel vs two launches.
554        const int kernelSize = (2 * radiusX + 1) * (2 * radiusY + 1);
555        if (radiusX > 0 && radiusY > 0 &&
556            kernelSize <= GrMatrixConvolutionEffect::kMaxUniformSize &&
557            !rContext->priv().caps()->reducedShaderMode()) {
558            // Apply the proxy offset to src bounds and offset directly
559            return convolve_gaussian_2d(rContext, std::move(srcView), srcColorType, srcBounds,
560                                        dstBounds, radiusX, radiusY, sigmaX, sigmaY, mode,
561                                        std::move(colorSpace), fit);
562        }
563        // This will automatically degenerate into a single pass of X or Y if only one of the
564        // radii are non-zero.
565        return two_pass_gaussian(rContext, std::move(srcView), srcColorType, srcAlphaType,
566                                 std::move(colorSpace), srcBounds, dstBounds, sigmaX, sigmaY,
567                                 radiusX, radiusY, mode, fit);
568    }
569
570    GrColorInfo colorInfo(srcColorType, srcAlphaType, colorSpace);
571    auto srcCtx = rContext->priv().makeSC(srcView, colorInfo);
572    SkASSERT(srcCtx);
573
574    float scaleX = sigmaX > kMaxSigma ? kMaxSigma/sigmaX : 1.f;
575    float scaleY = sigmaY > kMaxSigma ? kMaxSigma/sigmaY : 1.f;
    // We round down here so that when we recalculate sigmas we know they will be below
    // kMaxSigma (but clamp to 1 so we don't have an empty texture).
578    SkISize rescaledSize = {std::max(sk_float_floor2int(srcBounds.width() *scaleX), 1),
579                            std::max(sk_float_floor2int(srcBounds.height()*scaleY), 1)};
580    // Compute the sigmas using the actual scale factors used once we integerized the
581    // rescaledSize.
582    scaleX = static_cast<float>(rescaledSize.width()) /srcBounds.width();
583    scaleY = static_cast<float>(rescaledSize.height())/srcBounds.height();
584    sigmaX *= scaleX;
585    sigmaY *= scaleY;
586
587    // When we are in clamp mode any artifacts in the edge pixels due to downscaling may be
588    // exacerbated because of the tile mode. The particularly egregious case is when the original
589    // image has transparent black around the edges and the downscaling pulls in some non-zero
590    // values from the interior. Ultimately it'd be better for performance if the calling code could
591    // give us extra context around the blur to account for this. We don't currently have a good way
592    // to communicate this up stack. So we leave a 1 pixel border around the rescaled src bounds.
593    // We populate the top 1 pixel tall row of this border by rescaling the top row of the original
594    // source bounds into it. Because this is only rescaling in x (i.e. rescaling a 1 pixel high
595    // row into a shorter but still 1 pixel high row) we won't read any interior values. And similar
596    // for the other three borders. We'll adjust the source/dest bounds rescaled blur so that this
597    // border of extra pixels is used as the edge pixels for clamp mode but the dest bounds
598    // corresponds only to the pixels inside the border (the normally rescaled pixels inside this
599    // border).
600    // Moreover, if we clamped the rescaled size to 1 column or row then we still have a sigma
601    // that is greater than kMaxSigma. By using a pad and making the src 3 wide/tall instead of
602    // 1 we can recurse again and do another downscale. Since mirror and repeat modes are trivial
603    // for a single col/row we only add padding based on sigma exceeding kMaxSigma for decal.
604    int padX = mode == SkTileMode::kClamp ||
605               (mode == SkTileMode::kDecal && sigmaX > kMaxSigma) ? 1 : 0;
606    int padY = mode == SkTileMode::kClamp ||
607               (mode == SkTileMode::kDecal && sigmaY > kMaxSigma) ? 1 : 0;
608    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
609    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
610    auto rescaledSDC = skgpu::v1::SurfaceDrawContext::Make(
611            srcCtx->recordingContext(),
612            colorInfo.colorType(),
613            colorInfo.refColorSpace(),
614            SkBackingFit::kApprox,
615            {rescaledSize.width() + 2*padX, rescaledSize.height() + 2*padY},
616            SkSurfaceProps(),
617            1,
618            GrMipmapped::kNo,
619            srcCtx->asSurfaceProxy()->isProtected(),
620            srcCtx->origin());
621    if (!rescaledSDC) {
622        return nullptr;
623    }
624    if ((padX || padY) && mode == SkTileMode::kDecal) {
625        rescaledSDC->clear(SkPMColor4f{0, 0, 0, 0});
626    }
627    if (!srcCtx->rescaleInto(rescaledSDC.get(),
628                             SkIRect::MakeSize(rescaledSize).makeOffset(padX, padY),
629                             srcBounds,
630                             SkSurface::RescaleGamma::kSrc,
631                             SkSurface::RescaleMode::kRepeatedLinear)) {
632        return nullptr;
633    }
634    if (mode == SkTileMode::kClamp) {
635        SkASSERT(padX == 1 && padY == 1);
636        // Rather than run a potentially multi-pass rescaler on single rows/columns we just do a
637        // single bilerp draw. If we find this quality unacceptable we should think more about how
638        // to rescale these with better quality but without 4 separate multi-pass downscales.
639        auto cheapDownscale = [&](SkIRect dstRect, SkIRect srcRect) {
640            rescaledSDC->drawTexture(nullptr,
641                                     srcCtx->readSurfaceView(),
642                                     srcAlphaType,
643                                     GrSamplerState::Filter::kLinear,
644                                     GrSamplerState::MipmapMode::kNone,
645                                     SkBlendMode::kSrc,
646                                     SK_PMColor4fWHITE,
647                                     SkRect::Make(srcRect),
648                                     SkRect::Make(dstRect),
649                                     GrAA::kNo,
650                                     GrQuadAAFlags::kNone,
651                                     SkCanvas::SrcRectConstraint::kFast_SrcRectConstraint,
652                                     SkMatrix::I(),
653                                     nullptr);
654        };
655        auto [dw, dh] = rescaledSize;
656        // These are the src rows and columns from the source that we will scale into the dst padding.
657        float sLCol = srcBounds.left();
658        float sTRow = srcBounds.top();
659        float sRCol = srcBounds.right() - 1;
660        float sBRow = srcBounds.bottom() - 1;
661
662        int sx = srcBounds.left();
663        int sy = srcBounds.top();
664        int sw = srcBounds.width();
665        int sh = srcBounds.height();
666
667        // Downscale the edges from the original source. These draws should batch together (and with
668        // the above interior rescaling when it is a single pass).
669        cheapDownscale(SkIRect::MakeXYWH(     0,      1,  1, dh),
670                       SkIRect::MakeXYWH( sLCol,     sy,  1, sh));
671        cheapDownscale(SkIRect::MakeXYWH(     1,      0, dw,  1),
672                       SkIRect::MakeXYWH(    sx,  sTRow, sw,  1));
673        cheapDownscale(SkIRect::MakeXYWH(dw + 1,      1,  1, dh),
674                       SkIRect::MakeXYWH( sRCol,     sy,  1, sh));
675        cheapDownscale(SkIRect::MakeXYWH(     1, dh + 1, dw,  1),
676                       SkIRect::MakeXYWH(    sx,  sBRow, sw,  1));
677
678        // Copy the corners from the original source. These would batch with the edges except that
679        // at time of writing we recognize these can use kNearest and downgrade the filter. So they
680        // batch with each other but not the edge draws.
681        cheapDownscale(SkIRect::MakeXYWH(    0,     0,  1, 1),
682                       SkIRect::MakeXYWH(sLCol, sTRow,  1, 1));
683        cheapDownscale(SkIRect::MakeXYWH(dw + 1,     0, 1, 1),
684                       SkIRect::MakeXYWH(sRCol, sTRow,  1, 1));
685        cheapDownscale(SkIRect::MakeXYWH(dw + 1,dh + 1, 1, 1),
686                       SkIRect::MakeXYWH(sRCol, sBRow,  1, 1));
687        cheapDownscale(SkIRect::MakeXYWH(    0, dh + 1, 1, 1),
688                       SkIRect::MakeXYWH(sLCol, sBRow,  1, 1));
689    }
690    srcView = rescaledSDC->readSurfaceView();
691    // Drop the contexts so we don't hold the proxies longer than necessary.
692    rescaledSDC.reset();
693    srcCtx.reset();
694
695    // Compute the dst bounds in the scaled down space. First move the origin to be at the top
696    // left since we trimmed off everything above and to the left of the original src bounds during
697    // the rescale.
698    SkRect scaledDstBounds = SkRect::Make(dstBounds.makeOffset(-srcBounds.topLeft()));
699    scaledDstBounds.fLeft   *= scaleX;
700    scaledDstBounds.fTop    *= scaleY;
701    scaledDstBounds.fRight  *= scaleX;
702    scaledDstBounds.fBottom *= scaleY;
703    // Account for padding in our rescaled src, if any.
704    scaledDstBounds.offset(padX, padY);
705    // Turn the scaled down dst bounds into an integer pixel rect.
706    auto scaledDstBoundsI = scaledDstBounds.roundOut();
707
708    SkIRect scaledSrcBounds = SkIRect::MakeSize(srcView.dimensions());
709    auto sdc = GaussianBlur(rContext,
710                            std::move(srcView),
711                            srcColorType,
712                            srcAlphaType,
713                            colorSpace,
714                            scaledDstBoundsI,
715                            scaledSrcBounds,
716                            sigmaX,
717                            sigmaY,
718                            mode,
719                            fit);
720    if (!sdc) {
721        return nullptr;
722    }
723    // We rounded out the integer scaled dst bounds. Select the fractional dst bounds from the
724    // integer dimension blurred result when we scale back up.
725    scaledDstBounds.offset(-scaledDstBoundsI.left(), -scaledDstBoundsI.top());
726    return reexpand(rContext, std::move(sdc), scaledDstBounds, dstBounds.size(),
727                    std::move(colorSpace), fit);
728}
729#endif // SK_GPU_V1
730
731bool ComputeBlurredRRectParams(const SkRRect& srcRRect, const SkRRect& devRRect,
732                               SkScalar sigma, SkScalar xformedSigma,
733                               SkRRect* rrectToDraw,
734                               SkISize* widthHeight,
735                               SkScalar rectXs[kBlurRRectMaxDivisions],
736                               SkScalar rectYs[kBlurRRectMaxDivisions],
737                               SkScalar texXs[kBlurRRectMaxDivisions],
738                               SkScalar texYs[kBlurRRectMaxDivisions]) {
739    unsigned int devBlurRadius = 3*SkScalarCeilToInt(xformedSigma-1/6.0f);
740    SkScalar srcBlurRadius = 3.0f * sigma;
741
742    const SkRect& devOrig = devRRect.getBounds();
743    const SkVector& devRadiiUL = devRRect.radii(SkRRect::kUpperLeft_Corner);
744    const SkVector& devRadiiUR = devRRect.radii(SkRRect::kUpperRight_Corner);
745    const SkVector& devRadiiLR = devRRect.radii(SkRRect::kLowerRight_Corner);
746    const SkVector& devRadiiLL = devRRect.radii(SkRRect::kLowerLeft_Corner);
747
748    const int devLeft  = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fX, devRadiiLL.fX));
749    const int devTop   = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fY, devRadiiUR.fY));
750    const int devRight = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUR.fX, devRadiiLR.fX));
751    const int devBot   = SkScalarCeilToInt(std::max<SkScalar>(devRadiiLL.fY, devRadiiLR.fY));
752
753    // This is a conservative check for nine-patchability
754    if (devOrig.fLeft + devLeft + devBlurRadius >= devOrig.fRight  - devRight - devBlurRadius ||
755        devOrig.fTop  + devTop  + devBlurRadius >= devOrig.fBottom - devBot   - devBlurRadius) {
756        return false;
757    }
758
759    const SkVector& srcRadiiUL = srcRRect.radii(SkRRect::kUpperLeft_Corner);
760    const SkVector& srcRadiiUR = srcRRect.radii(SkRRect::kUpperRight_Corner);
761    const SkVector& srcRadiiLR = srcRRect.radii(SkRRect::kLowerRight_Corner);
762    const SkVector& srcRadiiLL = srcRRect.radii(SkRRect::kLowerLeft_Corner);
763
764    const SkScalar srcLeft  = std::max<SkScalar>(srcRadiiUL.fX, srcRadiiLL.fX);
765    const SkScalar srcTop   = std::max<SkScalar>(srcRadiiUL.fY, srcRadiiUR.fY);
766    const SkScalar srcRight = std::max<SkScalar>(srcRadiiUR.fX, srcRadiiLR.fX);
767    const SkScalar srcBot   = std::max<SkScalar>(srcRadiiLL.fY, srcRadiiLR.fY);
768
769    int newRRWidth = 2*devBlurRadius + devLeft + devRight + 1;
770    int newRRHeight = 2*devBlurRadius + devTop + devBot + 1;
771    widthHeight->fWidth = newRRWidth + 2 * devBlurRadius;
772    widthHeight->fHeight = newRRHeight + 2 * devBlurRadius;
773
774    const SkRect srcProxyRect = srcRRect.getBounds().makeOutset(srcBlurRadius, srcBlurRadius);
775
776    rectXs[0] = srcProxyRect.fLeft;
777    rectXs[1] = srcProxyRect.fLeft + 2*srcBlurRadius + srcLeft;
778    rectXs[2] = srcProxyRect.fRight - 2*srcBlurRadius - srcRight;
779    rectXs[3] = srcProxyRect.fRight;
780
781    rectYs[0] = srcProxyRect.fTop;
782    rectYs[1] = srcProxyRect.fTop + 2*srcBlurRadius + srcTop;
783    rectYs[2] = srcProxyRect.fBottom - 2*srcBlurRadius - srcBot;
784    rectYs[3] = srcProxyRect.fBottom;
785
786    texXs[0] = 0.0f;
787    texXs[1] = 2.0f*devBlurRadius + devLeft;
788    texXs[2] = 2.0f*devBlurRadius + devLeft + 1;
789    texXs[3] = SkIntToScalar(widthHeight->fWidth);
790
791    texYs[0] = 0.0f;
792    texYs[1] = 2.0f*devBlurRadius + devTop;
793    texYs[2] = 2.0f*devBlurRadius + devTop + 1;
794    texYs[3] = SkIntToScalar(widthHeight->fHeight);
795
796    const SkRect newRect = SkRect::MakeXYWH(SkIntToScalar(devBlurRadius),
797                                            SkIntToScalar(devBlurRadius),
798                                            SkIntToScalar(newRRWidth),
799                                            SkIntToScalar(newRRHeight));
800    SkVector newRadii[4];
801    newRadii[0] = { SkScalarCeilToScalar(devRadiiUL.fX), SkScalarCeilToScalar(devRadiiUL.fY) };
802    newRadii[1] = { SkScalarCeilToScalar(devRadiiUR.fX), SkScalarCeilToScalar(devRadiiUR.fY) };
803    newRadii[2] = { SkScalarCeilToScalar(devRadiiLR.fX), SkScalarCeilToScalar(devRadiiLR.fY) };
804    newRadii[3] = { SkScalarCeilToScalar(devRadiiLL.fX), SkScalarCeilToScalar(devRadiiLL.fY) };
805
806    rrectToDraw->setRectRadii(newRect, newRadii);
807    return true;
808}
809
810// TODO: it seems like there should be some synergy with SkBlurMask::ComputeBlurProfile
811// TODO: maybe cache this on the cpu side?
812int CreateIntegralTable(float sixSigma, SkBitmap* table) {
813    // The texture we're producing represents the integral of a normal distribution over a
814    // six-sigma range centered at zero. We want enough resolution so that the linear
815    // interpolation done in texture lookup doesn't introduce noticeable artifacts. We
816    // conservatively choose to have 2 texels for each dst pixel.
817    int minWidth = 2 * sk_float_ceil2int(sixSigma);
818    // Bin by powers of 2 with a minimum so we get good profile reuse.
819    int width = std::max(SkNextPow2(minWidth), 32);
820
821    if (!table) {
822        return width;
823    }
824
825    if (!table->tryAllocPixels(SkImageInfo::MakeA8(width, 1))) {
826        return 0;
827    }
828    *table->getAddr8(0, 0) = 255;
829    const float invWidth = 1.f / width;
830    for (int i = 1; i < width - 1; ++i) {
831        float x = (i + 0.5f) * invWidth;
832        x = (-6 * x + 3) * SK_ScalarRoot2Over2;
833        float integral = 0.5f * (std::erf(x) + 1.f);
834        *table->getAddr8(i, 0) = SkToU8(sk_float_round2int(255.f * integral));
835    }
836
837    *table->getAddr8(width - 1, 0) = 0;
838    table->setImmutable();
839    return table->width();
840}
841
842
843void Compute1DGaussianKernel(float* kernel, float sigma, int radius) {
844    SkASSERT(radius == SigmaRadius(sigma));
845    if (SkGpuBlurUtils::IsEffectivelyZeroSigma(sigma)) {
846        // Calling SigmaRadius() produces 1, just computing ceil(sigma)*3 produces 3
847        SkASSERT(KernelWidth(radius) == 1);
848        std::fill_n(kernel, 1, 0.f);
849        kernel[0] = 1.f;
850        return;
851    }
852
853    // If this fails, kEffectivelyZeroSigma isn't big enough to prevent precision issues
854    SkASSERT(!SkScalarNearlyZero(2.f * sigma * sigma));
855
856    const float sigmaDenom = 1.0f / (2.f * sigma * sigma);
857    int size = KernelWidth(radius);
858    float sum = 0.0f;
859    for (int i = 0; i < size; ++i) {
860        float term = static_cast<float>(i - radius);
861        // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
862        // is dropped here, since we renormalize the kernel below.
863        kernel[i] = sk_float_exp(-term * term * sigmaDenom);
864        sum += kernel[i];
865    }
866    // Normalize the kernel
867    float scale = 1.0f / sum;
868    for (int i = 0; i < size; ++i) {
869        kernel[i] *= scale;
870    }
871}
872
873void Compute1DLinearGaussianKernel(float* kernel, float* offset, float sigma, int radius) {
874    // Given 2 adjacent gaussian points, they are blended as: Wi * Ci + Wj * Cj.
875    // The GPU will mix Ci and Cj as Ci * (1 - x) + Cj * x during sampling.
876    // Compute W', x such that W' * (Ci * (1 - x) + Cj * x) = Wi * Ci + Wj * Cj.
877    // Solving W' * x = Wj, W' * (1 - x) = Wi:
878    // W' = Wi + Wj
879    // x = Wj / (Wi + Wj)
880    auto get_new_weight = [](float* new_w, float* offset, float wi, float wj) {
881        *new_w = wi + wj;
882        *offset = wj / (wi + wj);
883    };
884
885    // Create a temporary standard kernel.
886    int size = KernelWidth(radius);
887    std::unique_ptr<float[]> temp_kernel(new float[size]);
888    Compute1DGaussianKernel(temp_kernel.get(), sigma, radius);
889
890    // Note that halfsize isn't just size / 2, but radius + 1. This is the size of the output array.
891    int halfsize = LinearKernelWidth(radius);
892    int halfradius = halfsize / 2;
893    int low_index = halfradius - 1;
894
895    // Compute1DGaussianKernel produces a full 2N + 1 kernel. Since the kernel can be mirrored,
896    // compute only the upper half and mirror to the lower half.
897
898    int index = radius;
899    if (radius & 1) {
900        // If N is odd, then use two samples.
901        // The centre texel gets sampled twice, so halve its influence for each sample.
902        // We essentially sample like this:
903        // Texel edges
904        // v    v    v    v
905        // |    |    |    |
906        // \-----^---/ Lower sample
907        //      \---^-----/ Upper sample
908        get_new_weight(&kernel[halfradius], &offset[halfradius],
909                       temp_kernel[index] * 0.5f, temp_kernel[index + 1]);
910        kernel[low_index] = kernel[halfradius];
911        offset[low_index] = -offset[halfradius];
912        index++;
913        low_index--;
914    } else {
915        // If N is even, then there are an even number of texels on either side of the centre texel.
916        // Sample the centre texel directly.
917        kernel[halfradius] = temp_kernel[index];
918        offset[halfradius] = 0.0f;
919    }
920    index++;
921
922    // Every other pair gets one sample.
923    for (int i = halfradius + 1; i < halfsize; index += 2, i++, low_index--) {
924        get_new_weight(&kernel[i], &offset[i], temp_kernel[index], temp_kernel[index + 1]);
925        offset[i] += static_cast<float>(index - radius);
926
927        // Mirror to lower half.
928        kernel[low_index] = kernel[i];
929        offset[low_index] = -offset[i];
930    }
931}
932
933}  // namespace SkGpuBlurUtils
934
935#endif
936