1/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/gpu/gradients/GrGradientShader.h"
9
10#include "src/gpu/gradients/GrGradientBitmapCache.h"
11
12#include "include/gpu/GrRecordingContext.h"
13#include "src/core/SkMathPriv.h"
14#include "src/core/SkRuntimeEffectPriv.h"
15#include "src/gpu/GrCaps.h"
16#include "src/gpu/GrColor.h"
17#include "src/gpu/GrColorInfo.h"
18#include "src/gpu/GrRecordingContextPriv.h"
19#include "src/gpu/SkGr.h"
20#include "src/gpu/effects/GrMatrixEffect.h"
21#include "src/gpu/effects/GrSkSLFP.h"
22#include "src/gpu/effects/GrTextureEffect.h"
23
24using Vec4 = skvx::Vec<4, float>;
25
26// Intervals smaller than this (that aren't hard stops) on low-precision-only devices force us to
27// use the textured gradient
28static const SkScalar kLowPrecisionIntervalLimit = 0.01f;
29
30// Each cache entry costs 1K or 2K of RAM. Each bitmap will be 1x256 at either 32bpp or 64bpp.
31static const int kMaxNumCachedGradientBitmaps = 32;
32static const int kGradientTextureSize = 256;
33
34// NOTE: signature takes raw pointers to the color/pos arrays and a count to make it easy for
35// MakeColorizer to transparently take care of hard stops at the end points of the gradient.
36static std::unique_ptr<GrFragmentProcessor> make_textured_colorizer(const SkPMColor4f* colors,
37        const SkScalar* positions, int count, bool premul, const GrFPArgs& args) {
38    static GrGradientBitmapCache gCache(kMaxNumCachedGradientBitmaps, kGradientTextureSize);
39
40    // Use 8888 or F16, depending on the destination config.
41    // TODO: Use 1010102 for opaque gradients, at least if destination is 1010102?
42    SkColorType colorType = kRGBA_8888_SkColorType;
43    if (GrColorTypeIsWiderThan(args.fDstColorInfo->colorType(), 8)) {
44        auto f16Format = args.fContext->priv().caps()->getDefaultBackendFormat(
45                GrColorType::kRGBA_F16, GrRenderable::kNo);
46        if (f16Format.isValid()) {
47            colorType = kRGBA_F16_SkColorType;
48        }
49    }
50    SkAlphaType alphaType = premul ? kPremul_SkAlphaType : kUnpremul_SkAlphaType;
51
52    SkBitmap bitmap;
53    gCache.getGradient(colors, positions, count, colorType, alphaType, &bitmap);
54    SkASSERT(1 == bitmap.height() && SkIsPow2(bitmap.width()));
55    SkASSERT(bitmap.isImmutable());
56
57    auto view = std::get<0>(GrMakeCachedBitmapProxyView(args.fContext, bitmap, GrMipmapped::kNo));
58    if (!view) {
59        SkDebugf("Gradient won't draw. Could not create texture.");
60        return nullptr;
61    }
62
63    auto m = SkMatrix::Scale(view.width(), 1.f);
64    return GrTextureEffect::Make(std::move(view), alphaType, m, GrSamplerState::Filter::kLinear);
65}
66
67
68static std::unique_ptr<GrFragmentProcessor> make_single_interval_colorizer(const SkPMColor4f& start,
69                                                                           const SkPMColor4f& end) {
70    static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
71        uniform half4 start;
72        uniform half4 end;
73        half4 main(float2 coord) {
74            // Clamping and/or wrapping was already handled by the parent shader so the output
75            // color is a simple lerp.
76            return mix(start, end, half(coord.x));
77        }
78    )");
79    return GrSkSLFP::Make(effect, "SingleIntervalColorizer", /*inputFP=*/nullptr,
80                          GrSkSLFP::OptFlags::kNone,
81                          "start", start,
82                          "end", end);
83}
84
85static std::unique_ptr<GrFragmentProcessor> make_dual_interval_colorizer(const SkPMColor4f& c0,
86                                                                         const SkPMColor4f& c1,
87                                                                         const SkPMColor4f& c2,
88                                                                         const SkPMColor4f& c3,
89                                                                         float threshold) {
90    static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
91        uniform float4 scale[2];
92        uniform float4 bias[2];
93        uniform half threshold;
94
95        half4 main(float2 coord) {
96            half t = half(coord.x);
97
98            float4 s, b;
99            if (t < threshold) {
100                s = scale[0];
101                b = bias[0];
102            } else {
103                s = scale[1];
104                b = bias[1];
105            }
106
107            return half4(t * s + b);
108        }
109    )");
110
111    // Derive scale and biases from the 4 colors and threshold
112    Vec4 vc0 = Vec4::Load(c0.vec());
113    Vec4 vc1 = Vec4::Load(c1.vec());
114    Vec4 vc2 = Vec4::Load(c2.vec());
115    Vec4 vc3 = Vec4::Load(c3.vec());
116
117    const Vec4 scale[2] = {(vc1 - vc0) / threshold,
118                           (vc3 - vc2) / (1 - threshold)};
119    const Vec4 bias[2]  = {vc0,
120                           vc2 - threshold * scale[1]};
121    return GrSkSLFP::Make(effect, "DualIntervalColorizer", /*inputFP=*/nullptr,
122                          GrSkSLFP::OptFlags::kNone,
123                          "scale", SkMakeSpan(scale),
124                          "bias", SkMakeSpan(bias),
125                          "threshold", threshold);
126}
127
128// The "unrolled" colorizer contains hand-written nested ifs which perform a binary search.
129// This works on ES2 hardware that doesn't support non-constant array indexes.
130// However, to keep code size under control, we are limited to a small number of stops.
131static constexpr int kMaxUnrolledColorCount    = 16;
132static constexpr int kMaxUnrolledIntervalCount = kMaxUnrolledColorCount / 2;
133
134static std::unique_ptr<GrFragmentProcessor> make_unrolled_colorizer(int intervalCount,
135                                                                    const SkPMColor4f* scale,
136                                                                    const SkPMColor4f* bias,
137                                                                    SkRect thresholds1_7,
138                                                                    SkRect thresholds9_13) {
139    SkASSERT(intervalCount >= 1 && intervalCount <= 8);
140
141    static SkOnce                 once[kMaxUnrolledIntervalCount];
142    static sk_sp<SkRuntimeEffect> effects[kMaxUnrolledIntervalCount];
143
144    once[intervalCount - 1]([intervalCount] {
145        SkString sksl;
146
147        // The 7 threshold positions that define the boundaries of the 8 intervals (excluding t = 0,
148        // and t = 1) are packed into two half4s instead of having up to 7 separate scalar uniforms.
149        // For low interval counts, the extra components are ignored in the shader, but the uniform
150        // simplification is worth it. It is assumed thresholds are provided in increasing value,
151        // mapped as:
152        //  - thresholds1_7.x = boundary between (0,1) and (2,3) -> 1_2
153        //  -              .y = boundary between (2,3) and (4,5) -> 3_4
154        //  -              .z = boundary between (4,5) and (6,7) -> 5_6
155        //  -              .w = boundary between (6,7) and (8,9) -> 7_8
156        //  - thresholds9_13.x = boundary between (8,9) and (10,11) -> 9_10
157        //  -               .y = boundary between (10,11) and (12,13) -> 11_12
158        //  -               .z = boundary between (12,13) and (14,15) -> 13_14
159        //  -               .w = unused
160        sksl.append("uniform half4 thresholds1_7, thresholds9_13;");
161
162        // With the current hardstop detection threshold of 0.00024, the maximum scale and bias
163        // values will be on the order of 4k (since they divide by dt). That is well outside the
164        // precision capabilities of half floats, which can lead to inaccurate gradient calculations
165        sksl.appendf("uniform float4 scale[%d];", intervalCount);
166        sksl.appendf("uniform float4 bias[%d];", intervalCount);
167
168        // Explicit binary search for the proper interval that t falls within. The interval
169        // count checks are constant expressions, which are then optimized to the minimal number
170        // of branches for the specific interval count.
171        sksl.appendf(R"(
172        half4 main(float2 coord) {
173            half t = half(coord.x);
174            float4 s, b;
175            // thresholds1_7.w is mid point for intervals (0,7) and (8,15)
176            if (%d <= 4 || t < thresholds1_7.w) {
177                // thresholds1_7.y is mid point for intervals (0,3) and (4,7)
178                if (%d <= 2 || t < thresholds1_7.y) {
179                    // thresholds1_7.x is mid point for intervals (0,1) and (2,3)
180                    if (%d <= 1 || t < thresholds1_7.x) {
181                        %s s = scale[0]; b = bias[0];
182                    } else {
183                        %s s = scale[1]; b = bias[1];
184                    }
185                } else {
186                    // thresholds1_7.z is mid point for intervals (4,5) and (6,7)
187                    if (%d <= 3 || t < thresholds1_7.z) {
188                        %s s = scale[2]; b = bias[2];
189                    } else {
190                        %s s = scale[3]; b = bias[3];
191                    }
192                }
193            } else {
194                // thresholds9_13.y is mid point for intervals (8,11) and (12,15)
195                if (%d <= 6 || t < thresholds9_13.y) {
196                    // thresholds9_13.x is mid point for intervals (8,9) and (10,11)
197                    if (%d <= 5 || t < thresholds9_13.x) {
198                        %s s = scale[4]; b = bias[4];
199                    } else {
200                        %s s = scale[5]; b = bias[5];
201                    }
202                } else {
203                    // thresholds9_13.z is mid point for intervals (12,13) and (14,15)
204                    if (%d <= 7 || t < thresholds9_13.z) {
205                        %s s = scale[6]; b = bias[6];
206                    } else {
207                        %s s = scale[7]; b = bias[7];
208                    }
209                }
210            }
211            return t * s + b;
212        }
213        )", intervalCount,
214              intervalCount,
215                intervalCount,
216                  (intervalCount <= 0) ? "//" : "",
217                  (intervalCount <= 1) ? "//" : "",
218                intervalCount,
219                  (intervalCount <= 2) ? "//" : "",
220                  (intervalCount <= 3) ? "//" : "",
221              intervalCount,
222                intervalCount,
223                  (intervalCount <= 4) ? "//" : "",
224                  (intervalCount <= 5) ? "//" : "",
225                intervalCount,
226                  (intervalCount <= 6) ? "//" : "",
227                  (intervalCount <= 7) ? "//" : "");
228
229        auto result = SkRuntimeEffect::MakeForShader(std::move(sksl));
230        SkASSERTF(result.effect, "%s", result.errorText.c_str());
231        effects[intervalCount - 1] = std::move(result.effect);
232    });
233
234    return GrSkSLFP::Make(effects[intervalCount - 1], "UnrolledBinaryColorizer",
235                          /*inputFP=*/nullptr, GrSkSLFP::OptFlags::kNone,
236                          "thresholds1_7", thresholds1_7,
237                          "thresholds9_13", thresholds9_13,
238                          "scale", SkMakeSpan(scale, intervalCount),
239                          "bias", SkMakeSpan(bias, intervalCount));
240}
241
242// The "looping" colorizer uses a real loop to binary-search the array of gradient stops.
243static constexpr int kMaxLoopingColorCount    = 128;
244static constexpr int kMaxLoopingIntervalCount = kMaxLoopingColorCount / 2;
245
246static std::unique_ptr<GrFragmentProcessor> make_looping_colorizer(int intervalCount,
247                                                                   const SkPMColor4f* scale,
248                                                                   const SkPMColor4f* bias,
249                                                                   const SkScalar* thresholds) {
250    SkASSERT(intervalCount >= 1 && intervalCount <= kMaxLoopingIntervalCount);
251    SkASSERT((intervalCount & 3) == 0);  // intervals are required to come in groups of four
252    int intervalChunks = intervalCount / 4;
253    int cacheIndex = (size_t)intervalChunks - 1;
254
255    struct EffectCacheEntry {
256        SkOnce once;
257        sk_sp<SkRuntimeEffect> effect;
258    };
259
260    static EffectCacheEntry effectCache[kMaxLoopingIntervalCount / 4];
261    SkASSERT(cacheIndex >= 0 && cacheIndex < (int)SK_ARRAY_COUNT(effectCache));
262    EffectCacheEntry* cacheEntry = &effectCache[cacheIndex];
263
264    cacheEntry->once([intervalCount, intervalChunks, cacheEntry] {
265        SkString sksl;
266
267        // Binary search for the interval that `t` falls within. We can precalculate the number of
268        // loop iterations we need, and we know `t` will always be in range, so we can just loop a
269        // fixed number of times and can be guaranteed to have found the proper element.
270        //
271        // Threshold values are stored in half4s to keep them compact, so the last two rounds of
272        // binary search are hand-unrolled to allow them to use swizzles.
273        //
274        // Note that this colorizer is also designed to handle the case of exactly 4 intervals (a
275        // single chunk). In this case, the binary search for-loop will optimize away entirely, as
276        // it can be proven to execute zero times. We also optimize away the calculation of `4 *
277        // chunk` near the end via an @if statement, as the result will always be in chunk 0.
278        int loopCount = SkNextLog2(intervalChunks);
279        sksl.appendf(R"(
280        uniform half4 thresholds[%d];
281        uniform float4 scale[%d];
282        uniform float4 bias[%d];
283
284        half4 main(float2 coord) {
285            half t = half(coord.x);
286
287            // Choose a chunk from thresholds via binary search in a loop.
288            int low = 0;
289            int high = %d;
290            int chunk = %d;
291            for (int loop = 0; loop < %d; ++loop) {
292                if (t < thresholds[chunk].w) {
293                    high = chunk;
294                } else {
295                    low = chunk + 1;
296                }
297                chunk = (low + high) / 2;
298            }
299
300            // Choose the final position via explicit 4-way binary search.
301            int pos;
302            if (t < thresholds[chunk].y) {
303                pos = (t < thresholds[chunk].x) ? 0 : 1;
304            } else {
305                pos = (t < thresholds[chunk].z) ? 2 : 3;
306            }
307            @if (%d > 0) {
308                pos += 4 * chunk;
309            }
310            return t * scale[pos] + bias[pos];
311        }
312        )", /* thresholds: */ intervalChunks,
313            /* scale: */ intervalCount,
314            /* bias: */ intervalCount,
315            /* high: */ intervalChunks - 1,
316            /* chunk: */ (intervalChunks - 1) / 2,
317            /* loopCount: */ loopCount,
318            /* @if (loopCount > 0): */ loopCount);
319
320        auto result = SkRuntimeEffect::MakeForShader(std::move(sksl),
321                                                     SkRuntimeEffectPriv::ES3Options());
322        SkASSERTF(result.effect, "%s", result.errorText.c_str());
323        cacheEntry->effect = std::move(result.effect);
324    });
325
326    return GrSkSLFP::Make(cacheEntry->effect, "LoopingBinaryColorizer",
327                          /*inputFP=*/nullptr, GrSkSLFP::OptFlags::kNone,
328                          "thresholds", SkMakeSpan((const SkV4*)thresholds, intervalChunks),
329                          "scale", SkMakeSpan(scale, intervalCount),
330                          "bias", SkMakeSpan(bias, intervalCount));
331}
332
333// Converts an input array of {colors, positions} into an array of {scales, biases, thresholds}.
334// The length of the result array may differ from the input due to hard-stops or empty intervals.
335int build_intervals(int inputLength,
336                    const SkPMColor4f* inColors,
337                    const SkScalar* inPositions,
338                    int outputLength,
339                    SkPMColor4f* outScales,
340                    SkPMColor4f* outBiases,
341                    SkScalar* outThresholds) {
342    // Depending on how the positions resolve into hard stops or regular stops, the number of
343    // intervals specified by the number of colors/positions can change. For instance, a plain
344    // 3 color gradient is two intervals, but a 4 color gradient with a hard stop is also
345    // two intervals. At the most extreme end, an 8 interval gradient made entirely of hard
346    // stops has 16 colors.
347    int intervalCount = 0;
348    for (int i = 0; i < inputLength - 1; i++) {
349        if (intervalCount >= outputLength) {
350            // Already reached our output limit, and haven't run out of color stops. This gradient
351            // cannot be represented without more intervals.
352            return 0;
353        }
354
355        SkScalar t0 = inPositions[i];
356        SkScalar t1 = inPositions[i + 1];
357        SkScalar dt = t1 - t0;
358        // If the interval is empty, skip to the next interval. This will automatically create
359        // distinct hard stop intervals as needed. It also protects against malformed gradients
360        // that have repeated hard stops at the very beginning that are effectively unreachable.
361        if (SkScalarNearlyZero(dt)) {
362            continue;
363        }
364
365        Vec4 c0 = Vec4::Load(inColors[i].vec());
366        Vec4 c1 = Vec4::Load(inColors[i + 1].vec());
367        Vec4 scale = (c1 - c0) / dt;
368        Vec4 bias = c0 - t0 * scale;
369
370        scale.store(outScales + intervalCount);
371        bias.store(outBiases + intervalCount);
372        outThresholds[intervalCount] = t1;
373        intervalCount++;
374    }
375    return intervalCount;
376}
377
378static std::unique_ptr<GrFragmentProcessor> make_unrolled_binary_colorizer(
379        const SkPMColor4f* colors, const SkScalar* positions, int count) {
380    if (count > kMaxUnrolledColorCount) {
381        // Definitely cannot represent this gradient configuration
382        return nullptr;
383    }
384
385    SkPMColor4f scales[kMaxUnrolledIntervalCount];
386    SkPMColor4f biases[kMaxUnrolledIntervalCount];
387    SkScalar thresholds[kMaxUnrolledIntervalCount] = {};
388    int intervalCount = build_intervals(count, colors, positions,
389                                        kMaxUnrolledIntervalCount, scales, biases, thresholds);
390    if (intervalCount <= 0) {
391        return nullptr;
392    }
393
394    SkRect thresholds1_7  = {thresholds[0], thresholds[1], thresholds[2], thresholds[3]},
395           thresholds9_13 = {thresholds[4], thresholds[5], thresholds[6], 0.0};
396
397    return make_unrolled_colorizer(intervalCount, scales, biases, thresholds1_7, thresholds9_13);
398}
399
400static std::unique_ptr<GrFragmentProcessor> make_looping_binary_colorizer(const SkPMColor4f* colors,
401                                                                          const SkScalar* positions,
402                                                                          int count) {
403    if (count > kMaxLoopingColorCount) {
404        // Definitely cannot represent this gradient configuration
405        return nullptr;
406    }
407
408    SkPMColor4f scales[kMaxLoopingIntervalCount];
409    SkPMColor4f biases[kMaxLoopingIntervalCount];
410    SkScalar thresholds[kMaxLoopingIntervalCount] = {};
411    int intervalCount = build_intervals(count, colors, positions,
412                                        kMaxLoopingIntervalCount, scales, biases, thresholds);
413    if (intervalCount <= 0) {
414        return nullptr;
415    }
416
417    // We round up the number of intervals to the next power of two. This reduces the number of
418    // unique shaders and doesn't require any additional GPU processing power, but this does waste a
419    // handful of uniforms.
420    int roundedSize = std::max(4, SkNextPow2(intervalCount));
421    SkASSERT(roundedSize <= kMaxLoopingIntervalCount);
422    for (; intervalCount < roundedSize; ++intervalCount) {
423        thresholds[intervalCount] = thresholds[intervalCount - 1];
424        scales[intervalCount] = scales[intervalCount - 1];
425        biases[intervalCount] = biases[intervalCount - 1];
426    }
427
428    return make_looping_colorizer(intervalCount, scales, biases, thresholds);
429}
430
431// Analyze the shader's color stops and positions and chooses an appropriate colorizer to represent
432// the gradient.
433static std::unique_ptr<GrFragmentProcessor> make_colorizer(const SkPMColor4f* colors,
434                                                           const SkScalar* positions,
435                                                           int count,
436                                                           bool premul,
437                                                           const GrFPArgs& args) {
438    // If there are hard stops at the beginning or end, the first and/or last color should be
439    // ignored by the colorizer since it should only be used in a clamped border color. By detecting
440    // and removing these stops at the beginning, it makes optimizing the remaining color stops
441    // simpler.
442
443    // SkGradientShaderBase guarantees that pos[0] == 0 by adding a default value.
444    bool bottomHardStop = SkScalarNearlyEqual(positions[0], positions[1]);
445    // The same is true for pos[end] == 1
446    bool topHardStop = SkScalarNearlyEqual(positions[count - 2], positions[count - 1]);
447
448    if (bottomHardStop) {
449        colors++;
450        positions++;
451        count--;
452    }
453    if (topHardStop) {
454        count--;
455    }
456
457    // Two remaining colors means a single interval from 0 to 1
458    // (but it may have originally been a 3 or 4 color gradient with 1-2 hard stops at the ends)
459    if (count == 2) {
460        return make_single_interval_colorizer(colors[0], colors[1]);
461    }
462
463    const GrShaderCaps* caps = args.fContext->priv().caps()->shaderCaps();
464    auto intervalsExceedPrecisionLimit = [&]() -> bool {
465        // The remaining analytic colorizers use scale*t+bias, and the scale/bias values can become
466        // quite large when thresholds are close (but still outside the hardstop limit). If float
467        // isn't 32-bit, output can be incorrect if the thresholds are too close together. However,
468        // the analytic shaders are higher quality, so they can be used with lower precision
469        // hardware when the thresholds are not ill-conditioned.
470        if (!caps->floatIs32Bits()) {
471            // Could run into problems. Check if thresholds are close together (with a limit of .01,
472            // so that scales will be less than 100, which leaves 4 decimals of precision on
473            // 16-bit).
474            for (int i = 0; i < count - 1; i++) {
475                SkScalar dt = SkScalarAbs(positions[i] - positions[i + 1]);
476                if (dt <= kLowPrecisionIntervalLimit && dt > SK_ScalarNearlyZero) {
477                    return true;
478                }
479            }
480        }
481        return false;
482    };
483
484    auto makeDualIntervalColorizer = [&]() -> std::unique_ptr<GrFragmentProcessor> {
485        // The dual-interval colorizer uses the same principles as the binary-search colorizer, but
486        // is limited to exactly 2 intervals.
487        if (count == 3) {
488            // Must be a dual interval gradient, where the middle point is at 1 and the
489            // two intervals share the middle color stop.
490            return make_dual_interval_colorizer(colors[0], colors[1],
491                                                colors[1], colors[2],
492                                                positions[1]);
493        }
494        if (count == 4 && SkScalarNearlyEqual(positions[1], positions[2])) {
495            // Two separate intervals that join at the same threshold position
496            return make_dual_interval_colorizer(colors[0], colors[1],
497                                                colors[2], colors[3],
498                                                positions[1]);
499        }
500        // The gradient can't be represented in only two intervals.
501        return nullptr;
502    };
503
504    int binaryColorizerLimit = caps->nonconstantArrayIndexSupport() ? kMaxLoopingColorCount
505                                                                    : kMaxUnrolledColorCount;
506    if ((count <= binaryColorizerLimit) && !intervalsExceedPrecisionLimit()) {
507        // The dual-interval colorizer uses the same principles as the binary-search colorizer, but
508        // is limited to exactly 2 intervals.
509        std::unique_ptr<GrFragmentProcessor> colorizer = makeDualIntervalColorizer();
510        if (colorizer) {
511            return colorizer;
512        }
513        // Attempt to create an analytic colorizer that uses a binary-search loop.
514        colorizer = caps->nonconstantArrayIndexSupport()
515                            ? make_looping_binary_colorizer(colors, positions, count)
516                            : make_unrolled_binary_colorizer(colors, positions, count);
517        if (colorizer) {
518            return colorizer;
519        }
520    }
521
522    // Otherwise fall back to a rasterized gradient sampled by a texture, which can handle
523    // arbitrary gradients. (This has limited sampling resolution, and always blurs hard-stops.)
524    return make_textured_colorizer(colors, positions, count, premul, args);
525}
526
527// This top-level effect implements clamping on the layout coordinate and requires specifying the
528// border colors that are used when outside the clamped boundary. Gradients with the
529// SkTileMode::kClamp should use the colors at their first and last stop (after adding default stops
530// for t=0,t=1) as the border color. This will automatically replicate the edge color, even when
531// there is a hard stop.
532//
533// The SkTileMode::kDecal can be produced by specifying transparent black as the border colors,
534// regardless of the gradient's stop colors.
535static std::unique_ptr<GrFragmentProcessor> make_clamped_gradient(
536        std::unique_ptr<GrFragmentProcessor> colorizer,
537        std::unique_ptr<GrFragmentProcessor> gradLayout,
538        SkPMColor4f leftBorderColor,
539        SkPMColor4f rightBorderColor,
540        bool makePremul,
541        bool colorsAreOpaque) {
542    static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
543        uniform shader colorizer;
544        uniform shader gradLayout;
545
546        uniform half4 leftBorderColor;  // t < 0.0
547        uniform half4 rightBorderColor; // t > 1.0
548
549        uniform int makePremul;              // specialized
550        uniform int layoutPreservesOpacity;  // specialized
551
552        half4 main(float2 coord) {
553            half4 t = gradLayout.eval(coord);
554            half4 outColor;
555
556            // If t.x is below 0, use the left border color without invoking the child processor.
557            // If any t.x is above 1, use the right border color. Otherwise, t is in the [0, 1]
558            // range assumed by the colorizer FP, so delegate to the child processor.
559            if (!bool(layoutPreservesOpacity) && t.y < 0) {
560                // layout has rejected this fragment (rely on sksl to remove this branch if the
561                // layout FP preserves opacity is false)
562                outColor = half4(0);
563            } else if (t.x < 0) {
564                outColor = leftBorderColor;
565            } else if (t.x > 1.0) {
566                outColor = rightBorderColor;
567            } else {
568                // Always sample from (x, 0), discarding y, since the layout FP can use y as a
569                // side-channel.
570                outColor = colorizer.eval(t.x0);
571            }
572            if (bool(makePremul)) {
573                outColor.rgb *= outColor.a;
574            }
575            return outColor;
576        }
577    )");
578
579    // If the layout does not preserve opacity, remove the opaque optimization,
580    // but otherwise respect the provided color opacity state (which should take
581    // into account the opacity of the border colors).
582    bool layoutPreservesOpacity = gradLayout->preservesOpaqueInput();
583    GrSkSLFP::OptFlags optFlags = GrSkSLFP::OptFlags::kCompatibleWithCoverageAsAlpha;
584    if (colorsAreOpaque && layoutPreservesOpacity) {
585        optFlags |= GrSkSLFP::OptFlags::kPreservesOpaqueInput;
586    }
587
588    return GrSkSLFP::Make(effect, "ClampedGradient", /*inputFP=*/nullptr, optFlags,
589                          "colorizer", GrSkSLFP::IgnoreOptFlags(std::move(colorizer)),
590                          "gradLayout", GrSkSLFP::IgnoreOptFlags(std::move(gradLayout)),
591                          "leftBorderColor", leftBorderColor,
592                          "rightBorderColor", rightBorderColor,
593                          "makePremul", GrSkSLFP::Specialize<int>(makePremul),
594                          "layoutPreservesOpacity",
595                              GrSkSLFP::Specialize<int>(layoutPreservesOpacity));
596}
597
598static std::unique_ptr<GrFragmentProcessor> make_tiled_gradient(
599        const GrFPArgs& args,
600        std::unique_ptr<GrFragmentProcessor> colorizer,
601        std::unique_ptr<GrFragmentProcessor> gradLayout,
602        bool mirror,
603        bool makePremul,
604        bool colorsAreOpaque) {
605    static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
606        uniform shader colorizer;
607        uniform shader gradLayout;
608
609        uniform int mirror;                  // specialized
610        uniform int makePremul;              // specialized
611        uniform int layoutPreservesOpacity;  // specialized
612        uniform int useFloorAbsWorkaround;   // specialized
613
614        half4 main(float2 coord) {
615            half4 t = gradLayout.eval(coord);
616
617            if (!bool(layoutPreservesOpacity) && t.y < 0) {
618                // layout has rejected this fragment (rely on sksl to remove this branch if the
619                // layout FP preserves opacity is false)
620                return half4(0);
621            } else {
622                if (bool(mirror)) {
623                    half t_1 = t.x - 1;
624                    half tiled_t = t_1 - 2 * floor(t_1 * 0.5) - 1;
625                    if (bool(useFloorAbsWorkaround)) {
626                        // At this point the expected value of tiled_t should between -1 and 1, so
627                        // this clamp has no effect other than to break up the floor and abs calls
628                        // and make sure the compiler doesn't merge them back together.
629                        tiled_t = clamp(tiled_t, -1, 1);
630                    }
631                    t.x = abs(tiled_t);
632                } else {
633                    // Simple repeat mode
634                    t.x = fract(t.x);
635                }
636
637                // Always sample from (x, 0), discarding y, since the layout FP can use y as a
638                // side-channel.
639                half4 outColor = colorizer.eval(t.x0);
640                if (bool(makePremul)) {
641                    outColor.rgb *= outColor.a;
642                }
643                return outColor;
644            }
645        }
646    )");
647
648    // If the layout does not preserve opacity, remove the opaque optimization,
649    // but otherwise respect the provided color opacity state (which should take
650    // into account the opacity of the border colors).
651    bool layoutPreservesOpacity = gradLayout->preservesOpaqueInput();
652    GrSkSLFP::OptFlags optFlags = GrSkSLFP::OptFlags::kCompatibleWithCoverageAsAlpha;
653    if (colorsAreOpaque && layoutPreservesOpacity) {
654        optFlags |= GrSkSLFP::OptFlags::kPreservesOpaqueInput;
655    }
656    const bool useFloorAbsWorkaround =
657            args.fContext->priv().caps()->shaderCaps()->mustDoOpBetweenFloorAndAbs();
658
659    return GrSkSLFP::Make(effect, "TiledGradient", /*inputFP=*/nullptr, optFlags,
660                          "colorizer", GrSkSLFP::IgnoreOptFlags(std::move(colorizer)),
661                          "gradLayout", GrSkSLFP::IgnoreOptFlags(std::move(gradLayout)),
662                          "mirror", GrSkSLFP::Specialize<int>(mirror),
663                          "makePremul", GrSkSLFP::Specialize<int>(makePremul),
664                          "layoutPreservesOpacity",
665                                GrSkSLFP::Specialize<int>(layoutPreservesOpacity),
666                          "useFloorAbsWorkaround",
667                                GrSkSLFP::Specialize<int>(useFloorAbsWorkaround));
668}
669
670// Combines the colorizer and layout with an appropriately configured top-level effect based on the
671// gradient's tile mode
672static std::unique_ptr<GrFragmentProcessor> make_gradient(
673        const SkGradientShaderBase& shader,
674        const GrFPArgs& args,
675        std::unique_ptr<GrFragmentProcessor> layout,
676        const SkMatrix* overrideMatrix = nullptr) {
677    // No shader is possible if a layout couldn't be created, e.g. a layout-specific Make() returned
678    // null.
679    if (layout == nullptr) {
680        return nullptr;
681    }
682
683    // Wrap the layout in a matrix effect to apply the gradient's matrix:
684    SkMatrix matrix;
685    if (!shader.totalLocalMatrix(args.fPreLocalMatrix)->invert(&matrix)) {
686        return nullptr;
687    }
688    // Some two-point conical gradients use a custom matrix here
689    matrix.postConcat(overrideMatrix ? *overrideMatrix : shader.getGradientMatrix());
690    layout = GrMatrixEffect::Make(matrix, std::move(layout));
691
692    // Convert all colors into destination space and into SkPMColor4fs, and handle
693    // premul issues depending on the interpolation mode
694    bool inputPremul = shader.getGradFlags() & SkGradientShader::kInterpolateColorsInPremul_Flag;
695    bool allOpaque = true;
696    SkAutoSTMalloc<4, SkPMColor4f> colors(shader.fColorCount);
697    SkColor4fXformer xformedColors(shader.fOrigColors4f, shader.fColorCount,
698                                   shader.fColorSpace.get(), args.fDstColorInfo->colorSpace());
699    for (int i = 0; i < shader.fColorCount; i++) {
700        const SkColor4f& upmColor = xformedColors.fColors[i];
701        colors[i] = inputPremul ? upmColor.premul()
702                                : SkPMColor4f{ upmColor.fR, upmColor.fG, upmColor.fB, upmColor.fA };
703        if (allOpaque && !SkScalarNearlyEqual(colors[i].fA, 1.0)) {
704            allOpaque = false;
705        }
706    }
707
708    // SkGradientShader stores positions implicitly when they are evenly spaced, but the getPos()
709    // implementation performs a branch for every position index. Since the shader conversion
710    // requires lots of position tests, calculate all of the positions up front if needed.
711    SkTArray<SkScalar, true> implicitPos;
712    SkScalar* positions;
713    if (shader.fOrigPos) {
714        positions = shader.fOrigPos;
715    } else {
716        implicitPos.reserve_back(shader.fColorCount);
717        SkScalar posScale = SK_Scalar1 / (shader.fColorCount - 1);
718        for (int i = 0 ; i < shader.fColorCount; i++) {
719            implicitPos.push_back(SkIntToScalar(i) * posScale);
720        }
721        positions = implicitPos.begin();
722    }
723
724    // All gradients are colorized the same way, regardless of layout
725    std::unique_ptr<GrFragmentProcessor> colorizer = make_colorizer(
726            colors.get(), positions, shader.fColorCount, inputPremul, args);
727    if (colorizer == nullptr) {
728        return nullptr;
729    }
730
731    // The top-level effect has to export premul colors, but under certain conditions it doesn't
732    // need to do anything to achieve that: i.e. its interpolating already premul colors
733    // (inputPremul) or all the colors have a = 1, in which case premul is a no op. Note that this
734    // allOpaque check is more permissive than SkGradientShaderBase's isOpaque(), since we can
735    // optimize away the make-premul op for two point conical gradients (which report false for
736    // isOpaque).
737    bool makePremul = !inputPremul && !allOpaque;
738
739    // All tile modes are supported (unless something was added to SkShader)
740    std::unique_ptr<GrFragmentProcessor> gradient;
741    switch(shader.getTileMode()) {
742        case SkTileMode::kRepeat:
743            gradient = make_tiled_gradient(args, std::move(colorizer), std::move(layout),
744                                           /* mirror */ false, makePremul, allOpaque);
745            break;
746        case SkTileMode::kMirror:
747            gradient = make_tiled_gradient(args, std::move(colorizer), std::move(layout),
748                                           /* mirror */ true, makePremul, allOpaque);
749            break;
750        case SkTileMode::kClamp:
751            // For the clamped mode, the border colors are the first and last colors, corresponding
752            // to t=0 and t=1, because SkGradientShaderBase enforces that by adding color stops as
753            // appropriate. If there is a hard stop, this grabs the expected outer colors for the
754            // border.
755            gradient = make_clamped_gradient(std::move(colorizer), std::move(layout),
756                                             colors[0], colors[shader.fColorCount - 1],
757                                             makePremul, allOpaque);
758            break;
759        case SkTileMode::kDecal:
760            // Even if the gradient colors are opaque, the decal borders are transparent so
761            // disable that optimization
762            gradient = make_clamped_gradient(std::move(colorizer), std::move(layout),
763                                             SK_PMColor4fTRANSPARENT, SK_PMColor4fTRANSPARENT,
764                                             makePremul, /* colorsAreOpaque */ false);
765            break;
766    }
767
768    return gradient;
769}
770
771namespace GrGradientShader {
772
773std::unique_ptr<GrFragmentProcessor> MakeLinear(const SkLinearGradient& shader,
774                                                const GrFPArgs& args) {
775    // We add a tiny delta to t. When gradient stops are set up so that a hard stop in a vertically
776    // or horizontally oriented gradient falls exactly at a column or row of pixel centers we can
777    // get slightly different interpolated t values along the column/row. By adding the delta
778    // we will consistently get the color to the "right" of the stop. Of course if the hard stop
779    // falls at X.5 - delta then we still could get inconsistent results, but that is much less
780    // likely. crbug.com/938592
781    // If/when we add filtering of the gradient this can be removed.
782    static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
783        half4 main(float2 coord) {
784            return half4(half(coord.x) + 0.00001, 1, 0, 0); // y = 1 for always valid
785        }
786    )");
787    // The linear gradient never rejects a pixel so it doesn't change opacity
788    auto fp = GrSkSLFP::Make(effect, "LinearLayout", /*inputFP=*/nullptr,
789                             GrSkSLFP::OptFlags::kPreservesOpaqueInput);
790    return make_gradient(shader, args, std::move(fp));
791}
792
793std::unique_ptr<GrFragmentProcessor> MakeRadial(const SkRadialGradient& shader,
794                                                const GrFPArgs& args) {
795    static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
796        half4 main(float2 coord) {
797            return half4(half(length(coord)), 1, 0, 0); // y = 1 for always valid
798        }
799    )");
800    // The radial gradient never rejects a pixel so it doesn't change opacity
801    auto fp = GrSkSLFP::Make(effect, "RadialLayout", /*inputFP=*/nullptr,
802                             GrSkSLFP::OptFlags::kPreservesOpaqueInput);
803    return make_gradient(shader, args, std::move(fp));
804}
805
806std::unique_ptr<GrFragmentProcessor> MakeSweep(const SkSweepGradient& shader,
807                                               const GrFPArgs& args) {
808    // On some devices they incorrectly implement atan2(y,x) as atan(y/x). In actuality it is
809    // atan2(y,x) = 2 * atan(y / (sqrt(x^2 + y^2) + x)). So to work around this we pass in (sqrt(x^2
810    // + y^2) + x) as the second parameter to atan2 in these cases. We let the device handle the
811    // undefined behavior of the second paramenter being 0 instead of doing the divide ourselves and
812    // using atan instead.
813    int useAtanWorkaround =
814            args.fContext->priv().caps()->shaderCaps()->atan2ImplementedAsAtanYOverX();
815    static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
816        uniform half bias;
817        uniform half scale;
818        uniform int useAtanWorkaround;  // specialized
819
820        half4 main(float2 coord) {
821            half angle = bool(useAtanWorkaround)
822                    ? half(2 * atan(-coord.y, length(coord) - coord.x))
823                    : half(atan(-coord.y, -coord.x));
824
825            // 0.1591549430918 is 1/(2*pi), used since atan returns values [-pi, pi]
826            half t = (angle * 0.1591549430918 + 0.5 + bias) * scale;
827            return half4(t, 1, 0, 0); // y = 1 for always valid
828        }
829    )");
830    // The sweep gradient never rejects a pixel so it doesn't change opacity
831    auto fp = GrSkSLFP::Make(effect, "SweepLayout", /*inputFP=*/nullptr,
832                             GrSkSLFP::OptFlags::kPreservesOpaqueInput,
833                             "bias", shader.getTBias(),
834                             "scale", shader.getTScale(),
835                             "useAtanWorkaround", GrSkSLFP::Specialize(useAtanWorkaround));
836    return make_gradient(shader, args, std::move(fp));
837}
838
839std::unique_ptr<GrFragmentProcessor> MakeConical(const SkTwoPointConicalGradient& shader,
840                                                 const GrFPArgs& args) {
841    // The 2 point conical gradient can reject a pixel so it does change opacity even if the input
842    // was opaque. Thus, all of these layout FPs disable that optimization.
843    std::unique_ptr<GrFragmentProcessor> fp;
844    SkTLazy<SkMatrix> matrix;
845    switch (shader.getType()) {
846        case SkTwoPointConicalGradient::Type::kStrip: {
847            static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
848                uniform half r0_2;
849                half4 main(float2 p) {
850                    half v = 1; // validation flag, set to negative to discard fragment later
851                    float t = r0_2 - p.y * p.y;
852                    if (t >= 0) {
853                        t = p.x + sqrt(t);
854                    } else {
855                        v = -1;
856                    }
857                    return half4(half(t), v, 0, 0);
858                }
859            )");
860            float r0 = shader.getStartRadius() / shader.getCenterX1();
861            fp = GrSkSLFP::Make(effect, "TwoPointConicalStripLayout", /*inputFP=*/nullptr,
862                                GrSkSLFP::OptFlags::kNone,
863                                "r0_2", r0 * r0);
864        } break;
865
866        case SkTwoPointConicalGradient::Type::kRadial: {
867            static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
868                uniform half r0;
869                uniform half lengthScale;
870                half4 main(float2 p) {
871                    half v = 1; // validation flag, set to negative to discard fragment later
872                    float t = length(p) * lengthScale - r0;
873                    return half4(half(t), v, 0, 0);
874                }
875            )");
876            float dr = shader.getDiffRadius();
877            float r0 = shader.getStartRadius() / dr;
878            bool isRadiusIncreasing = dr >= 0;
879            fp = GrSkSLFP::Make(effect, "TwoPointConicalRadialLayout", /*inputFP=*/nullptr,
880                                GrSkSLFP::OptFlags::kNone,
881                                "r0", r0,
882                                "lengthScale", isRadiusIncreasing ? 1.0f : -1.0f);
883
884            // GPU radial matrix is different from the original matrix, since we map the diff radius
885            // to have |dr| = 1, so manually compute the final gradient matrix here.
886
887            // Map center to (0, 0)
888            matrix.set(SkMatrix::Translate(-shader.getStartCenter().fX,
889                                           -shader.getStartCenter().fY));
890            // scale |diffRadius| to 1
891            matrix->postScale(1 / dr, 1 / dr);
892        } break;
893
894        case SkTwoPointConicalGradient::Type::kFocal: {
895            static auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
896                // Optimization flags, all specialized:
897                uniform int isRadiusIncreasing;
898                uniform int isFocalOnCircle;
899                uniform int isWellBehaved;
900                uniform int isSwapped;
901                uniform int isNativelyFocal;
902
903                uniform half invR1;  // 1/r1
904                uniform half fx;     // focalX = r0/(r0-r1)
905
906                half4 main(float2 p) {
907                    float t = -1;
908                    half v = 1; // validation flag, set to negative to discard fragment later
909
910                    float x_t = -1;
911                    if (bool(isFocalOnCircle)) {
912                        x_t = dot(p, p) / p.x;
913                    } else if (bool(isWellBehaved)) {
914                        x_t = length(p) - p.x * invR1;
915                    } else {
916                        float temp = p.x * p.x - p.y * p.y;
917
918                        // Only do sqrt if temp >= 0; this is significantly slower than checking
919                        // temp >= 0 in the if statement that checks r(t) >= 0. But GPU may break if
920                        // we sqrt a negative float. (Although I havevn't observed that on any
921                        // devices so far, and the old approach also does sqrt negative value
922                        // without a check.) If the performance is really critical, maybe we should
923                        // just compute the area where temp and x_t are always valid and drop all
924                        // these ifs.
925                        if (temp >= 0) {
926                            if (bool(isSwapped) || !bool(isRadiusIncreasing)) {
927                                x_t = -sqrt(temp) - p.x * invR1;
928                            } else {
929                                x_t = sqrt(temp) - p.x * invR1;
930                            }
931                        }
932                    }
933
934                    // The final calculation of t from x_t has lots of static optimizations but only
935                    // do them when x_t is positive (which can be assumed true if isWellBehaved is
936                    // true)
937                    if (!bool(isWellBehaved)) {
938                        // This will still calculate t even though it will be ignored later in the
939                        // pipeline to avoid a branch
940                        if (x_t <= 0.0) {
941                            v = -1;
942                        }
943                    }
944                    if (bool(isRadiusIncreasing)) {
945                        if (bool(isNativelyFocal)) {
946                            t = x_t;
947                        } else {
948                            t = x_t + fx;
949                        }
950                    } else {
951                        if (bool(isNativelyFocal)) {
952                            t = -x_t;
953                        } else {
954                            t = -x_t + fx;
955                        }
956                    }
957
958                    if (bool(isSwapped)) {
959                        t = 1 - t;
960                    }
961
962                    return half4(half(t), v, 0, 0);
963                }
964            )");
965
966            const SkTwoPointConicalGradient::FocalData& focalData = shader.getFocalData();
967            bool isRadiusIncreasing = (1 - focalData.fFocalX) > 0,
968                 isFocalOnCircle    = focalData.isFocalOnCircle(),
969                 isWellBehaved      = focalData.isWellBehaved(),
970                 isSwapped          = focalData.isSwapped(),
971                 isNativelyFocal    = focalData.isNativelyFocal();
972
973            fp = GrSkSLFP::Make(effect, "TwoPointConicalFocalLayout", /*inputFP=*/nullptr,
974                                GrSkSLFP::OptFlags::kNone,
975                                "isRadiusIncreasing", GrSkSLFP::Specialize<int>(isRadiusIncreasing),
976                                "isFocalOnCircle",    GrSkSLFP::Specialize<int>(isFocalOnCircle),
977                                "isWellBehaved",      GrSkSLFP::Specialize<int>(isWellBehaved),
978                                "isSwapped",          GrSkSLFP::Specialize<int>(isSwapped),
979                                "isNativelyFocal",    GrSkSLFP::Specialize<int>(isNativelyFocal),
980                                "invR1", 1.0f / focalData.fR1,
981                                "fx", focalData.fFocalX);
982        } break;
983    }
984    return make_gradient(shader, args, std::move(fp), matrix.getMaybeNull());
985}
986
#if GR_TEST_UTILS
// Fills in a randomized gradient description (color count, colors, optional stops, optional color
// space and tile mode) drawing every value from 'random'.
RandomParams::RandomParams(SkRandom* random) {
    // At least two colors are always requested so that the const color optimization is not
    // triggered, which would produce a non-gradient processor.
    fColorCount = random->nextRangeU(2, kMaxRandomGradientColors);
    fUseColors4f = random->nextBool();

    // Stops are omitted for a single color, and otherwise omitted or kept at random.
    // NOTE(review): given the range requested above, the single-color test looks unreachable.
    const bool omitStops = fColorCount == 1 || (fColorCount >= 2 && random->nextBool());
    fStops = omitStops ? nullptr : fStopStorage;

    // SkColor4f colors get a random (possibly null) color space (with linear gamma).
    if (fUseColors4f) {
        fColorSpace = GrTest::TestColorSpace(random);
    }

    // Generate each color and, when stops are in use, its position. Positions start at 0 and
    // every following one is placed somewhere between the previous position and 1.
    SkScalar nextStop = 0.f;
    for (int i = 0; i < fColorCount; ++i) {
        if (fUseColors4f) {
            const SkScalar r = random->nextUScalar1();
            const SkScalar g = random->nextUScalar1();
            const SkScalar b = random->nextUScalar1();
            const SkScalar a = random->nextUScalar1();
            fColors4f[i].fR = r;
            fColors4f[i].fG = g;
            fColors4f[i].fB = b;
            fColors4f[i].fA = a;
        } else {
            fColors[i] = random->nextU();
        }

        if (fStops) {
            fStops[i] = nextStop;
            if (i < fColorCount - 1) {
                nextStop += random->nextUScalar1() * (1.f - nextStop);
            } else {
                // NOTE(review): reached only after the last stop has already been stored, so
                // this value is never written to fStops.
                nextStop = 1.f;
            }
        }
    }

    fTileMode = static_cast<SkTileMode>(random->nextULessThan(kSkTileModeCount));
}
#endif
1024
1025}  // namespace GrGradientShader
1026