1cb93a386Sopenharmony_ci/*
2cb93a386Sopenharmony_ci * Copyright 2006 The Android Open Source Project
3cb93a386Sopenharmony_ci *
4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be
5cb93a386Sopenharmony_ci * found in the LICENSE file.
6cb93a386Sopenharmony_ci */
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include "include/core/SkShader.h"
9cb93a386Sopenharmony_ci#include "include/private/SkColorData.h"
10cb93a386Sopenharmony_ci#include "include/private/SkVx.h"
11cb93a386Sopenharmony_ci#include "src/core/SkCoreBlitters.h"
12cb93a386Sopenharmony_ci#include "src/core/SkOpts.h"
13cb93a386Sopenharmony_ci#include "src/core/SkXfermodePriv.h"
14cb93a386Sopenharmony_ci
15cb93a386Sopenharmony_cistatic inline int upscale_31_to_32(int value) {
16cb93a386Sopenharmony_ci    SkASSERT((unsigned)value <= 31);
17cb93a386Sopenharmony_ci    return value + (value >> 4);
18cb93a386Sopenharmony_ci}
19cb93a386Sopenharmony_ci
20cb93a386Sopenharmony_cistatic inline int blend_32(int src, int dst, int scale) {
21cb93a386Sopenharmony_ci    SkASSERT((unsigned)src <= 0xFF);
22cb93a386Sopenharmony_ci    SkASSERT((unsigned)dst <= 0xFF);
23cb93a386Sopenharmony_ci    SkASSERT((unsigned)scale <= 32);
24cb93a386Sopenharmony_ci    return dst + ((src - dst) * scale >> 5);
25cb93a386Sopenharmony_ci}
26cb93a386Sopenharmony_ci
27cb93a386Sopenharmony_cistatic inline SkPMColor blend_lcd16(int srcA, int srcR, int srcG, int srcB,
28cb93a386Sopenharmony_ci                                     SkPMColor dst, uint16_t mask) {
29cb93a386Sopenharmony_ci    if (mask == 0) {
30cb93a386Sopenharmony_ci        return dst;
31cb93a386Sopenharmony_ci    }
32cb93a386Sopenharmony_ci
33cb93a386Sopenharmony_ci    /*  We want all of these in 5bits, hence the shifts in case one of them
34cb93a386Sopenharmony_ci     *  (green) is 6bits.
35cb93a386Sopenharmony_ci     */
36cb93a386Sopenharmony_ci    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
37cb93a386Sopenharmony_ci    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
38cb93a386Sopenharmony_ci    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
39cb93a386Sopenharmony_ci
40cb93a386Sopenharmony_ci    // Now upscale them to 0..32, so we can use blend32
41cb93a386Sopenharmony_ci    maskR = upscale_31_to_32(maskR);
42cb93a386Sopenharmony_ci    maskG = upscale_31_to_32(maskG);
43cb93a386Sopenharmony_ci    maskB = upscale_31_to_32(maskB);
44cb93a386Sopenharmony_ci
45cb93a386Sopenharmony_ci    // srcA has been upscaled to 256 before passed into this function
46cb93a386Sopenharmony_ci    maskR = maskR * srcA >> 8;
47cb93a386Sopenharmony_ci    maskG = maskG * srcA >> 8;
48cb93a386Sopenharmony_ci    maskB = maskB * srcA >> 8;
49cb93a386Sopenharmony_ci
50cb93a386Sopenharmony_ci    int dstR = SkGetPackedR32(dst);
51cb93a386Sopenharmony_ci    int dstG = SkGetPackedG32(dst);
52cb93a386Sopenharmony_ci    int dstB = SkGetPackedB32(dst);
53cb93a386Sopenharmony_ci
54cb93a386Sopenharmony_ci    // LCD blitting is only supported if the dst is known/required
55cb93a386Sopenharmony_ci    // to be opaque
56cb93a386Sopenharmony_ci    return SkPackARGB32(0xFF,
57cb93a386Sopenharmony_ci                        blend_32(srcR, dstR, maskR),
58cb93a386Sopenharmony_ci                        blend_32(srcG, dstG, maskG),
59cb93a386Sopenharmony_ci                        blend_32(srcB, dstB, maskB));
60cb93a386Sopenharmony_ci}
61cb93a386Sopenharmony_ci
62cb93a386Sopenharmony_cistatic inline SkPMColor blend_lcd16_opaque(int srcR, int srcG, int srcB,
63cb93a386Sopenharmony_ci                                           SkPMColor dst, uint16_t mask,
64cb93a386Sopenharmony_ci                                           SkPMColor opaqueDst) {
65cb93a386Sopenharmony_ci    if (mask == 0) {
66cb93a386Sopenharmony_ci        return dst;
67cb93a386Sopenharmony_ci    }
68cb93a386Sopenharmony_ci
69cb93a386Sopenharmony_ci    if (0xFFFF == mask) {
70cb93a386Sopenharmony_ci        return opaqueDst;
71cb93a386Sopenharmony_ci    }
72cb93a386Sopenharmony_ci
73cb93a386Sopenharmony_ci    /*  We want all of these in 5bits, hence the shifts in case one of them
74cb93a386Sopenharmony_ci     *  (green) is 6bits.
75cb93a386Sopenharmony_ci     */
76cb93a386Sopenharmony_ci    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
77cb93a386Sopenharmony_ci    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
78cb93a386Sopenharmony_ci    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
79cb93a386Sopenharmony_ci
80cb93a386Sopenharmony_ci    // Now upscale them to 0..32, so we can use blend32
81cb93a386Sopenharmony_ci    maskR = upscale_31_to_32(maskR);
82cb93a386Sopenharmony_ci    maskG = upscale_31_to_32(maskG);
83cb93a386Sopenharmony_ci    maskB = upscale_31_to_32(maskB);
84cb93a386Sopenharmony_ci
85cb93a386Sopenharmony_ci    int dstR = SkGetPackedR32(dst);
86cb93a386Sopenharmony_ci    int dstG = SkGetPackedG32(dst);
87cb93a386Sopenharmony_ci    int dstB = SkGetPackedB32(dst);
88cb93a386Sopenharmony_ci
89cb93a386Sopenharmony_ci    // LCD blitting is only supported if the dst is known/required
90cb93a386Sopenharmony_ci    // to be opaque
91cb93a386Sopenharmony_ci    return SkPackARGB32(0xFF,
92cb93a386Sopenharmony_ci                        blend_32(srcR, dstR, maskR),
93cb93a386Sopenharmony_ci                        blend_32(srcG, dstG, maskG),
94cb93a386Sopenharmony_ci                        blend_32(srcB, dstB, maskB));
95cb93a386Sopenharmony_ci}
96cb93a386Sopenharmony_ci
97cb93a386Sopenharmony_ci
98cb93a386Sopenharmony_ci// TODO: rewrite at least the SSE code here.  It's miserable.
99cb93a386Sopenharmony_ci
100cb93a386Sopenharmony_ci#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
101cb93a386Sopenharmony_ci    #include <emmintrin.h>
102cb93a386Sopenharmony_ci
103cb93a386Sopenharmony_ci    // The following (left) shifts cause the top 5 bits of the mask components to
104cb93a386Sopenharmony_ci    // line up with the corresponding components in an SkPMColor.
105cb93a386Sopenharmony_ci    // Note that the mask's RGB16 order may differ from the SkPMColor order.
106cb93a386Sopenharmony_ci    #define SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)
107cb93a386Sopenharmony_ci    #define SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)
108cb93a386Sopenharmony_ci    #define SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)
109cb93a386Sopenharmony_ci
110cb93a386Sopenharmony_ci    #if SK_R16x5_R32x5_SHIFT == 0
111cb93a386Sopenharmony_ci        #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (x)
112cb93a386Sopenharmony_ci    #elif SK_R16x5_R32x5_SHIFT > 0
113cb93a386Sopenharmony_ci        #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))
114cb93a386Sopenharmony_ci    #else
115cb93a386Sopenharmony_ci        #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_srli_epi32(x, -SK_R16x5_R32x5_SHIFT))
116cb93a386Sopenharmony_ci    #endif
117cb93a386Sopenharmony_ci
118cb93a386Sopenharmony_ci    #if SK_G16x5_G32x5_SHIFT == 0
119cb93a386Sopenharmony_ci        #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (x)
120cb93a386Sopenharmony_ci    #elif SK_G16x5_G32x5_SHIFT > 0
121cb93a386Sopenharmony_ci        #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))
122cb93a386Sopenharmony_ci    #else
123cb93a386Sopenharmony_ci        #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_srli_epi32(x, -SK_G16x5_G32x5_SHIFT))
124cb93a386Sopenharmony_ci    #endif
125cb93a386Sopenharmony_ci
126cb93a386Sopenharmony_ci    #if SK_B16x5_B32x5_SHIFT == 0
127cb93a386Sopenharmony_ci        #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (x)
128cb93a386Sopenharmony_ci    #elif SK_B16x5_B32x5_SHIFT > 0
129cb93a386Sopenharmony_ci        #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))
130cb93a386Sopenharmony_ci    #else
131cb93a386Sopenharmony_ci        #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_srli_epi32(x, -SK_B16x5_B32x5_SHIFT))
132cb93a386Sopenharmony_ci    #endif
133cb93a386Sopenharmony_ci
134cb93a386Sopenharmony_ci    static __m128i blend_lcd16_sse2(__m128i &src, __m128i &dst, __m128i &mask, __m128i &srcA) {
135cb93a386Sopenharmony_ci        // In the following comments, the components of src, dst and mask are
136cb93a386Sopenharmony_ci        // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
137cb93a386Sopenharmony_ci        // by an R, G, B, or A suffix. Components of one of the four pixels that
138cb93a386Sopenharmony_ci        // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
139cb93a386Sopenharmony_ci        // example is the blue channel of the second destination pixel. Memory
140cb93a386Sopenharmony_ci        // layout is shown for an ARGB byte order in a color value.
141cb93a386Sopenharmony_ci
142cb93a386Sopenharmony_ci        // src and srcA store 8-bit values interleaved with zeros.
143cb93a386Sopenharmony_ci        // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
144cb93a386Sopenharmony_ci        // srcA = (srcA, 0, srcA, 0, srcA, 0, srcA, 0,
145cb93a386Sopenharmony_ci        //         srcA, 0, srcA, 0, srcA, 0, srcA, 0)
146cb93a386Sopenharmony_ci        // mask stores 16-bit values (compressed three channels) interleaved with zeros.
147cb93a386Sopenharmony_ci        // Lo and Hi denote the low and high bytes of a 16-bit value, respectively.
148cb93a386Sopenharmony_ci        // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
149cb93a386Sopenharmony_ci        //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
150cb93a386Sopenharmony_ci
151cb93a386Sopenharmony_ci        // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
152cb93a386Sopenharmony_ci        // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
153cb93a386Sopenharmony_ci        __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
154cb93a386Sopenharmony_ci                                  _mm_set1_epi32(0x1F << SK_R32_SHIFT));
155cb93a386Sopenharmony_ci
156cb93a386Sopenharmony_ci        // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
157cb93a386Sopenharmony_ci        __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
158cb93a386Sopenharmony_ci                                  _mm_set1_epi32(0x1F << SK_G32_SHIFT));
159cb93a386Sopenharmony_ci
160cb93a386Sopenharmony_ci        // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
161cb93a386Sopenharmony_ci        __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
162cb93a386Sopenharmony_ci                                  _mm_set1_epi32(0x1F << SK_B32_SHIFT));
163cb93a386Sopenharmony_ci
164cb93a386Sopenharmony_ci        // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
165cb93a386Sopenharmony_ci        // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
166cb93a386Sopenharmony_ci        // 8-bit position
167cb93a386Sopenharmony_ci        // mask = (0, m0R, m0G, m0B, 0, m1R, m1G, m1B,
168cb93a386Sopenharmony_ci        //         0, m2R, m2G, m2B, 0, m3R, m3G, m3B)
169cb93a386Sopenharmony_ci        mask = _mm_or_si128(_mm_or_si128(r, g), b);
170cb93a386Sopenharmony_ci
171cb93a386Sopenharmony_ci        // Interleave R,G,B into the lower byte of word.
172cb93a386Sopenharmony_ci        // i.e. split the sixteen 8-bit values from mask into two sets of eight
173cb93a386Sopenharmony_ci        // 16-bit values, padded by zero.
174cb93a386Sopenharmony_ci        __m128i maskLo, maskHi;
175cb93a386Sopenharmony_ci        // maskLo = (0, 0, m0R, 0, m0G, 0, m0B, 0, 0, 0, m1R, 0, m1G, 0, m1B, 0)
176cb93a386Sopenharmony_ci        maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
177cb93a386Sopenharmony_ci        // maskHi = (0, 0, m2R, 0, m2G, 0, m2B, 0, 0, 0, m3R, 0, m3G, 0, m3B, 0)
178cb93a386Sopenharmony_ci        maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
179cb93a386Sopenharmony_ci
180cb93a386Sopenharmony_ci        // Upscale from 0..31 to 0..32
181cb93a386Sopenharmony_ci        // (allows to replace division by left-shift further down)
182cb93a386Sopenharmony_ci        // Left-shift each component by 4 and add the result back to that component,
183cb93a386Sopenharmony_ci        // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
184cb93a386Sopenharmony_ci        maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
185cb93a386Sopenharmony_ci        maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
186cb93a386Sopenharmony_ci
187cb93a386Sopenharmony_ci        // Multiply each component of maskLo and maskHi by srcA
188cb93a386Sopenharmony_ci        maskLo = _mm_mullo_epi16(maskLo, srcA);
189cb93a386Sopenharmony_ci        maskHi = _mm_mullo_epi16(maskHi, srcA);
190cb93a386Sopenharmony_ci
191cb93a386Sopenharmony_ci        // Left shift mask components by 8 (divide by 256)
192cb93a386Sopenharmony_ci        maskLo = _mm_srli_epi16(maskLo, 8);
193cb93a386Sopenharmony_ci        maskHi = _mm_srli_epi16(maskHi, 8);
194cb93a386Sopenharmony_ci
195cb93a386Sopenharmony_ci        // Interleave R,G,B into the lower byte of the word
196cb93a386Sopenharmony_ci        // dstLo = (0, 0, d0R, 0, d0G, 0, d0B, 0, 0, 0, d1R, 0, d1G, 0, d1B, 0)
197cb93a386Sopenharmony_ci        __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
198cb93a386Sopenharmony_ci        // dstLo = (0, 0, d2R, 0, d2G, 0, d2B, 0, 0, 0, d3R, 0, d3G, 0, d3B, 0)
199cb93a386Sopenharmony_ci        __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
200cb93a386Sopenharmony_ci
201cb93a386Sopenharmony_ci        // mask = (src - dst) * mask
202cb93a386Sopenharmony_ci        maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
203cb93a386Sopenharmony_ci        maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));
204cb93a386Sopenharmony_ci
205cb93a386Sopenharmony_ci        // mask = (src - dst) * mask >> 5
206cb93a386Sopenharmony_ci        maskLo = _mm_srai_epi16(maskLo, 5);
207cb93a386Sopenharmony_ci        maskHi = _mm_srai_epi16(maskHi, 5);
208cb93a386Sopenharmony_ci
209cb93a386Sopenharmony_ci        // Add two pixels into result.
210cb93a386Sopenharmony_ci        // result = dst + ((src - dst) * mask >> 5)
211cb93a386Sopenharmony_ci        __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
212cb93a386Sopenharmony_ci        __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
213cb93a386Sopenharmony_ci
214cb93a386Sopenharmony_ci        // Pack into 4 32bit dst pixels.
215cb93a386Sopenharmony_ci        // resultLo and resultHi contain eight 16-bit components (two pixels) each.
216cb93a386Sopenharmony_ci        // Merge into one SSE regsiter with sixteen 8-bit values (four pixels),
217cb93a386Sopenharmony_ci        // clamping to 255 if necessary.
218cb93a386Sopenharmony_ci        return _mm_packus_epi16(resultLo, resultHi);
219cb93a386Sopenharmony_ci    }
220cb93a386Sopenharmony_ci
221cb93a386Sopenharmony_ci    static __m128i blend_lcd16_opaque_sse2(__m128i &src, __m128i &dst, __m128i &mask) {
222cb93a386Sopenharmony_ci        // In the following comments, the components of src, dst and mask are
223cb93a386Sopenharmony_ci        // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
224cb93a386Sopenharmony_ci        // by an R, G, B, or A suffix. Components of one of the four pixels that
225cb93a386Sopenharmony_ci        // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
226cb93a386Sopenharmony_ci        // example is the blue channel of the second destination pixel. Memory
227cb93a386Sopenharmony_ci        // layout is shown for an ARGB byte order in a color value.
228cb93a386Sopenharmony_ci
229cb93a386Sopenharmony_ci        // src and srcA store 8-bit values interleaved with zeros.
230cb93a386Sopenharmony_ci        // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
231cb93a386Sopenharmony_ci        // mask stores 16-bit values (shown as high and low bytes) interleaved with
232cb93a386Sopenharmony_ci        // zeros
233cb93a386Sopenharmony_ci        // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
234cb93a386Sopenharmony_ci        //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
235cb93a386Sopenharmony_ci
236cb93a386Sopenharmony_ci        // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
237cb93a386Sopenharmony_ci        // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
238cb93a386Sopenharmony_ci        __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
239cb93a386Sopenharmony_ci                                  _mm_set1_epi32(0x1F << SK_R32_SHIFT));
240cb93a386Sopenharmony_ci
241cb93a386Sopenharmony_ci        // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
242cb93a386Sopenharmony_ci        __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
243cb93a386Sopenharmony_ci                                  _mm_set1_epi32(0x1F << SK_G32_SHIFT));
244cb93a386Sopenharmony_ci
245cb93a386Sopenharmony_ci        // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
246cb93a386Sopenharmony_ci        __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
247cb93a386Sopenharmony_ci                                  _mm_set1_epi32(0x1F << SK_B32_SHIFT));
248cb93a386Sopenharmony_ci
249cb93a386Sopenharmony_ci        // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
250cb93a386Sopenharmony_ci        // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
251cb93a386Sopenharmony_ci        // 8-bit position
252cb93a386Sopenharmony_ci        // mask = (0, m0R, m0G, m0B, 0, m1R, m1G, m1B,
253cb93a386Sopenharmony_ci        //         0, m2R, m2G, m2B, 0, m3R, m3G, m3B)
254cb93a386Sopenharmony_ci        mask = _mm_or_si128(_mm_or_si128(r, g), b);
255cb93a386Sopenharmony_ci
256cb93a386Sopenharmony_ci        // Interleave R,G,B into the lower byte of word.
257cb93a386Sopenharmony_ci        // i.e. split the sixteen 8-bit values from mask into two sets of eight
258cb93a386Sopenharmony_ci        // 16-bit values, padded by zero.
259cb93a386Sopenharmony_ci        __m128i maskLo, maskHi;
260cb93a386Sopenharmony_ci        // maskLo = (0, 0, m0R, 0, m0G, 0, m0B, 0, 0, 0, m1R, 0, m1G, 0, m1B, 0)
261cb93a386Sopenharmony_ci        maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
262cb93a386Sopenharmony_ci        // maskHi = (0, 0, m2R, 0, m2G, 0, m2B, 0, 0, 0, m3R, 0, m3G, 0, m3B, 0)
263cb93a386Sopenharmony_ci        maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
264cb93a386Sopenharmony_ci
265cb93a386Sopenharmony_ci        // Upscale from 0..31 to 0..32
266cb93a386Sopenharmony_ci        // (allows to replace division by left-shift further down)
267cb93a386Sopenharmony_ci        // Left-shift each component by 4 and add the result back to that component,
268cb93a386Sopenharmony_ci        // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
269cb93a386Sopenharmony_ci        maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
270cb93a386Sopenharmony_ci        maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
271cb93a386Sopenharmony_ci
272cb93a386Sopenharmony_ci        // Interleave R,G,B into the lower byte of the word
273cb93a386Sopenharmony_ci        // dstLo = (0, 0, d0R, 0, d0G, 0, d0B, 0, 0, 0, d1R, 0, d1G, 0, d1B, 0)
274cb93a386Sopenharmony_ci        __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
275cb93a386Sopenharmony_ci        // dstLo = (0, 0, d2R, 0, d2G, 0, d2B, 0, 0, 0, d3R, 0, d3G, 0, d3B, 0)
276cb93a386Sopenharmony_ci        __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
277cb93a386Sopenharmony_ci
278cb93a386Sopenharmony_ci        // mask = (src - dst) * mask
279cb93a386Sopenharmony_ci        maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
280cb93a386Sopenharmony_ci        maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));
281cb93a386Sopenharmony_ci
282cb93a386Sopenharmony_ci        // mask = (src - dst) * mask >> 5
283cb93a386Sopenharmony_ci        maskLo = _mm_srai_epi16(maskLo, 5);
284cb93a386Sopenharmony_ci        maskHi = _mm_srai_epi16(maskHi, 5);
285cb93a386Sopenharmony_ci
286cb93a386Sopenharmony_ci        // Add two pixels into result.
287cb93a386Sopenharmony_ci        // result = dst + ((src - dst) * mask >> 5)
288cb93a386Sopenharmony_ci        __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
289cb93a386Sopenharmony_ci        __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
290cb93a386Sopenharmony_ci
291cb93a386Sopenharmony_ci        // Pack into 4 32bit dst pixels and force opaque.
292cb93a386Sopenharmony_ci        // resultLo and resultHi contain eight 16-bit components (two pixels) each.
293cb93a386Sopenharmony_ci        // Merge into one SSE regsiter with sixteen 8-bit values (four pixels),
294cb93a386Sopenharmony_ci        // clamping to 255 if necessary. Set alpha components to 0xFF.
295cb93a386Sopenharmony_ci        return _mm_or_si128(_mm_packus_epi16(resultLo, resultHi),
296cb93a386Sopenharmony_ci                            _mm_set1_epi32(SK_A32_MASK << SK_A32_SHIFT));
297cb93a386Sopenharmony_ci    }
298cb93a386Sopenharmony_ci
299cb93a386Sopenharmony_ci    void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor) {
300cb93a386Sopenharmony_ci        if (width <= 0) {
301cb93a386Sopenharmony_ci            return;
302cb93a386Sopenharmony_ci        }
303cb93a386Sopenharmony_ci
304cb93a386Sopenharmony_ci        int srcA = SkColorGetA(src);
305cb93a386Sopenharmony_ci        int srcR = SkColorGetR(src);
306cb93a386Sopenharmony_ci        int srcG = SkColorGetG(src);
307cb93a386Sopenharmony_ci        int srcB = SkColorGetB(src);
308cb93a386Sopenharmony_ci
309cb93a386Sopenharmony_ci        srcA = SkAlpha255To256(srcA);
310cb93a386Sopenharmony_ci
311cb93a386Sopenharmony_ci        if (width >= 4) {
312cb93a386Sopenharmony_ci            SkASSERT(((size_t)dst & 0x03) == 0);
313cb93a386Sopenharmony_ci            while (((size_t)dst & 0x0F) != 0) {
314cb93a386Sopenharmony_ci                *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
315cb93a386Sopenharmony_ci                mask++;
316cb93a386Sopenharmony_ci                dst++;
317cb93a386Sopenharmony_ci                width--;
318cb93a386Sopenharmony_ci            }
319cb93a386Sopenharmony_ci
320cb93a386Sopenharmony_ci            __m128i *d = reinterpret_cast<__m128i*>(dst);
321cb93a386Sopenharmony_ci            // Set alpha to 0xFF and replicate source four times in SSE register.
322cb93a386Sopenharmony_ci            __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
323cb93a386Sopenharmony_ci            // Interleave with zeros to get two sets of four 16-bit values.
324cb93a386Sopenharmony_ci            src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
325cb93a386Sopenharmony_ci            // Set srcA_sse to contain eight copies of srcA, padded with zero.
326cb93a386Sopenharmony_ci            // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
327cb93a386Sopenharmony_ci            __m128i srcA_sse = _mm_set1_epi16(srcA);
328cb93a386Sopenharmony_ci            while (width >= 4) {
329cb93a386Sopenharmony_ci                // Load four destination pixels into dst_sse.
330cb93a386Sopenharmony_ci                __m128i dst_sse = _mm_load_si128(d);
331cb93a386Sopenharmony_ci                // Load four 16-bit masks into lower half of mask_sse.
332cb93a386Sopenharmony_ci                __m128i mask_sse = _mm_loadl_epi64(
333cb93a386Sopenharmony_ci                                       reinterpret_cast<const __m128i*>(mask));
334cb93a386Sopenharmony_ci
335cb93a386Sopenharmony_ci                // Check whether masks are equal to 0 and get the highest bit
336cb93a386Sopenharmony_ci                // of each byte of result, if masks are all zero, we will get
337cb93a386Sopenharmony_ci                // pack_cmp to 0xFFFF
338cb93a386Sopenharmony_ci                int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
339cb93a386Sopenharmony_ci                                                 _mm_setzero_si128()));
340cb93a386Sopenharmony_ci
341cb93a386Sopenharmony_ci                // if mask pixels are not all zero, we will blend the dst pixels
342cb93a386Sopenharmony_ci                if (pack_cmp != 0xFFFF) {
343cb93a386Sopenharmony_ci                    // Unpack 4 16bit mask pixels to
344cb93a386Sopenharmony_ci                    // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
345cb93a386Sopenharmony_ci                    //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
346cb93a386Sopenharmony_ci                    mask_sse = _mm_unpacklo_epi16(mask_sse,
347cb93a386Sopenharmony_ci                                                  _mm_setzero_si128());
348cb93a386Sopenharmony_ci
349cb93a386Sopenharmony_ci                    // Process 4 32bit dst pixels
350cb93a386Sopenharmony_ci                    __m128i result = blend_lcd16_sse2(src_sse, dst_sse, mask_sse, srcA_sse);
351cb93a386Sopenharmony_ci                    _mm_store_si128(d, result);
352cb93a386Sopenharmony_ci                }
353cb93a386Sopenharmony_ci
354cb93a386Sopenharmony_ci                d++;
355cb93a386Sopenharmony_ci                mask += 4;
356cb93a386Sopenharmony_ci                width -= 4;
357cb93a386Sopenharmony_ci            }
358cb93a386Sopenharmony_ci
359cb93a386Sopenharmony_ci            dst = reinterpret_cast<SkPMColor*>(d);
360cb93a386Sopenharmony_ci        }
361cb93a386Sopenharmony_ci
362cb93a386Sopenharmony_ci        while (width > 0) {
363cb93a386Sopenharmony_ci            *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
364cb93a386Sopenharmony_ci            mask++;
365cb93a386Sopenharmony_ci            dst++;
366cb93a386Sopenharmony_ci            width--;
367cb93a386Sopenharmony_ci        }
368cb93a386Sopenharmony_ci    }
369cb93a386Sopenharmony_ci
370cb93a386Sopenharmony_ci    void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[],
371cb93a386Sopenharmony_ci                                   SkColor src, int width, SkPMColor opaqueDst) {
372cb93a386Sopenharmony_ci        if (width <= 0) {
373cb93a386Sopenharmony_ci            return;
374cb93a386Sopenharmony_ci        }
375cb93a386Sopenharmony_ci
376cb93a386Sopenharmony_ci        int srcR = SkColorGetR(src);
377cb93a386Sopenharmony_ci        int srcG = SkColorGetG(src);
378cb93a386Sopenharmony_ci        int srcB = SkColorGetB(src);
379cb93a386Sopenharmony_ci
380cb93a386Sopenharmony_ci        if (width >= 4) {
381cb93a386Sopenharmony_ci            SkASSERT(((size_t)dst & 0x03) == 0);
382cb93a386Sopenharmony_ci            while (((size_t)dst & 0x0F) != 0) {
383cb93a386Sopenharmony_ci                *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
384cb93a386Sopenharmony_ci                mask++;
385cb93a386Sopenharmony_ci                dst++;
386cb93a386Sopenharmony_ci                width--;
387cb93a386Sopenharmony_ci            }
388cb93a386Sopenharmony_ci
389cb93a386Sopenharmony_ci            __m128i *d = reinterpret_cast<__m128i*>(dst);
390cb93a386Sopenharmony_ci            // Set alpha to 0xFF and replicate source four times in SSE register.
391cb93a386Sopenharmony_ci            __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
392cb93a386Sopenharmony_ci            // Set srcA_sse to contain eight copies of srcA, padded with zero.
393cb93a386Sopenharmony_ci            // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
394cb93a386Sopenharmony_ci            src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
395cb93a386Sopenharmony_ci            while (width >= 4) {
396cb93a386Sopenharmony_ci                // Load four destination pixels into dst_sse.
397cb93a386Sopenharmony_ci                __m128i dst_sse = _mm_load_si128(d);
398cb93a386Sopenharmony_ci                // Load four 16-bit masks into lower half of mask_sse.
399cb93a386Sopenharmony_ci                __m128i mask_sse = _mm_loadl_epi64(
400cb93a386Sopenharmony_ci                                       reinterpret_cast<const __m128i*>(mask));
401cb93a386Sopenharmony_ci
402cb93a386Sopenharmony_ci                // Check whether masks are equal to 0 and get the highest bit
403cb93a386Sopenharmony_ci                // of each byte of result, if masks are all zero, we will get
404cb93a386Sopenharmony_ci                // pack_cmp to 0xFFFF
405cb93a386Sopenharmony_ci                int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
406cb93a386Sopenharmony_ci                                                 _mm_setzero_si128()));
407cb93a386Sopenharmony_ci
408cb93a386Sopenharmony_ci                // if mask pixels are not all zero, we will blend the dst pixels
409cb93a386Sopenharmony_ci                if (pack_cmp != 0xFFFF) {
410cb93a386Sopenharmony_ci                    // Unpack 4 16bit mask pixels to
411cb93a386Sopenharmony_ci                    // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
412cb93a386Sopenharmony_ci                    //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
413cb93a386Sopenharmony_ci                    mask_sse = _mm_unpacklo_epi16(mask_sse,
414cb93a386Sopenharmony_ci                                                  _mm_setzero_si128());
415cb93a386Sopenharmony_ci
416cb93a386Sopenharmony_ci                    // Process 4 32bit dst pixels
417cb93a386Sopenharmony_ci                    __m128i result = blend_lcd16_opaque_sse2(src_sse, dst_sse, mask_sse);
418cb93a386Sopenharmony_ci                    _mm_store_si128(d, result);
419cb93a386Sopenharmony_ci                }
420cb93a386Sopenharmony_ci
421cb93a386Sopenharmony_ci                d++;
422cb93a386Sopenharmony_ci                mask += 4;
423cb93a386Sopenharmony_ci                width -= 4;
424cb93a386Sopenharmony_ci            }
425cb93a386Sopenharmony_ci
426cb93a386Sopenharmony_ci            dst = reinterpret_cast<SkPMColor*>(d);
427cb93a386Sopenharmony_ci        }
428cb93a386Sopenharmony_ci
429cb93a386Sopenharmony_ci        while (width > 0) {
430cb93a386Sopenharmony_ci            *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
431cb93a386Sopenharmony_ci            mask++;
432cb93a386Sopenharmony_ci            dst++;
433cb93a386Sopenharmony_ci            width--;
434cb93a386Sopenharmony_ci        }
435cb93a386Sopenharmony_ci    }
436cb93a386Sopenharmony_ci
437cb93a386Sopenharmony_ci#elif defined(SK_ARM_HAS_NEON)
438cb93a386Sopenharmony_ci    #include <arm_neon.h>
439cb93a386Sopenharmony_ci
440cb93a386Sopenharmony_ci    #define NEON_A (SK_A32_SHIFT / 8)
441cb93a386Sopenharmony_ci    #define NEON_R (SK_R32_SHIFT / 8)
442cb93a386Sopenharmony_ci    #define NEON_G (SK_G32_SHIFT / 8)
443cb93a386Sopenharmony_ci    #define NEON_B (SK_B32_SHIFT / 8)
444cb93a386Sopenharmony_ci
445cb93a386Sopenharmony_ci    static inline uint8x8_t blend_32_neon(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
446cb93a386Sopenharmony_ci        int16x8_t src_wide, dst_wide;
447cb93a386Sopenharmony_ci
448cb93a386Sopenharmony_ci        src_wide = vreinterpretq_s16_u16(vmovl_u8(src));
449cb93a386Sopenharmony_ci        dst_wide = vreinterpretq_s16_u16(vmovl_u8(dst));
450cb93a386Sopenharmony_ci
451cb93a386Sopenharmony_ci        src_wide = (src_wide - dst_wide) * vreinterpretq_s16_u16(scale);
452cb93a386Sopenharmony_ci
453cb93a386Sopenharmony_ci        dst_wide += vshrq_n_s16(src_wide, 5);
454cb93a386Sopenharmony_ci
455cb93a386Sopenharmony_ci        return vmovn_u16(vreinterpretq_u16_s16(dst_wide));
456cb93a386Sopenharmony_ci    }
457cb93a386Sopenharmony_ci
458cb93a386Sopenharmony_ci    void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t src[],
459cb93a386Sopenharmony_ci                               SkColor color, int width,
460cb93a386Sopenharmony_ci                               SkPMColor opaqueDst) {
461cb93a386Sopenharmony_ci        int colR = SkColorGetR(color);
462cb93a386Sopenharmony_ci        int colG = SkColorGetG(color);
463cb93a386Sopenharmony_ci        int colB = SkColorGetB(color);
464cb93a386Sopenharmony_ci
465cb93a386Sopenharmony_ci        uint8x8_t vcolR = vdup_n_u8(colR);
466cb93a386Sopenharmony_ci        uint8x8_t vcolG = vdup_n_u8(colG);
467cb93a386Sopenharmony_ci        uint8x8_t vcolB = vdup_n_u8(colB);
468cb93a386Sopenharmony_ci        uint8x8_t vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
469cb93a386Sopenharmony_ci        uint8x8_t vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
470cb93a386Sopenharmony_ci        uint8x8_t vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
471cb93a386Sopenharmony_ci        uint8x8_t vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
472cb93a386Sopenharmony_ci
473cb93a386Sopenharmony_ci        while (width >= 8) {
474cb93a386Sopenharmony_ci            uint8x8x4_t vdst;
475cb93a386Sopenharmony_ci            uint16x8_t vmask;
476cb93a386Sopenharmony_ci            uint16x8_t vmaskR, vmaskG, vmaskB;
477cb93a386Sopenharmony_ci            uint8x8_t vsel_trans, vsel_opq;
478cb93a386Sopenharmony_ci
479cb93a386Sopenharmony_ci            vdst = vld4_u8((uint8_t*)dst);
480cb93a386Sopenharmony_ci            vmask = vld1q_u16(src);
481cb93a386Sopenharmony_ci
482cb93a386Sopenharmony_ci            // Prepare compare masks
483cb93a386Sopenharmony_ci            vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
484cb93a386Sopenharmony_ci            vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
485cb93a386Sopenharmony_ci
486cb93a386Sopenharmony_ci            // Get all the color masks on 5 bits
487cb93a386Sopenharmony_ci            vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
488cb93a386Sopenharmony_ci            vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
489cb93a386Sopenharmony_ci                                 SK_B16_BITS + SK_R16_BITS + 1);
490cb93a386Sopenharmony_ci            vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
491cb93a386Sopenharmony_ci
492cb93a386Sopenharmony_ci            // Upscale to 0..32
493cb93a386Sopenharmony_ci            vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
494cb93a386Sopenharmony_ci            vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
495cb93a386Sopenharmony_ci            vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
496cb93a386Sopenharmony_ci
497cb93a386Sopenharmony_ci            vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
498cb93a386Sopenharmony_ci            vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
499cb93a386Sopenharmony_ci
500cb93a386Sopenharmony_ci            vdst.val[NEON_R] = blend_32_neon(vcolR, vdst.val[NEON_R], vmaskR);
501cb93a386Sopenharmony_ci            vdst.val[NEON_G] = blend_32_neon(vcolG, vdst.val[NEON_G], vmaskG);
502cb93a386Sopenharmony_ci            vdst.val[NEON_B] = blend_32_neon(vcolB, vdst.val[NEON_B], vmaskB);
503cb93a386Sopenharmony_ci
504cb93a386Sopenharmony_ci            vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
505cb93a386Sopenharmony_ci            vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
506cb93a386Sopenharmony_ci            vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
507cb93a386Sopenharmony_ci
508cb93a386Sopenharmony_ci            vst4_u8((uint8_t*)dst, vdst);
509cb93a386Sopenharmony_ci
510cb93a386Sopenharmony_ci            dst += 8;
511cb93a386Sopenharmony_ci            src += 8;
512cb93a386Sopenharmony_ci            width -= 8;
513cb93a386Sopenharmony_ci        }
514cb93a386Sopenharmony_ci
515cb93a386Sopenharmony_ci        // Leftovers
516cb93a386Sopenharmony_ci        for (int i = 0; i < width; i++) {
517cb93a386Sopenharmony_ci            dst[i] = blend_lcd16_opaque(colR, colG, colB, dst[i], src[i], opaqueDst);
518cb93a386Sopenharmony_ci        }
519cb93a386Sopenharmony_ci    }
520cb93a386Sopenharmony_ci
521cb93a386Sopenharmony_ci    void blit_row_lcd16(SkPMColor dst[], const uint16_t src[],
522cb93a386Sopenharmony_ci                        SkColor color, int width, SkPMColor) {
523cb93a386Sopenharmony_ci        int colA = SkColorGetA(color);
524cb93a386Sopenharmony_ci        int colR = SkColorGetR(color);
525cb93a386Sopenharmony_ci        int colG = SkColorGetG(color);
526cb93a386Sopenharmony_ci        int colB = SkColorGetB(color);
527cb93a386Sopenharmony_ci
528cb93a386Sopenharmony_ci        colA = SkAlpha255To256(colA);
529cb93a386Sopenharmony_ci
530cb93a386Sopenharmony_ci        uint16x8_t vcolA = vdupq_n_u16(colA);
531cb93a386Sopenharmony_ci        uint8x8_t vcolR = vdup_n_u8(colR);
532cb93a386Sopenharmony_ci        uint8x8_t vcolG = vdup_n_u8(colG);
533cb93a386Sopenharmony_ci        uint8x8_t vcolB = vdup_n_u8(colB);
534cb93a386Sopenharmony_ci
535cb93a386Sopenharmony_ci        while (width >= 8) {
536cb93a386Sopenharmony_ci            uint8x8x4_t vdst;
537cb93a386Sopenharmony_ci            uint16x8_t vmask;
538cb93a386Sopenharmony_ci            uint16x8_t vmaskR, vmaskG, vmaskB;
539cb93a386Sopenharmony_ci
540cb93a386Sopenharmony_ci            vdst = vld4_u8((uint8_t*)dst);
541cb93a386Sopenharmony_ci            vmask = vld1q_u16(src);
542cb93a386Sopenharmony_ci
543cb93a386Sopenharmony_ci            // Get all the color masks on 5 bits
544cb93a386Sopenharmony_ci            vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
545cb93a386Sopenharmony_ci            vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
546cb93a386Sopenharmony_ci                                 SK_B16_BITS + SK_R16_BITS + 1);
547cb93a386Sopenharmony_ci            vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
548cb93a386Sopenharmony_ci
549cb93a386Sopenharmony_ci            // Upscale to 0..32
550cb93a386Sopenharmony_ci            vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
551cb93a386Sopenharmony_ci            vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
552cb93a386Sopenharmony_ci            vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
553cb93a386Sopenharmony_ci
554cb93a386Sopenharmony_ci            vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
555cb93a386Sopenharmony_ci            vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
556cb93a386Sopenharmony_ci            vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
557cb93a386Sopenharmony_ci
558cb93a386Sopenharmony_ci            vdst.val[NEON_A] = vdup_n_u8(0xFF);
559cb93a386Sopenharmony_ci            vdst.val[NEON_R] = blend_32_neon(vcolR, vdst.val[NEON_R], vmaskR);
560cb93a386Sopenharmony_ci            vdst.val[NEON_G] = blend_32_neon(vcolG, vdst.val[NEON_G], vmaskG);
561cb93a386Sopenharmony_ci            vdst.val[NEON_B] = blend_32_neon(vcolB, vdst.val[NEON_B], vmaskB);
562cb93a386Sopenharmony_ci
563cb93a386Sopenharmony_ci            vst4_u8((uint8_t*)dst, vdst);
564cb93a386Sopenharmony_ci
565cb93a386Sopenharmony_ci            dst += 8;
566cb93a386Sopenharmony_ci            src += 8;
567cb93a386Sopenharmony_ci            width -= 8;
568cb93a386Sopenharmony_ci        }
569cb93a386Sopenharmony_ci
570cb93a386Sopenharmony_ci        for (int i = 0; i < width; i++) {
571cb93a386Sopenharmony_ci            dst[i] = blend_lcd16(colA, colR, colG, colB, dst[i], src[i]);
572cb93a386Sopenharmony_ci        }
573cb93a386Sopenharmony_ci    }
574cb93a386Sopenharmony_ci
575cb93a386Sopenharmony_ci#else
576cb93a386Sopenharmony_ci
577cb93a386Sopenharmony_ci    static inline void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[],
578cb93a386Sopenharmony_ci                                      SkColor src, int width, SkPMColor) {
579cb93a386Sopenharmony_ci        int srcA = SkColorGetA(src);
580cb93a386Sopenharmony_ci        int srcR = SkColorGetR(src);
581cb93a386Sopenharmony_ci        int srcG = SkColorGetG(src);
582cb93a386Sopenharmony_ci        int srcB = SkColorGetB(src);
583cb93a386Sopenharmony_ci
584cb93a386Sopenharmony_ci        srcA = SkAlpha255To256(srcA);
585cb93a386Sopenharmony_ci
586cb93a386Sopenharmony_ci        for (int i = 0; i < width; i++) {
587cb93a386Sopenharmony_ci            dst[i] = blend_lcd16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
588cb93a386Sopenharmony_ci        }
589cb93a386Sopenharmony_ci    }
590cb93a386Sopenharmony_ci
591cb93a386Sopenharmony_ci    static inline void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[],
592cb93a386Sopenharmony_ci                                             SkColor src, int width,
593cb93a386Sopenharmony_ci                                             SkPMColor opaqueDst) {
594cb93a386Sopenharmony_ci        int srcR = SkColorGetR(src);
595cb93a386Sopenharmony_ci        int srcG = SkColorGetG(src);
596cb93a386Sopenharmony_ci        int srcB = SkColorGetB(src);
597cb93a386Sopenharmony_ci
598cb93a386Sopenharmony_ci        for (int i = 0; i < width; i++) {
599cb93a386Sopenharmony_ci            dst[i] = blend_lcd16_opaque(srcR, srcG, srcB, dst[i], mask[i], opaqueDst);
600cb93a386Sopenharmony_ci        }
601cb93a386Sopenharmony_ci    }
602cb93a386Sopenharmony_ci
603cb93a386Sopenharmony_ci#endif
604cb93a386Sopenharmony_ci
605cb93a386Sopenharmony_cistatic bool blit_color(const SkPixmap& device,
606cb93a386Sopenharmony_ci                       const SkMask& mask,
607cb93a386Sopenharmony_ci                       const SkIRect& clip,
608cb93a386Sopenharmony_ci                       SkColor color) {
609cb93a386Sopenharmony_ci    int x = clip.fLeft,
610cb93a386Sopenharmony_ci        y = clip.fTop;
611cb93a386Sopenharmony_ci
612cb93a386Sopenharmony_ci    if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kA8_Format) {
613cb93a386Sopenharmony_ci        SkOpts::blit_mask_d32_a8(device.writable_addr32(x,y), device.rowBytes(),
614cb93a386Sopenharmony_ci                                 (const SkAlpha*)mask.getAddr(x,y), mask.fRowBytes,
615cb93a386Sopenharmony_ci                                 color, clip.width(), clip.height());
616cb93a386Sopenharmony_ci        return true;
617cb93a386Sopenharmony_ci    }
618cb93a386Sopenharmony_ci
619cb93a386Sopenharmony_ci    if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kLCD16_Format) {
620cb93a386Sopenharmony_ci        auto dstRow  = device.writable_addr32(x,y);
621cb93a386Sopenharmony_ci        auto maskRow = (const uint16_t*)mask.getAddr(x,y);
622cb93a386Sopenharmony_ci
623cb93a386Sopenharmony_ci        auto blit_row = blit_row_lcd16;
624cb93a386Sopenharmony_ci        SkPMColor opaqueDst = 0;  // ignored unless opaque
625cb93a386Sopenharmony_ci
626cb93a386Sopenharmony_ci        if (0xff == SkColorGetA(color)) {
627cb93a386Sopenharmony_ci            blit_row  = blit_row_lcd16_opaque;
628cb93a386Sopenharmony_ci            opaqueDst = SkPreMultiplyColor(color);
629cb93a386Sopenharmony_ci        }
630cb93a386Sopenharmony_ci
631cb93a386Sopenharmony_ci        for (int height = clip.height(); height --> 0; ) {
632cb93a386Sopenharmony_ci            blit_row(dstRow, maskRow, color, clip.width(), opaqueDst);
633cb93a386Sopenharmony_ci
634cb93a386Sopenharmony_ci            dstRow  = (SkPMColor*)     ((      char*) dstRow + device.rowBytes());
635cb93a386Sopenharmony_ci            maskRow = (const uint16_t*)((const char*)maskRow +  mask.fRowBytes);
636cb93a386Sopenharmony_ci        }
637cb93a386Sopenharmony_ci        return true;
638cb93a386Sopenharmony_ci    }
639cb93a386Sopenharmony_ci
640cb93a386Sopenharmony_ci    return false;
641cb93a386Sopenharmony_ci}
642cb93a386Sopenharmony_ci
643cb93a386Sopenharmony_ci///////////////////////////////////////////////////////////////////////////////
644cb93a386Sopenharmony_ci
645cb93a386Sopenharmony_cistatic void SkARGB32_Blit32(const SkPixmap& device, const SkMask& mask,
646cb93a386Sopenharmony_ci                            const SkIRect& clip, SkPMColor srcColor) {
647cb93a386Sopenharmony_ci    U8CPU alpha = SkGetPackedA32(srcColor);
648cb93a386Sopenharmony_ci    unsigned flags = SkBlitRow::kSrcPixelAlpha_Flag32;
649cb93a386Sopenharmony_ci    if (alpha != 255) {
650cb93a386Sopenharmony_ci        flags |= SkBlitRow::kGlobalAlpha_Flag32;
651cb93a386Sopenharmony_ci    }
652cb93a386Sopenharmony_ci    SkBlitRow::Proc32 proc = SkBlitRow::Factory32(flags);
653cb93a386Sopenharmony_ci
654cb93a386Sopenharmony_ci    int x = clip.fLeft;
655cb93a386Sopenharmony_ci    int y = clip.fTop;
656cb93a386Sopenharmony_ci    int width = clip.width();
657cb93a386Sopenharmony_ci    int height = clip.height();
658cb93a386Sopenharmony_ci
659cb93a386Sopenharmony_ci    SkPMColor* dstRow = device.writable_addr32(x, y);
660cb93a386Sopenharmony_ci    const SkPMColor* srcRow = reinterpret_cast<const SkPMColor*>(mask.getAddr8(x, y));
661cb93a386Sopenharmony_ci
662cb93a386Sopenharmony_ci    do {
663cb93a386Sopenharmony_ci        proc(dstRow, srcRow, width, alpha);
664cb93a386Sopenharmony_ci        dstRow = (SkPMColor*)((char*)dstRow + device.rowBytes());
665cb93a386Sopenharmony_ci        srcRow = (const SkPMColor*)((const char*)srcRow + mask.fRowBytes);
666cb93a386Sopenharmony_ci    } while (--height != 0);
667cb93a386Sopenharmony_ci}
668cb93a386Sopenharmony_ci
669cb93a386Sopenharmony_ci//////////////////////////////////////////////////////////////////////////////////////
670cb93a386Sopenharmony_ci
671cb93a386Sopenharmony_ciSkARGB32_Blitter::SkARGB32_Blitter(const SkPixmap& device, const SkPaint& paint)
672cb93a386Sopenharmony_ci        : INHERITED(device) {
673cb93a386Sopenharmony_ci    SkColor color = paint.getColor();
674cb93a386Sopenharmony_ci    fColor = color;
675cb93a386Sopenharmony_ci
676cb93a386Sopenharmony_ci    fSrcA = SkColorGetA(color);
677cb93a386Sopenharmony_ci    unsigned scale = SkAlpha255To256(fSrcA);
678cb93a386Sopenharmony_ci    fSrcR = SkAlphaMul(SkColorGetR(color), scale);
679cb93a386Sopenharmony_ci    fSrcG = SkAlphaMul(SkColorGetG(color), scale);
680cb93a386Sopenharmony_ci    fSrcB = SkAlphaMul(SkColorGetB(color), scale);
681cb93a386Sopenharmony_ci
682cb93a386Sopenharmony_ci    fPMColor = SkPackARGB32(fSrcA, fSrcR, fSrcG, fSrcB);
683cb93a386Sopenharmony_ci}
684cb93a386Sopenharmony_ci
685cb93a386Sopenharmony_ciconst SkPixmap* SkARGB32_Blitter::justAnOpaqueColor(uint32_t* value) {
686cb93a386Sopenharmony_ci    if (255 == fSrcA) {
687cb93a386Sopenharmony_ci        *value = fPMColor;
688cb93a386Sopenharmony_ci        return &fDevice;
689cb93a386Sopenharmony_ci    }
690cb93a386Sopenharmony_ci    return nullptr;
691cb93a386Sopenharmony_ci}
692cb93a386Sopenharmony_ci
693cb93a386Sopenharmony_ci#if defined _WIN32  // disable warning : local variable used without having been initialized
694cb93a386Sopenharmony_ci#pragma warning ( push )
695cb93a386Sopenharmony_ci#pragma warning ( disable : 4701 )
696cb93a386Sopenharmony_ci#endif
697cb93a386Sopenharmony_ci
698cb93a386Sopenharmony_civoid SkARGB32_Blitter::blitH(int x, int y, int width) {
699cb93a386Sopenharmony_ci    SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width());
700cb93a386Sopenharmony_ci
701cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
702cb93a386Sopenharmony_ci    SkBlitRow::Color32(device, device, width, fPMColor);
703cb93a386Sopenharmony_ci}
704cb93a386Sopenharmony_ci
705cb93a386Sopenharmony_civoid SkARGB32_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
706cb93a386Sopenharmony_ci                                 const int16_t runs[]) {
707cb93a386Sopenharmony_ci    if (fSrcA == 0) {
708cb93a386Sopenharmony_ci        return;
709cb93a386Sopenharmony_ci    }
710cb93a386Sopenharmony_ci
711cb93a386Sopenharmony_ci    uint32_t    color = fPMColor;
712cb93a386Sopenharmony_ci    uint32_t*   device = fDevice.writable_addr32(x, y);
713cb93a386Sopenharmony_ci    unsigned    opaqueMask = fSrcA; // if fSrcA is 0xFF, then we will catch the fast opaque case
714cb93a386Sopenharmony_ci
715cb93a386Sopenharmony_ci    for (;;) {
716cb93a386Sopenharmony_ci        int count = runs[0];
717cb93a386Sopenharmony_ci        SkASSERT(count >= 0);
718cb93a386Sopenharmony_ci        if (count <= 0) {
719cb93a386Sopenharmony_ci            return;
720cb93a386Sopenharmony_ci        }
721cb93a386Sopenharmony_ci        unsigned aa = antialias[0];
722cb93a386Sopenharmony_ci        if (aa) {
723cb93a386Sopenharmony_ci            if ((opaqueMask & aa) == 255) {
724cb93a386Sopenharmony_ci                sk_memset32(device, color, count);
725cb93a386Sopenharmony_ci            } else {
726cb93a386Sopenharmony_ci                uint32_t sc = SkAlphaMulQ(color, SkAlpha255To256(aa));
727cb93a386Sopenharmony_ci                SkBlitRow::Color32(device, device, count, sc);
728cb93a386Sopenharmony_ci            }
729cb93a386Sopenharmony_ci        }
730cb93a386Sopenharmony_ci        runs += count;
731cb93a386Sopenharmony_ci        antialias += count;
732cb93a386Sopenharmony_ci        device += count;
733cb93a386Sopenharmony_ci    }
734cb93a386Sopenharmony_ci}
735cb93a386Sopenharmony_ci
736cb93a386Sopenharmony_civoid SkARGB32_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
737cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
738cb93a386Sopenharmony_ci    SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)
739cb93a386Sopenharmony_ci
740cb93a386Sopenharmony_ci    device[0] = SkBlendARGB32(fPMColor, device[0], a0);
741cb93a386Sopenharmony_ci    device[1] = SkBlendARGB32(fPMColor, device[1], a1);
742cb93a386Sopenharmony_ci}
743cb93a386Sopenharmony_ci
744cb93a386Sopenharmony_civoid SkARGB32_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
745cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
746cb93a386Sopenharmony_ci    SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)
747cb93a386Sopenharmony_ci
748cb93a386Sopenharmony_ci    device[0] = SkBlendARGB32(fPMColor, device[0], a0);
749cb93a386Sopenharmony_ci    device = (uint32_t*)((char*)device + fDevice.rowBytes());
750cb93a386Sopenharmony_ci    device[0] = SkBlendARGB32(fPMColor, device[0], a1);
751cb93a386Sopenharmony_ci}
752cb93a386Sopenharmony_ci
753cb93a386Sopenharmony_ci//////////////////////////////////////////////////////////////////////////////////////
754cb93a386Sopenharmony_ci
// Writes `color` into dst[0..7] for every bit set in the low 8 bits of
// `mask`; bit 0x80 selects dst[0] and bit 0x01 selects dst[7]. Pixels whose
// bit is clear are left untouched.
//
// `mask` is evaluated exactly once, so an argument with side effects is safe,
// and every argument is parenthesized so that expressions such as `a | b` or
// `p + 1` expand with the intended precedence. `dst` and `color` are
// evaluated once per set bit.
#define solid_8_pixels(mask, dst, color)                      \
    do {                                                      \
        const unsigned solid8_bits_ = (unsigned)(mask);       \
        if (solid8_bits_ & 0x80) (dst)[0] = (color);          \
        if (solid8_bits_ & 0x40) (dst)[1] = (color);          \
        if (solid8_bits_ & 0x20) (dst)[2] = (color);          \
        if (solid8_bits_ & 0x10) (dst)[3] = (color);          \
        if (solid8_bits_ & 0x08) (dst)[4] = (color);          \
        if (solid8_bits_ & 0x04) (dst)[5] = (color);          \
        if (solid8_bits_ & 0x02) (dst)[6] = (color);          \
        if (solid8_bits_ & 0x01) (dst)[7] = (color);          \
    } while (0)
766cb93a386Sopenharmony_ci
767cb93a386Sopenharmony_ci#define SK_BLITBWMASK_NAME                  SkARGB32_BlitBW
768cb93a386Sopenharmony_ci#define SK_BLITBWMASK_ARGS                  , SkPMColor color
769cb93a386Sopenharmony_ci#define SK_BLITBWMASK_BLIT8(mask, dst)      solid_8_pixels(mask, dst, color)
770cb93a386Sopenharmony_ci#define SK_BLITBWMASK_GETADDR               writable_addr32
771cb93a386Sopenharmony_ci#define SK_BLITBWMASK_DEVTYPE               uint32_t
772cb93a386Sopenharmony_ci#include "src/core/SkBlitBWMaskTemplate.h"
773cb93a386Sopenharmony_ci
// For every bit set in the low 8 bits of `mask` (bit 0x80 selects dst[0],
// bit 0x01 selects dst[7]) replaces the pixel with
//     sc + SkAlphaMulQ(pixel, dst_scale)
// Pixels whose bit is clear are left untouched.
//
// `mask` is evaluated exactly once, and every argument is parenthesized so
// that compound expressions expand with the intended precedence. `dst`, `sc`
// and `dst_scale` are evaluated once per set bit.
#define blend_8_pixels(mask, dst, sc, dst_scale)                                          \
    do {                                                                                  \
        const unsigned blend8_bits_ = (unsigned)(mask);                                   \
        if (blend8_bits_ & 0x80) { (dst)[0] = (sc) + SkAlphaMulQ((dst)[0], (dst_scale)); } \
        if (blend8_bits_ & 0x40) { (dst)[1] = (sc) + SkAlphaMulQ((dst)[1], (dst_scale)); } \
        if (blend8_bits_ & 0x20) { (dst)[2] = (sc) + SkAlphaMulQ((dst)[2], (dst_scale)); } \
        if (blend8_bits_ & 0x10) { (dst)[3] = (sc) + SkAlphaMulQ((dst)[3], (dst_scale)); } \
        if (blend8_bits_ & 0x08) { (dst)[4] = (sc) + SkAlphaMulQ((dst)[4], (dst_scale)); } \
        if (blend8_bits_ & 0x04) { (dst)[5] = (sc) + SkAlphaMulQ((dst)[5], (dst_scale)); } \
        if (blend8_bits_ & 0x02) { (dst)[6] = (sc) + SkAlphaMulQ((dst)[6], (dst_scale)); } \
        if (blend8_bits_ & 0x01) { (dst)[7] = (sc) + SkAlphaMulQ((dst)[7], (dst_scale)); } \
    } while (0)
785cb93a386Sopenharmony_ci
786cb93a386Sopenharmony_ci#define SK_BLITBWMASK_NAME                  SkARGB32_BlendBW
787cb93a386Sopenharmony_ci#define SK_BLITBWMASK_ARGS                  , uint32_t sc, unsigned dst_scale
788cb93a386Sopenharmony_ci#define SK_BLITBWMASK_BLIT8(mask, dst)      blend_8_pixels(mask, dst, sc, dst_scale)
789cb93a386Sopenharmony_ci#define SK_BLITBWMASK_GETADDR               writable_addr32
790cb93a386Sopenharmony_ci#define SK_BLITBWMASK_DEVTYPE               uint32_t
791cb93a386Sopenharmony_ci#include "src/core/SkBlitBWMaskTemplate.h"
792cb93a386Sopenharmony_ci
793cb93a386Sopenharmony_civoid SkARGB32_Blitter::blitMask(const SkMask& mask, const SkIRect& clip) {
794cb93a386Sopenharmony_ci    SkASSERT(mask.fBounds.contains(clip));
795cb93a386Sopenharmony_ci    SkASSERT(fSrcA != 0xFF);
796cb93a386Sopenharmony_ci
797cb93a386Sopenharmony_ci    if (fSrcA == 0) {
798cb93a386Sopenharmony_ci        return;
799cb93a386Sopenharmony_ci    }
800cb93a386Sopenharmony_ci
801cb93a386Sopenharmony_ci    if (blit_color(fDevice, mask, clip, fColor)) {
802cb93a386Sopenharmony_ci        return;
803cb93a386Sopenharmony_ci    }
804cb93a386Sopenharmony_ci
805cb93a386Sopenharmony_ci    switch (mask.fFormat) {
806cb93a386Sopenharmony_ci        case SkMask::kBW_Format:
807cb93a386Sopenharmony_ci            SkARGB32_BlendBW(fDevice, mask, clip, fPMColor, SkAlpha255To256(255 - fSrcA));
808cb93a386Sopenharmony_ci            break;
809cb93a386Sopenharmony_ci        case SkMask::kARGB32_Format:
810cb93a386Sopenharmony_ci            SkARGB32_Blit32(fDevice, mask, clip, fPMColor);
811cb93a386Sopenharmony_ci            break;
812cb93a386Sopenharmony_ci        default:
813cb93a386Sopenharmony_ci            SK_ABORT("Mask format not handled.");
814cb93a386Sopenharmony_ci    }
815cb93a386Sopenharmony_ci}
816cb93a386Sopenharmony_ci
817cb93a386Sopenharmony_civoid SkARGB32_Opaque_Blitter::blitMask(const SkMask& mask,
818cb93a386Sopenharmony_ci                                       const SkIRect& clip) {
819cb93a386Sopenharmony_ci    SkASSERT(mask.fBounds.contains(clip));
820cb93a386Sopenharmony_ci
821cb93a386Sopenharmony_ci    if (blit_color(fDevice, mask, clip, fColor)) {
822cb93a386Sopenharmony_ci        return;
823cb93a386Sopenharmony_ci    }
824cb93a386Sopenharmony_ci
825cb93a386Sopenharmony_ci    switch (mask.fFormat) {
826cb93a386Sopenharmony_ci        case SkMask::kBW_Format:
827cb93a386Sopenharmony_ci            SkARGB32_BlitBW(fDevice, mask, clip, fPMColor);
828cb93a386Sopenharmony_ci            break;
829cb93a386Sopenharmony_ci        case SkMask::kARGB32_Format:
830cb93a386Sopenharmony_ci            SkARGB32_Blit32(fDevice, mask, clip, fPMColor);
831cb93a386Sopenharmony_ci            break;
832cb93a386Sopenharmony_ci        default:
833cb93a386Sopenharmony_ci            SK_ABORT("Mask format not handled.");
834cb93a386Sopenharmony_ci    }
835cb93a386Sopenharmony_ci}
836cb93a386Sopenharmony_ci
837cb93a386Sopenharmony_civoid SkARGB32_Opaque_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
838cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
839cb93a386Sopenharmony_ci    SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)
840cb93a386Sopenharmony_ci
841cb93a386Sopenharmony_ci    device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
842cb93a386Sopenharmony_ci    device[1] = SkFastFourByteInterp(fPMColor, device[1], a1);
843cb93a386Sopenharmony_ci}
844cb93a386Sopenharmony_ci
845cb93a386Sopenharmony_civoid SkARGB32_Opaque_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
846cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
847cb93a386Sopenharmony_ci    SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)
848cb93a386Sopenharmony_ci
849cb93a386Sopenharmony_ci    device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
850cb93a386Sopenharmony_ci    device = (uint32_t*)((char*)device + fDevice.rowBytes());
851cb93a386Sopenharmony_ci    device[0] = SkFastFourByteInterp(fPMColor, device[0], a1);
852cb93a386Sopenharmony_ci}
853cb93a386Sopenharmony_ci
854cb93a386Sopenharmony_ci///////////////////////////////////////////////////////////////////////////////
855cb93a386Sopenharmony_ci
856cb93a386Sopenharmony_civoid SkARGB32_Blitter::blitV(int x, int y, int height, SkAlpha alpha) {
857cb93a386Sopenharmony_ci    if (alpha == 0 || fSrcA == 0) {
858cb93a386Sopenharmony_ci        return;
859cb93a386Sopenharmony_ci    }
860cb93a386Sopenharmony_ci
861cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
862cb93a386Sopenharmony_ci    uint32_t  color = fPMColor;
863cb93a386Sopenharmony_ci
864cb93a386Sopenharmony_ci    if (alpha != 255) {
865cb93a386Sopenharmony_ci        color = SkAlphaMulQ(color, SkAlpha255To256(alpha));
866cb93a386Sopenharmony_ci    }
867cb93a386Sopenharmony_ci
868cb93a386Sopenharmony_ci    unsigned dst_scale = SkAlpha255To256(255 - SkGetPackedA32(color));
869cb93a386Sopenharmony_ci    size_t rowBytes = fDevice.rowBytes();
870cb93a386Sopenharmony_ci    while (--height >= 0) {
871cb93a386Sopenharmony_ci        device[0] = color + SkAlphaMulQ(device[0], dst_scale);
872cb93a386Sopenharmony_ci        device = (uint32_t*)((char*)device + rowBytes);
873cb93a386Sopenharmony_ci    }
874cb93a386Sopenharmony_ci}
875cb93a386Sopenharmony_ci
876cb93a386Sopenharmony_civoid SkARGB32_Blitter::blitRect(int x, int y, int width, int height) {
877cb93a386Sopenharmony_ci    SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width() && y + height <= fDevice.height());
878cb93a386Sopenharmony_ci
879cb93a386Sopenharmony_ci    if (fSrcA == 0) {
880cb93a386Sopenharmony_ci        return;
881cb93a386Sopenharmony_ci    }
882cb93a386Sopenharmony_ci
883cb93a386Sopenharmony_ci    uint32_t*   device = fDevice.writable_addr32(x, y);
884cb93a386Sopenharmony_ci    uint32_t    color = fPMColor;
885cb93a386Sopenharmony_ci    size_t      rowBytes = fDevice.rowBytes();
886cb93a386Sopenharmony_ci
887cb93a386Sopenharmony_ci    if (SkGetPackedA32(fPMColor) == 0xFF) {
888cb93a386Sopenharmony_ci        SkOpts::rect_memset32(device, color, width, rowBytes, height);
889cb93a386Sopenharmony_ci    } else {
890cb93a386Sopenharmony_ci        while (height --> 0) {
891cb93a386Sopenharmony_ci            SkBlitRow::Color32(device, device, width, color);
892cb93a386Sopenharmony_ci            device = (uint32_t*)((char*)device + rowBytes);
893cb93a386Sopenharmony_ci        }
894cb93a386Sopenharmony_ci    }
895cb93a386Sopenharmony_ci}
896cb93a386Sopenharmony_ci
897cb93a386Sopenharmony_ci#if defined _WIN32
898cb93a386Sopenharmony_ci#pragma warning ( pop )
899cb93a386Sopenharmony_ci#endif
900cb93a386Sopenharmony_ci
901cb93a386Sopenharmony_ci///////////////////////////////////////////////////////////////////////
902cb93a386Sopenharmony_ci
903cb93a386Sopenharmony_civoid SkARGB32_Black_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
904cb93a386Sopenharmony_ci                                       const int16_t runs[]) {
905cb93a386Sopenharmony_ci    uint32_t*   device = fDevice.writable_addr32(x, y);
906cb93a386Sopenharmony_ci    SkPMColor   black = (SkPMColor)(SK_A32_MASK << SK_A32_SHIFT);
907cb93a386Sopenharmony_ci
908cb93a386Sopenharmony_ci    for (;;) {
909cb93a386Sopenharmony_ci        int count = runs[0];
910cb93a386Sopenharmony_ci        SkASSERT(count >= 0);
911cb93a386Sopenharmony_ci        if (count <= 0) {
912cb93a386Sopenharmony_ci            return;
913cb93a386Sopenharmony_ci        }
914cb93a386Sopenharmony_ci        unsigned aa = antialias[0];
915cb93a386Sopenharmony_ci        if (aa) {
916cb93a386Sopenharmony_ci            if (aa == 255) {
917cb93a386Sopenharmony_ci                sk_memset32(device, black, count);
918cb93a386Sopenharmony_ci            } else {
919cb93a386Sopenharmony_ci                SkPMColor src = aa << SK_A32_SHIFT;
920cb93a386Sopenharmony_ci                unsigned dst_scale = 256 - aa;
921cb93a386Sopenharmony_ci                int n = count;
922cb93a386Sopenharmony_ci                do {
923cb93a386Sopenharmony_ci                    --n;
924cb93a386Sopenharmony_ci                    device[n] = src + SkAlphaMulQ(device[n], dst_scale);
925cb93a386Sopenharmony_ci                } while (n > 0);
926cb93a386Sopenharmony_ci            }
927cb93a386Sopenharmony_ci        }
928cb93a386Sopenharmony_ci        runs += count;
929cb93a386Sopenharmony_ci        antialias += count;
930cb93a386Sopenharmony_ci        device += count;
931cb93a386Sopenharmony_ci    }
932cb93a386Sopenharmony_ci}
933cb93a386Sopenharmony_ci
934cb93a386Sopenharmony_civoid SkARGB32_Black_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
935cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
936cb93a386Sopenharmony_ci    SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)
937cb93a386Sopenharmony_ci
938cb93a386Sopenharmony_ci    device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
939cb93a386Sopenharmony_ci    device[1] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[1], 256 - a1);
940cb93a386Sopenharmony_ci}
941cb93a386Sopenharmony_ci
942cb93a386Sopenharmony_civoid SkARGB32_Black_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
943cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
944cb93a386Sopenharmony_ci    SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)
945cb93a386Sopenharmony_ci
946cb93a386Sopenharmony_ci    device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
947cb93a386Sopenharmony_ci    device = (uint32_t*)((char*)device + fDevice.rowBytes());
948cb93a386Sopenharmony_ci    device[0] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a1);
949cb93a386Sopenharmony_ci}
950cb93a386Sopenharmony_ci
951cb93a386Sopenharmony_ci///////////////////////////////////////////////////////////////////////////////
952cb93a386Sopenharmony_ci
953cb93a386Sopenharmony_ci// Special version of SkBlitRow::Factory32 that knows we're in kSrc_Mode,
954cb93a386Sopenharmony_ci// instead of kSrcOver_Mode
955cb93a386Sopenharmony_cistatic void blend_srcmode(SkPMColor* SK_RESTRICT device,
956cb93a386Sopenharmony_ci                          const SkPMColor* SK_RESTRICT span,
957cb93a386Sopenharmony_ci                          int count, U8CPU aa) {
958cb93a386Sopenharmony_ci    int aa256 = SkAlpha255To256(aa);
959cb93a386Sopenharmony_ci    for (int i = 0; i < count; ++i) {
960cb93a386Sopenharmony_ci        device[i] = SkFourByteInterp256(span[i], device[i], aa256);
961cb93a386Sopenharmony_ci    }
962cb93a386Sopenharmony_ci}
963cb93a386Sopenharmony_ci
964cb93a386Sopenharmony_ciSkARGB32_Shader_Blitter::SkARGB32_Shader_Blitter(const SkPixmap& device,
965cb93a386Sopenharmony_ci        const SkPaint& paint, SkShaderBase::Context* shaderContext)
966cb93a386Sopenharmony_ci    : INHERITED(device, paint, shaderContext)
967cb93a386Sopenharmony_ci{
968cb93a386Sopenharmony_ci    fBuffer = (SkPMColor*)sk_malloc_throw(device.width() * (sizeof(SkPMColor)));
969cb93a386Sopenharmony_ci
970cb93a386Sopenharmony_ci    fXfermode = SkXfermode::Peek(paint.getBlendMode_or(SkBlendMode::kSrcOver));
971cb93a386Sopenharmony_ci
972cb93a386Sopenharmony_ci    int flags = 0;
973cb93a386Sopenharmony_ci    if (!(shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag)) {
974cb93a386Sopenharmony_ci        flags |= SkBlitRow::kSrcPixelAlpha_Flag32;
975cb93a386Sopenharmony_ci    }
976cb93a386Sopenharmony_ci    // we call this on the output from the shader
977cb93a386Sopenharmony_ci    fProc32 = SkBlitRow::Factory32(flags);
978cb93a386Sopenharmony_ci    // we call this on the output from the shader + alpha from the aa buffer
979cb93a386Sopenharmony_ci    fProc32Blend = SkBlitRow::Factory32(flags | SkBlitRow::kGlobalAlpha_Flag32);
980cb93a386Sopenharmony_ci
981cb93a386Sopenharmony_ci    fShadeDirectlyIntoDevice = false;
982cb93a386Sopenharmony_ci    if (fXfermode == nullptr) {
983cb93a386Sopenharmony_ci        if (shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag) {
984cb93a386Sopenharmony_ci            fShadeDirectlyIntoDevice = true;
985cb93a386Sopenharmony_ci        }
986cb93a386Sopenharmony_ci    } else {
987cb93a386Sopenharmony_ci        if (SkBlendMode::kSrc == paint.asBlendMode()) {
988cb93a386Sopenharmony_ci            fShadeDirectlyIntoDevice = true;
989cb93a386Sopenharmony_ci            fProc32Blend = blend_srcmode;
990cb93a386Sopenharmony_ci        }
991cb93a386Sopenharmony_ci    }
992cb93a386Sopenharmony_ci
993cb93a386Sopenharmony_ci    fConstInY = SkToBool(shaderContext->getFlags() & SkShaderBase::kConstInY32_Flag);
994cb93a386Sopenharmony_ci}
995cb93a386Sopenharmony_ci
996cb93a386Sopenharmony_ciSkARGB32_Shader_Blitter::~SkARGB32_Shader_Blitter() {
997cb93a386Sopenharmony_ci    sk_free(fBuffer);
998cb93a386Sopenharmony_ci}
999cb93a386Sopenharmony_ci
1000cb93a386Sopenharmony_civoid SkARGB32_Shader_Blitter::blitH(int x, int y, int width) {
1001cb93a386Sopenharmony_ci    SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width());
1002cb93a386Sopenharmony_ci
1003cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
1004cb93a386Sopenharmony_ci
1005cb93a386Sopenharmony_ci    if (fShadeDirectlyIntoDevice) {
1006cb93a386Sopenharmony_ci        fShaderContext->shadeSpan(x, y, device, width);
1007cb93a386Sopenharmony_ci    } else {
1008cb93a386Sopenharmony_ci        SkPMColor*  span = fBuffer;
1009cb93a386Sopenharmony_ci        fShaderContext->shadeSpan(x, y, span, width);
1010cb93a386Sopenharmony_ci        if (fXfermode) {
1011cb93a386Sopenharmony_ci            fXfermode->xfer32(device, span, width, nullptr);
1012cb93a386Sopenharmony_ci        } else {
1013cb93a386Sopenharmony_ci            fProc32(device, span, width, 255);
1014cb93a386Sopenharmony_ci        }
1015cb93a386Sopenharmony_ci    }
1016cb93a386Sopenharmony_ci}
1017cb93a386Sopenharmony_ci
1018cb93a386Sopenharmony_civoid SkARGB32_Shader_Blitter::blitRect(int x, int y, int width, int height) {
1019cb93a386Sopenharmony_ci    SkASSERT(x >= 0 && y >= 0 &&
1020cb93a386Sopenharmony_ci             x + width <= fDevice.width() && y + height <= fDevice.height());
1021cb93a386Sopenharmony_ci
1022cb93a386Sopenharmony_ci    uint32_t*  device = fDevice.writable_addr32(x, y);
1023cb93a386Sopenharmony_ci    size_t     deviceRB = fDevice.rowBytes();
1024cb93a386Sopenharmony_ci    auto*      shaderContext = fShaderContext;
1025cb93a386Sopenharmony_ci    SkPMColor* span = fBuffer;
1026cb93a386Sopenharmony_ci
1027cb93a386Sopenharmony_ci    if (fConstInY) {
1028cb93a386Sopenharmony_ci        if (fShadeDirectlyIntoDevice) {
1029cb93a386Sopenharmony_ci            // shade the first row directly into the device
1030cb93a386Sopenharmony_ci            shaderContext->shadeSpan(x, y, device, width);
1031cb93a386Sopenharmony_ci            span = device;
1032cb93a386Sopenharmony_ci            while (--height > 0) {
1033cb93a386Sopenharmony_ci                device = (uint32_t*)((char*)device + deviceRB);
1034cb93a386Sopenharmony_ci                memcpy(device, span, width << 2);
1035cb93a386Sopenharmony_ci            }
1036cb93a386Sopenharmony_ci        } else {
1037cb93a386Sopenharmony_ci            shaderContext->shadeSpan(x, y, span, width);
1038cb93a386Sopenharmony_ci            SkXfermode* xfer = fXfermode;
1039cb93a386Sopenharmony_ci            if (xfer) {
1040cb93a386Sopenharmony_ci                do {
1041cb93a386Sopenharmony_ci                    xfer->xfer32(device, span, width, nullptr);
1042cb93a386Sopenharmony_ci                    y += 1;
1043cb93a386Sopenharmony_ci                    device = (uint32_t*)((char*)device + deviceRB);
1044cb93a386Sopenharmony_ci                } while (--height > 0);
1045cb93a386Sopenharmony_ci            } else {
1046cb93a386Sopenharmony_ci                SkBlitRow::Proc32 proc = fProc32;
1047cb93a386Sopenharmony_ci                do {
1048cb93a386Sopenharmony_ci                    proc(device, span, width, 255);
1049cb93a386Sopenharmony_ci                    y += 1;
1050cb93a386Sopenharmony_ci                    device = (uint32_t*)((char*)device + deviceRB);
1051cb93a386Sopenharmony_ci                } while (--height > 0);
1052cb93a386Sopenharmony_ci            }
1053cb93a386Sopenharmony_ci        }
1054cb93a386Sopenharmony_ci        return;
1055cb93a386Sopenharmony_ci    }
1056cb93a386Sopenharmony_ci
1057cb93a386Sopenharmony_ci    if (fShadeDirectlyIntoDevice) {
1058cb93a386Sopenharmony_ci        do {
1059cb93a386Sopenharmony_ci            shaderContext->shadeSpan(x, y, device, width);
1060cb93a386Sopenharmony_ci            y += 1;
1061cb93a386Sopenharmony_ci            device = (uint32_t*)((char*)device + deviceRB);
1062cb93a386Sopenharmony_ci        } while (--height > 0);
1063cb93a386Sopenharmony_ci    } else {
1064cb93a386Sopenharmony_ci        SkXfermode* xfer = fXfermode;
1065cb93a386Sopenharmony_ci        if (xfer) {
1066cb93a386Sopenharmony_ci            do {
1067cb93a386Sopenharmony_ci                shaderContext->shadeSpan(x, y, span, width);
1068cb93a386Sopenharmony_ci                xfer->xfer32(device, span, width, nullptr);
1069cb93a386Sopenharmony_ci                y += 1;
1070cb93a386Sopenharmony_ci                device = (uint32_t*)((char*)device + deviceRB);
1071cb93a386Sopenharmony_ci            } while (--height > 0);
1072cb93a386Sopenharmony_ci        } else {
1073cb93a386Sopenharmony_ci            SkBlitRow::Proc32 proc = fProc32;
1074cb93a386Sopenharmony_ci            do {
1075cb93a386Sopenharmony_ci                shaderContext->shadeSpan(x, y, span, width);
1076cb93a386Sopenharmony_ci                proc(device, span, width, 255);
1077cb93a386Sopenharmony_ci                y += 1;
1078cb93a386Sopenharmony_ci                device = (uint32_t*)((char*)device + deviceRB);
1079cb93a386Sopenharmony_ci            } while (--height > 0);
1080cb93a386Sopenharmony_ci        }
1081cb93a386Sopenharmony_ci    }
1082cb93a386Sopenharmony_ci}
1083cb93a386Sopenharmony_ci
1084cb93a386Sopenharmony_civoid SkARGB32_Shader_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
1085cb93a386Sopenharmony_ci                                        const int16_t runs[]) {
1086cb93a386Sopenharmony_ci    SkPMColor* span = fBuffer;
1087cb93a386Sopenharmony_ci    uint32_t*  device = fDevice.writable_addr32(x, y);
1088cb93a386Sopenharmony_ci    auto*      shaderContext = fShaderContext;
1089cb93a386Sopenharmony_ci
1090cb93a386Sopenharmony_ci    if (fXfermode && !fShadeDirectlyIntoDevice) {
1091cb93a386Sopenharmony_ci        for (;;) {
1092cb93a386Sopenharmony_ci            SkXfermode* xfer = fXfermode;
1093cb93a386Sopenharmony_ci
1094cb93a386Sopenharmony_ci            int count = *runs;
1095cb93a386Sopenharmony_ci            if (count <= 0)
1096cb93a386Sopenharmony_ci                break;
1097cb93a386Sopenharmony_ci            int aa = *antialias;
1098cb93a386Sopenharmony_ci            if (aa) {
1099cb93a386Sopenharmony_ci                shaderContext->shadeSpan(x, y, span, count);
1100cb93a386Sopenharmony_ci                if (aa == 255) {
1101cb93a386Sopenharmony_ci                    xfer->xfer32(device, span, count, nullptr);
1102cb93a386Sopenharmony_ci                } else {
1103cb93a386Sopenharmony_ci                    // count is almost always 1
1104cb93a386Sopenharmony_ci                    for (int i = count - 1; i >= 0; --i) {
1105cb93a386Sopenharmony_ci                        xfer->xfer32(&device[i], &span[i], 1, antialias);
1106cb93a386Sopenharmony_ci                    }
1107cb93a386Sopenharmony_ci                }
1108cb93a386Sopenharmony_ci            }
1109cb93a386Sopenharmony_ci            device += count;
1110cb93a386Sopenharmony_ci            runs += count;
1111cb93a386Sopenharmony_ci            antialias += count;
1112cb93a386Sopenharmony_ci            x += count;
1113cb93a386Sopenharmony_ci        }
1114cb93a386Sopenharmony_ci    } else if (fShadeDirectlyIntoDevice ||
1115cb93a386Sopenharmony_ci               (shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag)) {
1116cb93a386Sopenharmony_ci        for (;;) {
1117cb93a386Sopenharmony_ci            int count = *runs;
1118cb93a386Sopenharmony_ci            if (count <= 0) {
1119cb93a386Sopenharmony_ci                break;
1120cb93a386Sopenharmony_ci            }
1121cb93a386Sopenharmony_ci            int aa = *antialias;
1122cb93a386Sopenharmony_ci            if (aa) {
1123cb93a386Sopenharmony_ci                if (aa == 255) {
1124cb93a386Sopenharmony_ci                    // cool, have the shader draw right into the device
1125cb93a386Sopenharmony_ci                    shaderContext->shadeSpan(x, y, device, count);
1126cb93a386Sopenharmony_ci                } else {
1127cb93a386Sopenharmony_ci                    shaderContext->shadeSpan(x, y, span, count);
1128cb93a386Sopenharmony_ci                    fProc32Blend(device, span, count, aa);
1129cb93a386Sopenharmony_ci                }
1130cb93a386Sopenharmony_ci            }
1131cb93a386Sopenharmony_ci            device += count;
1132cb93a386Sopenharmony_ci            runs += count;
1133cb93a386Sopenharmony_ci            antialias += count;
1134cb93a386Sopenharmony_ci            x += count;
1135cb93a386Sopenharmony_ci        }
1136cb93a386Sopenharmony_ci    } else {
1137cb93a386Sopenharmony_ci        for (;;) {
1138cb93a386Sopenharmony_ci            int count = *runs;
1139cb93a386Sopenharmony_ci            if (count <= 0) {
1140cb93a386Sopenharmony_ci                break;
1141cb93a386Sopenharmony_ci            }
1142cb93a386Sopenharmony_ci            int aa = *antialias;
1143cb93a386Sopenharmony_ci            if (aa) {
1144cb93a386Sopenharmony_ci                shaderContext->shadeSpan(x, y, span, count);
1145cb93a386Sopenharmony_ci                if (aa == 255) {
1146cb93a386Sopenharmony_ci                    fProc32(device, span, count, 255);
1147cb93a386Sopenharmony_ci                } else {
1148cb93a386Sopenharmony_ci                    fProc32Blend(device, span, count, aa);
1149cb93a386Sopenharmony_ci                }
1150cb93a386Sopenharmony_ci            }
1151cb93a386Sopenharmony_ci            device += count;
1152cb93a386Sopenharmony_ci            runs += count;
1153cb93a386Sopenharmony_ci            antialias += count;
1154cb93a386Sopenharmony_ci            x += count;
1155cb93a386Sopenharmony_ci        }
1156cb93a386Sopenharmony_ci    }
1157cb93a386Sopenharmony_ci}
1158cb93a386Sopenharmony_ci
1159cb93a386Sopenharmony_ciusing U32  = skvx::Vec< 4, uint32_t>;
1160cb93a386Sopenharmony_ciusing U8x4 = skvx::Vec<16, uint8_t>;
1161cb93a386Sopenharmony_ciusing U8   = skvx::Vec< 4, uint8_t>;
1162cb93a386Sopenharmony_ci
1163cb93a386Sopenharmony_cistatic void drive(SkPMColor* dst, const SkPMColor* src, const uint8_t* cov, int n,
1164cb93a386Sopenharmony_ci                  U8x4 (*kernel)(U8x4,U8x4,U8x4)) {
1165cb93a386Sopenharmony_ci
1166cb93a386Sopenharmony_ci    auto apply = [kernel](U32 dst, U32 src, U8 cov) -> U32 {
1167cb93a386Sopenharmony_ci        U8x4 cov_splat = skvx::shuffle<0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3>(cov);
1168cb93a386Sopenharmony_ci        return skvx::bit_pun<U32>(kernel(skvx::bit_pun<U8x4>(dst),
1169cb93a386Sopenharmony_ci                                         skvx::bit_pun<U8x4>(src),
1170cb93a386Sopenharmony_ci                                         cov_splat));
1171cb93a386Sopenharmony_ci    };
1172cb93a386Sopenharmony_ci    while (n >= 4) {
1173cb93a386Sopenharmony_ci        apply(U32::Load(dst), U32::Load(src), U8::Load(cov)).store(dst);
1174cb93a386Sopenharmony_ci        dst += 4;
1175cb93a386Sopenharmony_ci        src += 4;
1176cb93a386Sopenharmony_ci        cov += 4;
1177cb93a386Sopenharmony_ci        n   -= 4;
1178cb93a386Sopenharmony_ci    }
1179cb93a386Sopenharmony_ci    while (n --> 0) {
1180cb93a386Sopenharmony_ci        *dst = apply(U32{*dst}, U32{*src}, U8{*cov})[0];
1181cb93a386Sopenharmony_ci        dst++;
1182cb93a386Sopenharmony_ci        src++;
1183cb93a386Sopenharmony_ci        cov++;
1184cb93a386Sopenharmony_ci    }
1185cb93a386Sopenharmony_ci}
1186cb93a386Sopenharmony_ci
1187cb93a386Sopenharmony_cistatic void blend_row_A8(SkPMColor* dst, const void* mask, const SkPMColor* src, int n) {
1188cb93a386Sopenharmony_ci    auto cov = (const uint8_t*)mask;
1189cb93a386Sopenharmony_ci    drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
1190cb93a386Sopenharmony_ci        U8x4 s_aa  = skvx::approx_scale(s, c),
1191cb93a386Sopenharmony_ci             alpha = skvx::shuffle<3,3,3,3, 7,7,7,7, 11,11,11,11, 15,15,15,15>(s_aa);
1192cb93a386Sopenharmony_ci        return s_aa + skvx::approx_scale(d, 255 - alpha);
1193cb93a386Sopenharmony_ci    });
1194cb93a386Sopenharmony_ci}
1195cb93a386Sopenharmony_ci
1196cb93a386Sopenharmony_cistatic void blend_row_A8_opaque(SkPMColor* dst, const void* mask, const SkPMColor* src, int n) {
1197cb93a386Sopenharmony_ci    auto cov = (const uint8_t*)mask;
1198cb93a386Sopenharmony_ci    drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
1199cb93a386Sopenharmony_ci        return skvx::div255( skvx::cast<uint16_t>(s) * skvx::cast<uint16_t>(  c  )
1200cb93a386Sopenharmony_ci                           + skvx::cast<uint16_t>(d) * skvx::cast<uint16_t>(255-c));
1201cb93a386Sopenharmony_ci    });
1202cb93a386Sopenharmony_ci}
1203cb93a386Sopenharmony_ci
1204cb93a386Sopenharmony_cistatic void blend_row_lcd16(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
1205cb93a386Sopenharmony_ci    auto src_alpha_blend = [](int s, int d, int sa, int m) {
1206cb93a386Sopenharmony_ci        return d + SkAlphaMul(s - SkAlphaMul(sa, d), m);
1207cb93a386Sopenharmony_ci    };
1208cb93a386Sopenharmony_ci
1209cb93a386Sopenharmony_ci    auto upscale_31_to_255 = [](int v) {
1210cb93a386Sopenharmony_ci        return (v << 3) | (v >> 2);
1211cb93a386Sopenharmony_ci    };
1212cb93a386Sopenharmony_ci
1213cb93a386Sopenharmony_ci    auto mask = (const uint16_t*)vmask;
1214cb93a386Sopenharmony_ci    for (int i = 0; i < n; ++i) {
1215cb93a386Sopenharmony_ci        uint16_t m = mask[i];
1216cb93a386Sopenharmony_ci        if (0 == m) {
1217cb93a386Sopenharmony_ci            continue;
1218cb93a386Sopenharmony_ci        }
1219cb93a386Sopenharmony_ci
1220cb93a386Sopenharmony_ci        SkPMColor s = src[i];
1221cb93a386Sopenharmony_ci        SkPMColor d = dst[i];
1222cb93a386Sopenharmony_ci
1223cb93a386Sopenharmony_ci        int srcA = SkGetPackedA32(s);
1224cb93a386Sopenharmony_ci        int srcR = SkGetPackedR32(s);
1225cb93a386Sopenharmony_ci        int srcG = SkGetPackedG32(s);
1226cb93a386Sopenharmony_ci        int srcB = SkGetPackedB32(s);
1227cb93a386Sopenharmony_ci
1228cb93a386Sopenharmony_ci        srcA += srcA >> 7;
1229cb93a386Sopenharmony_ci
1230cb93a386Sopenharmony_ci        // We're ignoring the least significant bit of the green coverage channel here.
1231cb93a386Sopenharmony_ci        int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
1232cb93a386Sopenharmony_ci        int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
1233cb93a386Sopenharmony_ci        int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
1234cb93a386Sopenharmony_ci
1235cb93a386Sopenharmony_ci        // Scale up to 8-bit coverage to work with SkAlphaMul() in src_alpha_blend().
1236cb93a386Sopenharmony_ci        maskR = upscale_31_to_255(maskR);
1237cb93a386Sopenharmony_ci        maskG = upscale_31_to_255(maskG);
1238cb93a386Sopenharmony_ci        maskB = upscale_31_to_255(maskB);
1239cb93a386Sopenharmony_ci
1240cb93a386Sopenharmony_ci        // This LCD blit routine only works if the destination is opaque.
1241cb93a386Sopenharmony_ci        dst[i] = SkPackARGB32(0xFF,
1242cb93a386Sopenharmony_ci                              src_alpha_blend(srcR, SkGetPackedR32(d), srcA, maskR),
1243cb93a386Sopenharmony_ci                              src_alpha_blend(srcG, SkGetPackedG32(d), srcA, maskG),
1244cb93a386Sopenharmony_ci                              src_alpha_blend(srcB, SkGetPackedB32(d), srcA, maskB));
1245cb93a386Sopenharmony_ci    }
1246cb93a386Sopenharmony_ci}
1247cb93a386Sopenharmony_ci
1248cb93a386Sopenharmony_cistatic void blend_row_LCD16_opaque(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
1249cb93a386Sopenharmony_ci    auto mask = (const uint16_t*)vmask;
1250cb93a386Sopenharmony_ci
1251cb93a386Sopenharmony_ci    for (int i = 0; i < n; ++i) {
1252cb93a386Sopenharmony_ci        uint16_t m = mask[i];
1253cb93a386Sopenharmony_ci        if (0 == m) {
1254cb93a386Sopenharmony_ci            continue;
1255cb93a386Sopenharmony_ci        }
1256cb93a386Sopenharmony_ci
1257cb93a386Sopenharmony_ci        SkPMColor s = src[i];
1258cb93a386Sopenharmony_ci        SkPMColor d = dst[i];
1259cb93a386Sopenharmony_ci
1260cb93a386Sopenharmony_ci        int srcR = SkGetPackedR32(s);
1261cb93a386Sopenharmony_ci        int srcG = SkGetPackedG32(s);
1262cb93a386Sopenharmony_ci        int srcB = SkGetPackedB32(s);
1263cb93a386Sopenharmony_ci
1264cb93a386Sopenharmony_ci        // We're ignoring the least significant bit of the green coverage channel here.
1265cb93a386Sopenharmony_ci        int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5);
1266cb93a386Sopenharmony_ci        int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5);
1267cb93a386Sopenharmony_ci        int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
1268cb93a386Sopenharmony_ci
1269cb93a386Sopenharmony_ci        // Now upscale them to 0..32, so we can use blend_32.
1270cb93a386Sopenharmony_ci        maskR = upscale_31_to_32(maskR);
1271cb93a386Sopenharmony_ci        maskG = upscale_31_to_32(maskG);
1272cb93a386Sopenharmony_ci        maskB = upscale_31_to_32(maskB);
1273cb93a386Sopenharmony_ci
1274cb93a386Sopenharmony_ci        // This LCD blit routine only works if the destination is opaque.
1275cb93a386Sopenharmony_ci        dst[i] = SkPackARGB32(0xFF,
1276cb93a386Sopenharmony_ci                              blend_32(srcR, SkGetPackedR32(d), maskR),
1277cb93a386Sopenharmony_ci                              blend_32(srcG, SkGetPackedG32(d), maskG),
1278cb93a386Sopenharmony_ci                              blend_32(srcB, SkGetPackedB32(d), maskB));
1279cb93a386Sopenharmony_ci    }
1280cb93a386Sopenharmony_ci}
1281cb93a386Sopenharmony_ci
1282cb93a386Sopenharmony_civoid SkARGB32_Shader_Blitter::blitMask(const SkMask& mask, const SkIRect& clip) {
1283cb93a386Sopenharmony_ci    // we only handle kA8 with an xfermode
1284cb93a386Sopenharmony_ci    if (fXfermode && (SkMask::kA8_Format != mask.fFormat)) {
1285cb93a386Sopenharmony_ci        this->INHERITED::blitMask(mask, clip);
1286cb93a386Sopenharmony_ci        return;
1287cb93a386Sopenharmony_ci    }
1288cb93a386Sopenharmony_ci
1289cb93a386Sopenharmony_ci    SkASSERT(mask.fBounds.contains(clip));
1290cb93a386Sopenharmony_ci
1291cb93a386Sopenharmony_ci    void (*blend_row)(SkPMColor*, const void* mask, const SkPMColor*, int) = nullptr;
1292cb93a386Sopenharmony_ci
1293cb93a386Sopenharmony_ci    if (!fXfermode) {
1294cb93a386Sopenharmony_ci        bool opaque = (fShaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag);
1295cb93a386Sopenharmony_ci
1296cb93a386Sopenharmony_ci        if (mask.fFormat == SkMask::kA8_Format && opaque) {
1297cb93a386Sopenharmony_ci            blend_row = blend_row_A8_opaque;
1298cb93a386Sopenharmony_ci        } else if (mask.fFormat == SkMask::kA8_Format) {
1299cb93a386Sopenharmony_ci            blend_row = blend_row_A8;
1300cb93a386Sopenharmony_ci        } else if (mask.fFormat == SkMask::kLCD16_Format && opaque) {
1301cb93a386Sopenharmony_ci            blend_row = blend_row_LCD16_opaque;
1302cb93a386Sopenharmony_ci        } else if (mask.fFormat == SkMask::kLCD16_Format) {
1303cb93a386Sopenharmony_ci            blend_row = blend_row_lcd16;
1304cb93a386Sopenharmony_ci        } else {
1305cb93a386Sopenharmony_ci            this->INHERITED::blitMask(mask, clip);
1306cb93a386Sopenharmony_ci            return;
1307cb93a386Sopenharmony_ci        }
1308cb93a386Sopenharmony_ci    }
1309cb93a386Sopenharmony_ci
1310cb93a386Sopenharmony_ci    const int x = clip.fLeft;
1311cb93a386Sopenharmony_ci    const int width = clip.width();
1312cb93a386Sopenharmony_ci    int y = clip.fTop;
1313cb93a386Sopenharmony_ci    int height = clip.height();
1314cb93a386Sopenharmony_ci
1315cb93a386Sopenharmony_ci    char* dstRow = (char*)fDevice.writable_addr32(x, y);
1316cb93a386Sopenharmony_ci    const size_t dstRB = fDevice.rowBytes();
1317cb93a386Sopenharmony_ci    const uint8_t* maskRow = (const uint8_t*)mask.getAddr(x, y);
1318cb93a386Sopenharmony_ci    const size_t maskRB = mask.fRowBytes;
1319cb93a386Sopenharmony_ci
1320cb93a386Sopenharmony_ci    SkPMColor* span = fBuffer;
1321cb93a386Sopenharmony_ci
1322cb93a386Sopenharmony_ci    if (fXfermode) {
1323cb93a386Sopenharmony_ci        SkASSERT(SkMask::kA8_Format == mask.fFormat);
1324cb93a386Sopenharmony_ci        SkXfermode* xfer = fXfermode;
1325cb93a386Sopenharmony_ci        do {
1326cb93a386Sopenharmony_ci            fShaderContext->shadeSpan(x, y, span, width);
1327cb93a386Sopenharmony_ci            xfer->xfer32(reinterpret_cast<SkPMColor*>(dstRow), span, width, maskRow);
1328cb93a386Sopenharmony_ci            dstRow += dstRB;
1329cb93a386Sopenharmony_ci            maskRow += maskRB;
1330cb93a386Sopenharmony_ci            y += 1;
1331cb93a386Sopenharmony_ci        } while (--height > 0);
1332cb93a386Sopenharmony_ci    } else {
1333cb93a386Sopenharmony_ci        SkASSERT(blend_row);
1334cb93a386Sopenharmony_ci        do {
1335cb93a386Sopenharmony_ci            fShaderContext->shadeSpan(x, y, span, width);
1336cb93a386Sopenharmony_ci            blend_row(reinterpret_cast<SkPMColor*>(dstRow), maskRow, span, width);
1337cb93a386Sopenharmony_ci            dstRow += dstRB;
1338cb93a386Sopenharmony_ci            maskRow += maskRB;
1339cb93a386Sopenharmony_ci            y += 1;
1340cb93a386Sopenharmony_ci        } while (--height > 0);
1341cb93a386Sopenharmony_ci    }
1342cb93a386Sopenharmony_ci}
1343cb93a386Sopenharmony_ci
1344cb93a386Sopenharmony_civoid SkARGB32_Shader_Blitter::blitV(int x, int y, int height, SkAlpha alpha) {
1345cb93a386Sopenharmony_ci    SkASSERT(x >= 0 && y >= 0 && y + height <= fDevice.height());
1346cb93a386Sopenharmony_ci
1347cb93a386Sopenharmony_ci    uint32_t* device = fDevice.writable_addr32(x, y);
1348cb93a386Sopenharmony_ci    size_t    deviceRB = fDevice.rowBytes();
1349cb93a386Sopenharmony_ci
1350cb93a386Sopenharmony_ci    if (fConstInY) {
1351cb93a386Sopenharmony_ci        SkPMColor c;
1352cb93a386Sopenharmony_ci        fShaderContext->shadeSpan(x, y, &c, 1);
1353cb93a386Sopenharmony_ci
1354cb93a386Sopenharmony_ci        if (fShadeDirectlyIntoDevice) {
1355cb93a386Sopenharmony_ci            if (255 == alpha) {
1356cb93a386Sopenharmony_ci                do {
1357cb93a386Sopenharmony_ci                    *device = c;
1358cb93a386Sopenharmony_ci                    device = (uint32_t*)((char*)device + deviceRB);
1359cb93a386Sopenharmony_ci                } while (--height > 0);
1360cb93a386Sopenharmony_ci            } else {
1361cb93a386Sopenharmony_ci                do {
1362cb93a386Sopenharmony_ci                    *device = SkFourByteInterp(c, *device, alpha);
1363cb93a386Sopenharmony_ci                    device = (uint32_t*)((char*)device + deviceRB);
1364cb93a386Sopenharmony_ci                } while (--height > 0);
1365cb93a386Sopenharmony_ci            }
1366cb93a386Sopenharmony_ci        } else {
1367cb93a386Sopenharmony_ci            SkXfermode* xfer = fXfermode;
1368cb93a386Sopenharmony_ci            if (xfer) {
1369cb93a386Sopenharmony_ci                do {
1370cb93a386Sopenharmony_ci                    xfer->xfer32(device, &c, 1, &alpha);
1371cb93a386Sopenharmony_ci                    device = (uint32_t*)((char*)device + deviceRB);
1372cb93a386Sopenharmony_ci                } while (--height > 0);
1373cb93a386Sopenharmony_ci            } else {
1374cb93a386Sopenharmony_ci                SkBlitRow::Proc32 proc = (255 == alpha) ? fProc32 : fProc32Blend;
1375cb93a386Sopenharmony_ci                do {
1376cb93a386Sopenharmony_ci                    proc(device, &c, 1, alpha);
1377cb93a386Sopenharmony_ci                    device = (uint32_t*)((char*)device + deviceRB);
1378cb93a386Sopenharmony_ci                } while (--height > 0);
1379cb93a386Sopenharmony_ci            }
1380cb93a386Sopenharmony_ci        }
1381cb93a386Sopenharmony_ci        return;
1382cb93a386Sopenharmony_ci    }
1383cb93a386Sopenharmony_ci
1384cb93a386Sopenharmony_ci    if (fShadeDirectlyIntoDevice) {
1385cb93a386Sopenharmony_ci        if (255 == alpha) {
1386cb93a386Sopenharmony_ci            do {
1387cb93a386Sopenharmony_ci                fShaderContext->shadeSpan(x, y, device, 1);
1388cb93a386Sopenharmony_ci                y += 1;
1389cb93a386Sopenharmony_ci                device = (uint32_t*)((char*)device + deviceRB);
1390cb93a386Sopenharmony_ci            } while (--height > 0);
1391cb93a386Sopenharmony_ci        } else {
1392cb93a386Sopenharmony_ci            do {
1393cb93a386Sopenharmony_ci                SkPMColor c;
1394cb93a386Sopenharmony_ci                fShaderContext->shadeSpan(x, y, &c, 1);
1395cb93a386Sopenharmony_ci                *device = SkFourByteInterp(c, *device, alpha);
1396cb93a386Sopenharmony_ci                y += 1;
1397cb93a386Sopenharmony_ci                device = (uint32_t*)((char*)device + deviceRB);
1398cb93a386Sopenharmony_ci            } while (--height > 0);
1399cb93a386Sopenharmony_ci        }
1400cb93a386Sopenharmony_ci    } else {
1401cb93a386Sopenharmony_ci        SkPMColor* span = fBuffer;
1402cb93a386Sopenharmony_ci        SkXfermode* xfer = fXfermode;
1403cb93a386Sopenharmony_ci        if (xfer) {
1404cb93a386Sopenharmony_ci            do {
1405cb93a386Sopenharmony_ci                fShaderContext->shadeSpan(x, y, span, 1);
1406cb93a386Sopenharmony_ci                xfer->xfer32(device, span, 1, &alpha);
1407cb93a386Sopenharmony_ci                y += 1;
1408cb93a386Sopenharmony_ci                device = (uint32_t*)((char*)device + deviceRB);
1409cb93a386Sopenharmony_ci            } while (--height > 0);
1410cb93a386Sopenharmony_ci        } else {
1411cb93a386Sopenharmony_ci            SkBlitRow::Proc32 proc = (255 == alpha) ? fProc32 : fProc32Blend;
1412cb93a386Sopenharmony_ci            do {
1413cb93a386Sopenharmony_ci                fShaderContext->shadeSpan(x, y, span, 1);
1414cb93a386Sopenharmony_ci                proc(device, span, 1, alpha);
1415cb93a386Sopenharmony_ci                y += 1;
1416cb93a386Sopenharmony_ci                device = (uint32_t*)((char*)device + deviceRB);
1417cb93a386Sopenharmony_ci            } while (--height > 0);
1418cb93a386Sopenharmony_ci        }
1419cb93a386Sopenharmony_ci    }
1420cb93a386Sopenharmony_ci}
1421