1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2015 Google Inc. 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#ifndef SkBlitMask_opts_DEFINED 9cb93a386Sopenharmony_ci#define SkBlitMask_opts_DEFINED 10cb93a386Sopenharmony_ci 11cb93a386Sopenharmony_ci#include "src/core/Sk4px.h" 12cb93a386Sopenharmony_ci 13cb93a386Sopenharmony_cinamespace SK_OPTS_NS { 14cb93a386Sopenharmony_ci 15cb93a386Sopenharmony_ci#if defined(SK_ARM_HAS_NEON) 16cb93a386Sopenharmony_ci // The Sk4px versions below will work fine with NEON, but we have had many indications 17cb93a386Sopenharmony_ci // that it doesn't perform as well as this NEON-specific code. TODO(mtklein): why? 18cb93a386Sopenharmony_ci 19cb93a386Sopenharmony_ci #define NEON_A (SK_A32_SHIFT / 8) 20cb93a386Sopenharmony_ci #define NEON_R (SK_R32_SHIFT / 8) 21cb93a386Sopenharmony_ci #define NEON_G (SK_G32_SHIFT / 8) 22cb93a386Sopenharmony_ci #define NEON_B (SK_B32_SHIFT / 8) 23cb93a386Sopenharmony_ci 24cb93a386Sopenharmony_ci static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) { 25cb93a386Sopenharmony_ci return vaddw_u8(vdupq_n_u16(1), alpha); 26cb93a386Sopenharmony_ci } 27cb93a386Sopenharmony_ci 28cb93a386Sopenharmony_ci static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) { 29cb93a386Sopenharmony_ci return vshrn_n_u16(vmovl_u8(color) * scale, 8); 30cb93a386Sopenharmony_ci } 31cb93a386Sopenharmony_ci 32cb93a386Sopenharmony_ci static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) { 33cb93a386Sopenharmony_ci uint8x8x4_t ret; 34cb93a386Sopenharmony_ci 35cb93a386Sopenharmony_ci ret.val[0] = SkAlphaMul_neon8(color.val[0], scale); 36cb93a386Sopenharmony_ci ret.val[1] = SkAlphaMul_neon8(color.val[1], scale); 37cb93a386Sopenharmony_ci ret.val[2] = SkAlphaMul_neon8(color.val[2], scale); 38cb93a386Sopenharmony_ci ret.val[3] = SkAlphaMul_neon8(color.val[3], scale); 39cb93a386Sopenharmony_ci 40cb93a386Sopenharmony_ci return ret; 41cb93a386Sopenharmony_ci } 42cb93a386Sopenharmony_ci 43cb93a386Sopenharmony_ci 44cb93a386Sopenharmony_ci template <bool isColor> 45cb93a386Sopenharmony_ci static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, 46cb93a386Sopenharmony_ci const void* SK_RESTRICT maskPtr, size_t maskRB, 47cb93a386Sopenharmony_ci SkColor color, int width, int height) { 48cb93a386Sopenharmony_ci SkPMColor pmc = SkPreMultiplyColor(color); 49cb93a386Sopenharmony_ci SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; 50cb93a386Sopenharmony_ci const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; 51cb93a386Sopenharmony_ci uint8x8x4_t vpmc; 52cb93a386Sopenharmony_ci 53cb93a386Sopenharmony_ci maskRB -= width; 54cb93a386Sopenharmony_ci dstRB -= (width << 2); 55cb93a386Sopenharmony_ci 56cb93a386Sopenharmony_ci if (width >= 8) { 57cb93a386Sopenharmony_ci vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); 58cb93a386Sopenharmony_ci vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); 59cb93a386Sopenharmony_ci vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); 60cb93a386Sopenharmony_ci vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); 61cb93a386Sopenharmony_ci } 62cb93a386Sopenharmony_ci do { 63cb93a386Sopenharmony_ci int w = width; 64cb93a386Sopenharmony_ci while (w >= 8) { 65cb93a386Sopenharmony_ci uint8x8_t vmask = vld1_u8(mask); 66cb93a386Sopenharmony_ci uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); 67cb93a386Sopenharmony_ci if (isColor) { 68cb93a386Sopenharmony_ci vscale = vsubw_u8(vdupq_n_u16(256), 69cb93a386Sopenharmony_ci SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); 70cb93a386Sopenharmony_ci } else { 71cb93a386Sopenharmony_ci vscale = vsubw_u8(vdupq_n_u16(256), vmask); 72cb93a386Sopenharmony_ci } 73cb93a386Sopenharmony_ci uint8x8x4_t vdev = vld4_u8((uint8_t*)device); 74cb93a386Sopenharmony_ci 75cb93a386Sopenharmony_ci vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) 76cb93a386Sopenharmony_ci + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); 77cb93a386Sopenharmony_ci vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) 78cb93a386Sopenharmony_ci + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); 79cb93a386Sopenharmony_ci vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) 80cb93a386Sopenharmony_ci + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); 81cb93a386Sopenharmony_ci vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) 82cb93a386Sopenharmony_ci + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); 83cb93a386Sopenharmony_ci 84cb93a386Sopenharmony_ci vst4_u8((uint8_t*)device, vdev); 85cb93a386Sopenharmony_ci 86cb93a386Sopenharmony_ci mask += 8; 87cb93a386Sopenharmony_ci device += 8; 88cb93a386Sopenharmony_ci w -= 8; 89cb93a386Sopenharmony_ci } 90cb93a386Sopenharmony_ci 91cb93a386Sopenharmony_ci while (w--) { 92cb93a386Sopenharmony_ci unsigned aa = *mask++; 93cb93a386Sopenharmony_ci if (isColor) { 94cb93a386Sopenharmony_ci *device = SkBlendARGB32(pmc, *device, aa); 95cb93a386Sopenharmony_ci } else { 96cb93a386Sopenharmony_ci *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) 97cb93a386Sopenharmony_ci + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); 98cb93a386Sopenharmony_ci } 99cb93a386Sopenharmony_ci device += 1; 100cb93a386Sopenharmony_ci } 101cb93a386Sopenharmony_ci 102cb93a386Sopenharmony_ci device = (uint32_t*)((char*)device + dstRB); 103cb93a386Sopenharmony_ci mask += maskRB; 104cb93a386Sopenharmony_ci 105cb93a386Sopenharmony_ci } while (--height != 0); 106cb93a386Sopenharmony_ci } 107cb93a386Sopenharmony_ci 108cb93a386Sopenharmony_ci static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, 109cb93a386Sopenharmony_ci const SkAlpha* mask, size_t maskRB, 110cb93a386Sopenharmony_ci SkColor color, int w, int h) { 111cb93a386Sopenharmony_ci D32_A8_Opaque_Color_neon<true>(dst, dstRB, mask, maskRB, color, w, h); 112cb93a386Sopenharmony_ci } 113cb93a386Sopenharmony_ci 114cb93a386Sopenharmony_ci // As above, but made slightly simpler by requiring that color is opaque. 115cb93a386Sopenharmony_ci static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, 116cb93a386Sopenharmony_ci const SkAlpha* mask, size_t maskRB, 117cb93a386Sopenharmony_ci SkColor color, int w, int h) { 118cb93a386Sopenharmony_ci D32_A8_Opaque_Color_neon<false>(dst, dstRB, mask, maskRB, color, w, h); 119cb93a386Sopenharmony_ci } 120cb93a386Sopenharmony_ci 121cb93a386Sopenharmony_ci // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. 122cb93a386Sopenharmony_ci static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, 123cb93a386Sopenharmony_ci const SkAlpha* maskPtr, size_t maskRB, 124cb93a386Sopenharmony_ci int width, int height) { 125cb93a386Sopenharmony_ci SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; 126cb93a386Sopenharmony_ci const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; 127cb93a386Sopenharmony_ci 128cb93a386Sopenharmony_ci maskRB -= width; 129cb93a386Sopenharmony_ci dstRB -= (width << 2); 130cb93a386Sopenharmony_ci do { 131cb93a386Sopenharmony_ci int w = width; 132cb93a386Sopenharmony_ci while (w >= 8) { 133cb93a386Sopenharmony_ci uint8x8_t vmask = vld1_u8(mask); 134cb93a386Sopenharmony_ci uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); 135cb93a386Sopenharmony_ci uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); 136cb93a386Sopenharmony_ci 137cb93a386Sopenharmony_ci vdevice = SkAlphaMulQ_neon8(vdevice, vscale); 138cb93a386Sopenharmony_ci vdevice.val[NEON_A] += vmask; 139cb93a386Sopenharmony_ci 140cb93a386Sopenharmony_ci vst4_u8((uint8_t*)device, vdevice); 141cb93a386Sopenharmony_ci 142cb93a386Sopenharmony_ci mask += 8; 143cb93a386Sopenharmony_ci device += 8; 144cb93a386Sopenharmony_ci w -= 8; 145cb93a386Sopenharmony_ci } 146cb93a386Sopenharmony_ci while (w-- > 0) { 147cb93a386Sopenharmony_ci unsigned aa = *mask++; 148cb93a386Sopenharmony_ci *device = (aa << SK_A32_SHIFT) 149cb93a386Sopenharmony_ci + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); 150cb93a386Sopenharmony_ci device += 1; 151cb93a386Sopenharmony_ci } 152cb93a386Sopenharmony_ci device = (uint32_t*)((char*)device + dstRB); 153cb93a386Sopenharmony_ci mask += maskRB; 154cb93a386Sopenharmony_ci } while (--height != 0); 155cb93a386Sopenharmony_ci } 156cb93a386Sopenharmony_ci 157cb93a386Sopenharmony_ci#else 158cb93a386Sopenharmony_ci static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, 159cb93a386Sopenharmony_ci const SkAlpha* mask, size_t maskRB, 160cb93a386Sopenharmony_ci SkColor color, int w, int h) { 161cb93a386Sopenharmony_ci auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); 162cb93a386Sopenharmony_ci auto fn = [&](const Sk4px& d, const Sk4px& aa) { 163cb93a386Sopenharmony_ci // = (s + d(1-sa))aa + d(1-aa) 164cb93a386Sopenharmony_ci // = s*aa + d(1-sa*aa) 165cb93a386Sopenharmony_ci auto left = s.approxMulDiv255(aa), 166cb93a386Sopenharmony_ci right = d.approxMulDiv255(left.alphas().inv()); 167cb93a386Sopenharmony_ci return left + right; // This does not overflow (exhaustively checked). 168cb93a386Sopenharmony_ci }; 169cb93a386Sopenharmony_ci while (h --> 0) { 170cb93a386Sopenharmony_ci Sk4px::MapDstAlpha(w, dst, mask, fn); 171cb93a386Sopenharmony_ci dst += dstRB / sizeof(*dst); 172cb93a386Sopenharmony_ci mask += maskRB / sizeof(*mask); 173cb93a386Sopenharmony_ci } 174cb93a386Sopenharmony_ci } 175cb93a386Sopenharmony_ci 176cb93a386Sopenharmony_ci // As above, but made slightly simpler by requiring that color is opaque. 177cb93a386Sopenharmony_ci static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, 178cb93a386Sopenharmony_ci const SkAlpha* mask, size_t maskRB, 179cb93a386Sopenharmony_ci SkColor color, int w, int h) { 180cb93a386Sopenharmony_ci SkASSERT(SkColorGetA(color) == 0xFF); 181cb93a386Sopenharmony_ci auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); 182cb93a386Sopenharmony_ci auto fn = [&](const Sk4px& d, const Sk4px& aa) { 183cb93a386Sopenharmony_ci // = (s + d(1-sa))aa + d(1-aa) 184cb93a386Sopenharmony_ci // = s*aa + d(1-sa*aa) 185cb93a386Sopenharmony_ci // ~~~> 186cb93a386Sopenharmony_ci // = s*aa + d(1-aa) 187cb93a386Sopenharmony_ci return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); 188cb93a386Sopenharmony_ci }; 189cb93a386Sopenharmony_ci while (h --> 0) { 190cb93a386Sopenharmony_ci Sk4px::MapDstAlpha(w, dst, mask, fn); 191cb93a386Sopenharmony_ci dst += dstRB / sizeof(*dst); 192cb93a386Sopenharmony_ci mask += maskRB / sizeof(*mask); 193cb93a386Sopenharmony_ci } 194cb93a386Sopenharmony_ci } 195cb93a386Sopenharmony_ci 196cb93a386Sopenharmony_ci // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. 197cb93a386Sopenharmony_ci static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, 198cb93a386Sopenharmony_ci const SkAlpha* mask, size_t maskRB, 199cb93a386Sopenharmony_ci int w, int h) { 200cb93a386Sopenharmony_ci auto fn = [](const Sk4px& d, const Sk4px& aa) { 201cb93a386Sopenharmony_ci // = (s + d(1-sa))aa + d(1-aa) 202cb93a386Sopenharmony_ci // = s*aa + d(1-sa*aa) 203cb93a386Sopenharmony_ci // ~~~> 204cb93a386Sopenharmony_ci // a = 1*aa + d(1-1*aa) = aa + d(1-aa) 205cb93a386Sopenharmony_ci // c = 0*aa + d(1-1*aa) = d(1-aa) 206cb93a386Sopenharmony_ci return Sk4px(Sk16b(aa) & Sk16b(0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255)) 207cb93a386Sopenharmony_ci + d.approxMulDiv255(aa.inv()); 208cb93a386Sopenharmony_ci }; 209cb93a386Sopenharmony_ci while (h --> 0) { 210cb93a386Sopenharmony_ci Sk4px::MapDstAlpha(w, dst, mask, fn); 211cb93a386Sopenharmony_ci dst += dstRB / sizeof(*dst); 212cb93a386Sopenharmony_ci mask += maskRB / sizeof(*mask); 213cb93a386Sopenharmony_ci } 214cb93a386Sopenharmony_ci } 215cb93a386Sopenharmony_ci#endif 216cb93a386Sopenharmony_ci 217cb93a386Sopenharmony_ci/*not static*/ inline void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, 218cb93a386Sopenharmony_ci const SkAlpha* mask, size_t maskRB, 219cb93a386Sopenharmony_ci SkColor color, int w, int h) { 220cb93a386Sopenharmony_ci if (color == SK_ColorBLACK) { 221cb93a386Sopenharmony_ci blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h); 222cb93a386Sopenharmony_ci } else if (SkColorGetA(color) == 0xFF) { 223cb93a386Sopenharmony_ci blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h); 224cb93a386Sopenharmony_ci } else { 225cb93a386Sopenharmony_ci blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h); 226cb93a386Sopenharmony_ci } 227cb93a386Sopenharmony_ci} 228cb93a386Sopenharmony_ci 229cb93a386Sopenharmony_ci} // namespace SK_OPTS_NS 230cb93a386Sopenharmony_ci 231cb93a386Sopenharmony_ci#endif//SkBlitMask_opts_DEFINED 232