Lines Matching refs:dst

1566     Color Builder::blend(SkBlendMode mode, Color src, Color dst) {
1575 fn(src.r, dst.r),
1576 fn(src.g, dst.g),
1577 fn(src.b, dst.b),
1578 fn(src.a, dst.a),
1584 fn(src.r, dst.r),
1585 fn(src.g, dst.g),
1586 fn(src.b, dst.b),
1587 mad(dst.a, 1-src.a, src.a), // srcover for alpha
1593 R + mma(src.r, 1-dst.a, dst.r, 1-src.a),
1594 G + mma(src.g, 1-dst.a, dst.g, 1-src.a),
1595 B + mma(src.b, 1-dst.a, dst.b, 1-src.a),
1596 mad(dst.a, 1-src.a, src.a), // srcover for alpha
1608 case SkBlendMode::kDst: return dst;
1610 case SkBlendMode::kDstOver: std::swap(src, dst); [[fallthrough]];
1616 case SkBlendMode::kDstIn: std::swap(src, dst); [[fallthrough]];
1619 return s * dst.a;
1622 case SkBlendMode::kDstOut: std::swap(src, dst); [[fallthrough]];
1626 return s * (1-dst.a);
1629 case SkBlendMode::kDstATop: std::swap(src, dst); [[fallthrough]];
1632 return mma(s, dst.a, d, 1-src.a);
1637 return mma(s, 1-dst.a, d, 1-src.a);
1659 return s + (d - max(s * dst.a,
1665 return s + (d - min(s * dst.a,
1671 return s + (d - two(min(s * dst.a,
1682 auto mn = min(dst.a,
1683 src.a * (dst.a - d) / s),
1684 burn = src.a * (dst.a - mn) + mma(s, 1-dst.a, d, 1-src.a);
1685 return select(d == dst.a , s * (1-dst.a) + d,
1692 auto dodge = src.a * min(dst.a,
1694 + mma(s, 1-dst.a, d, 1-src.a);
1695 return select(d == 0.0f , s * (1-dst.a) + d,
1702 return mma(s, 1-dst.a, d, 1-src.a) +
1705 src.a * dst.a - two((dst.a - d) * (src.a - s)));
1710 return mma(s, 1-dst.a, d, 1-src.a) +
1711 select(two(d) <= dst.a,
1713 src.a * dst.a - two((dst.a - d) * (src.a - s)));
1718 return mma(s, 1-dst.a, d, 1-src.a) + s * d;
1723 auto m = select(dst.a > 0.0f, d / dst.a
1730 // 2. light src, dark dst?
1731 // 3. light src, light dst?
1740 liteSrc = dst.a * (s2 - src.a) * select(4*d <= dst.a, darkDst
1743 return s * (1-dst.a) + d * (1-src.a) + select(s2 <= src.a, darkSrc
1752 set_sat (&R, &G, &B, src.a * saturation(dst.r, dst.g, dst.b));
1753 set_lum (&R, &G, &B, src.a * luminance (dst.r, dst.g, dst.b));
1754 clip_color(&R, &G, &B, src.a * dst.a);
1760 skvm::F32 R = dst.r * src.a,
1761 G = dst.g * src.a,
1762 B = dst.b * src.a;
1764 set_sat (&R, &G, &B, dst.a * saturation(src.r, src.g, src.b));
1765 set_lum (&R, &G, &B, src.a * luminance (dst.r, dst.g, dst.b));
1766 clip_color(&R, &G, &B, src.a * dst.a);
1772 skvm::F32 R = src.r * dst.a,
1773 G = src.g * dst.a,
1774 B = src.b * dst.a;
1776 set_lum (&R, &G, &B, src.a * luminance(dst.r, dst.g, dst.b));
1777 clip_color(&R, &G, &B, src.a * dst.a);
1783 skvm::F32 R = dst.r * src.a,
1784 G = dst.g * src.a,
1785 B = dst.b * src.a;
1787 set_lum (&R, &G, &B, dst.a * luminance(src.r, src.g, src.b));
1788 clip_color(&R, &G, &B, dst.a * src.a);
1857 bool R, // Same as REX R. Pass high bit of dst register, dst>>3.
1942 void Assembler::op(int opcode, Operand dst, GP64 x) {
1943 if (dst.kind == Operand::REG) {
1944 this->byte(rex(W1,x>>3,0,dst.reg>>3));
1946 this->byte(mod_rm(Mod::Direct, x, dst.reg&7));
1948 SkASSERT(dst.kind == Operand::MEM);
1949 const Mem& m = dst.mem;
1963 void Assembler::op(int opcode, int opcode_ext, Operand dst, int imm) {
1972 this->op(opcode, dst, (GP64)opcode_ext);
1976 void Assembler::add(Operand dst, int imm) { this->op(0x01,0b000, dst,imm); }
1977 void Assembler::sub(Operand dst, int imm) { this->op(0x01,0b101, dst,imm); }
1978 void Assembler::cmp(Operand dst, int imm) { this->op(0x01,0b111, dst,imm); }
1982 void Assembler::mov(Operand dst, int imm) {
1983 this->op(0xC7,dst,(GP64)0b000);
1986 void Assembler::movb(Operand dst, int imm) {
1987 this->op(0xC6,dst,(GP64)0b000);
1991 void Assembler::add (Operand dst, GP64 x) { this->op(0x01, dst,x); }
1992 void Assembler::sub (Operand dst, GP64 x) { this->op(0x29, dst,x); }
1993 void Assembler::cmp (Operand dst, GP64 x) { this->op(0x39, dst,x); }
1994 void Assembler::mov (Operand dst, GP64 x) { this->op(0x89, dst,x); }
1995 void Assembler::movb(Operand dst, GP64 x) { this->op(0x88, dst,x); }
1997 void Assembler::add (GP64 dst, Operand x) { this->op(0x03, x,dst); }
1998 void Assembler::sub (GP64 dst, Operand x) { this->op(0x2B, x,dst); }
1999 void Assembler::cmp (GP64 dst, Operand x) { this->op(0x3B, x,dst); }
2000 void Assembler::mov (GP64 dst, Operand x) { this->op(0x8B, x,dst); }
2001 void Assembler::movb(GP64 dst, Operand x) { this->op(0x8A, x,dst); }
2003 void Assembler::movzbq(GP64 dst, Operand x) { this->op(0xB60F, x,dst); }
2004 void Assembler::movzwq(GP64 dst, Operand x) { this->op(0xB70F, x,dst); }
2006 void Assembler::vpaddd (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xfe, dst,x,y); }
2007 void Assembler::vpsubd (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xfa, dst,x,y); }
2008 void Assembler::vpmulld(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x40, dst,x,y); }
2010 void Assembler::vpaddw (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xfd, dst,x,y); }
2011 void Assembler::vpsubw (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xf9, dst,x,y); }
2012 void Assembler::vpmullw (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xd5, dst,x,y); }
2013 void Assembler::vpavgw (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xe3, dst,x,y); }
2014 void Assembler::vpmulhrsw(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x0b, dst,x,y); }
2015 void Assembler::vpminsw (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xea, dst,x,y); }
2016 void Assembler::vpmaxsw (Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0xee, dst,x,y); }
2017 void Assembler::vpminuw (Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x3a, dst,x,y); }
2018 void Assembler::vpmaxuw (Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x3e, dst,x,y); }
2020 void Assembler::vpabsw(Ymm dst, Operand x) { this->op(0x66,0x380f,0x1d, dst,x); }
2023 void Assembler::vpand (Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0xdb, dst,x,y); }
2024 void Assembler::vpor (Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0xeb, dst,x,y); }
2025 void Assembler::vpxor (Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0xef, dst,x,y); }
2026 void Assembler::vpandn(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0xdf, dst,x,y); }
2028 void Assembler::vaddps(Ymm dst, Ymm x, Operand y) { this->op(0,0x0f,0x58, dst,x,y); }
2029 void Assembler::vsubps(Ymm dst, Ymm x, Operand y) { this->op(0,0x0f,0x5c, dst,x,y); }
2030 void Assembler::vmulps(Ymm dst, Ymm x, Operand y) { this->op(0,0x0f,0x59, dst,x,y); }
2031 void Assembler::vdivps(Ymm dst, Ymm x, Operand y) { this->op(0,0x0f,0x5e, dst,x,y); }
2032 void Assembler::vminps(Ymm dst, Ymm x, Operand y) { this->op(0,0x0f,0x5d, dst,x,y); }
2033 void Assembler::vmaxps(Ymm dst, Ymm x, Operand y) { this->op(0,0x0f,0x5f, dst,x,y); }
2035 void Assembler::vfmadd132ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x98, dst,x,y); }
2036 void Assembler::vfmadd213ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0xa8, dst,x,y); }
2037 void Assembler::vfmadd231ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0xb8, dst,x,y); }
2039 void Assembler::vfmsub132ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x9a, dst,x,y); }
2040 void Assembler::vfmsub213ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0xaa, dst,x,y); }
2041 void Assembler::vfmsub231ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0xba, dst,x,y); }
2043 void Assembler::vfnmadd132ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x9c, dst,x,y); }
2044 void Assembler::vfnmadd213ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0xac, dst,x,y); }
2045 void Assembler::vfnmadd231ps(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0xbc, dst,x,y); }
2047 void Assembler::vpackusdw(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x2b, dst,x,y); }
2048 void Assembler::vpackuswb(Ymm dst, Ymm x, Operand y) { this->op(0x66, 0x0f,0x67, dst,x,y); }
2050 void Assembler::vpunpckldq(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0x62, dst,x,y); }
2051 void Assembler::vpunpckhdq(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0x6a, dst,x,y); }
2053 void Assembler::vpcmpeqd(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0x76, dst,x,y); }
2054 void Assembler::vpcmpeqw(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0x75, dst,x,y); }
2055 void Assembler::vpcmpgtd(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0x66, dst,x,y); }
2056 void Assembler::vpcmpgtw(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x0f,0x65, dst,x,y); }
2072 void Assembler::vcmpps(Ymm dst, Ymm x, Operand y, int imm) {
2073 this->op(0,0x0f,0xc2, dst,x,y);
2077 void Assembler::vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z) {
2078 this->op(0x66,0x3a0f,0x4c, dst,x,y);
2082 // Shift instructions encode their opcode extension as "dst", dst as x, and x as y.
2083 void Assembler::vpslld(Ymm dst, Ymm x, int imm) {
2084 this->op(0x66,0x0f,0x72,(Ymm)6, dst,x);
2087 void Assembler::vpsrld(Ymm dst, Ymm x, int imm) {
2088 this->op(0x66,0x0f,0x72,(Ymm)2, dst,x);
2091 void Assembler::vpsrad(Ymm dst, Ymm x, int imm) {
2092 this->op(0x66,0x0f,0x72,(Ymm)4, dst,x);
2095 void Assembler::vpsllw(Ymm dst, Ymm x, int imm) {
2096 this->op(0x66,0x0f,0x71,(Ymm)6, dst,x);
2099 void Assembler::vpsrlw(Ymm dst, Ymm x, int imm) {
2100 this->op(0x66,0x0f,0x71,(Ymm)2, dst,x);
2103 void Assembler::vpsraw(Ymm dst, Ymm x, int imm) {
2104 this->op(0x66,0x0f,0x71,(Ymm)4, dst,x);
2108 void Assembler::vpermq(Ymm dst, Operand x, int imm) {
2110 this->op(0x66,0x3a0f,0x00, dst,x,W1);
2114 void Assembler::vperm2f128(Ymm dst, Ymm x, Operand y, int imm) {
2115 this->op(0x66,0x3a0f,0x06, dst,x,y);
2119 void Assembler::vpermps(Ymm dst, Ymm ix, Operand src) {
2120 this->op(0x66,0x380f,0x16, dst,ix,src);
2123 void Assembler::vroundps(Ymm dst, Operand x, Rounding imm) {
2124 this->op(0x66,0x3a0f,0x08, dst,x);
2128 void Assembler::vmovdqa(Ymm dst, Operand src) { this->op(0x66,0x0f,0x6f, dst,src); }
2129 void Assembler::vmovups(Ymm dst, Operand src) { this->op( 0,0x0f,0x10, dst,src); }
2130 void Assembler::vmovups(Xmm dst, Operand src) { this->op( 0,0x0f,0x10, dst,src); }
2131 void Assembler::vmovups(Operand dst, Ymm src) { this->op( 0,0x0f,0x11, src,dst); }
2132 void Assembler::vmovups(Operand dst, Xmm src) { this->op( 0,0x0f,0x11, src,dst); }
2134 void Assembler::vcvtdq2ps (Ymm dst, Operand x) { this->op( 0,0x0f,0x5b, dst,x); }
2135 void Assembler::vcvttps2dq(Ymm dst, Operand x) { this->op(0xf3,0x0f,0x5b, dst,x); }
2136 void Assembler::vcvtps2dq (Ymm dst, Operand x) { this->op(0x66,0x0f,0x5b, dst,x); }
2137 void Assembler::vsqrtps (Ymm dst, Operand x) { this->op( 0,0x0f,0x51, dst,x); }
2139 void Assembler::vcvtps2ph(Operand dst, Ymm x, Rounding imm) {
2140 this->op(0x66,0x3a0f,0x1d, x,dst);
2141 this->imm_byte_after_operand(dst, imm);
2143 void Assembler::vcvtph2ps(Ymm dst, Operand x) {
2144 this->op(0x66,0x380f,0x13, dst,x);
2167 void Assembler::op(int prefix, int map, int opcode, int dst, int x, Operand y, W w, L l) {
2170 VEX v = vex(w, dst>>3, 0, y.reg>>3,
2174 this->byte(mod_rm(Mod::Direct, dst&7, y.reg&7));
2185 VEX v = vex(w, dst>>3, m.index>>3, m.base>>3,
2189 this->byte(mod_rm(mod(m.disp), dst&7, (need_SIB ? rsp : m.base)&7));
2200 VEX v = vex(w, dst>>3, 0, rip>>3,
2204 this->byte(mod_rm(Mod::Indirect, dst&7, rip&7));
2210 void Assembler::vpshufb(Ymm dst, Ymm x, Operand y) { this->op(0x66,0x380f,0x00, dst,x,y); }
2214 void Assembler::vbroadcastss(Ymm dst, Operand y) { this->op(0x66,0x380f,0x18, dst,y); }
2236 void Assembler::vpmovzxwd(Ymm dst, Operand src) { this->op(0x66,0x380f,0x33, dst,src); }
2237 void Assembler::vpmovzxbd(Ymm dst, Operand src) { this->op(0x66,0x380f,0x31, dst,src); }
2239 void Assembler::vmovq(Operand dst, Xmm src) { this->op(0x66,0x0f,0xd6, src,dst); }
2241 void Assembler::vmovd(Operand dst, Xmm src) { this->op(0x66,0x0f,0x7e, src,dst); }
2242 void Assembler::vmovd(Xmm dst, Operand src) { this->op(0x66,0x0f,0x6e, dst,src); }
2244 void Assembler::vpinsrd(Xmm dst, Xmm src, Operand y, int imm) {
2245 this->op(0x66,0x3a0f,0x22, dst,src,y);
2248 void Assembler::vpinsrw(Xmm dst, Xmm src, Operand y, int imm) {
2249 this->op(0x66,0x0f,0xc4, dst,src,y);
2252 void Assembler::vpinsrb(Xmm dst, Xmm src, Operand y, int imm) {
2253 this->op(0x66,0x3a0f,0x20, dst,src,y);
2257 void Assembler::vextracti128(Operand dst, Ymm src, int imm) {
2258 this->op(0x66,0x3a0f,0x39, src,dst);
2259 SkASSERT(dst.kind != Operand::LABEL);
2262 void Assembler::vpextrd(Operand dst, Xmm src, int imm) {
2263 this->op(0x66,0x3a0f,0x16, src,dst);
2264 SkASSERT(dst.kind != Operand::LABEL);
2267 void Assembler::vpextrw(Operand dst, Xmm src, int imm) {
2268 this->op(0x66,0x3a0f,0x15, src,dst);
2269 SkASSERT(dst.kind != Operand::LABEL);
2272 void Assembler::vpextrb(Operand dst, Xmm src, int imm) {
2273 this->op(0x66,0x3a0f,0x14, src,dst);
2274 SkASSERT(dst.kind != Operand::LABEL);
2278 void Assembler::vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask) {
2280 SkASSERT(dst != ix);
2281 SkASSERT(dst != mask);
2287 VEX v = vex(0, dst>>3, ix>>3, base>>3,
2291 this->byte(mod_rm(Mod::Indirect, dst&7, rsp/*use SIB*/));
2426 void Assembler::ldrd(X dst, X src, int imm12) {
2427 this->op(0b11'111'0'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2429 void Assembler::ldrs(X dst, X src, int imm12) {
2430 this->op(0b10'111'0'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2432 void Assembler::ldrh(X dst, X src, int imm12) {
2433 this->op(0b01'111'0'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2435 void Assembler::ldrb(X dst, X src, int imm12) {
2436 this->op(0b00'111'0'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2439 void Assembler::ldrq(V dst, X src, int imm12) {
2440 this->op(0b00'111'1'01'11'000000000000, src, dst, (imm12 & 12_mask) << 10);
2442 void Assembler::ldrd(V dst, X src, int imm12) {
2443 this->op(0b11'111'1'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2445 void Assembler::ldrs(V dst, X src, int imm12) {
2446 this->op(0b10'111'1'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2448 void Assembler::ldrh(V dst, X src, int imm12) {
2449 this->op(0b01'111'1'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2451 void Assembler::ldrb(V dst, X src, int imm12) {
2452 this->op(0b00'111'1'01'01'000000000000, src, dst, (imm12 & 12_mask) << 10);
2455 void Assembler::strs(X src, X dst, int imm12) {
2456 this->op(0b10'111'0'01'00'000000000000, dst, src, (imm12 & 12_mask) << 10);
2459 void Assembler::strq(V src, X dst, int imm12) {
2460 this->op(0b00'111'1'01'10'000000000000, dst, src, (imm12 & 12_mask) << 10);
2462 void Assembler::strd(V src, X dst, int imm12) {
2463 this->op(0b11'111'1'01'00'000000000000, dst, src, (imm12 & 12_mask) << 10);
2465 void Assembler::strs(V src, X dst, int imm12) {
2466 this->op(0b10'111'1'01'00'000000000000, dst, src, (imm12 & 12_mask) << 10);
2468 void Assembler::strh(V src, X dst, int imm12) {
2469 this->op(0b01'111'1'01'00'000000000000, dst, src, (imm12 & 12_mask) << 10);
2471 void Assembler::strb(V src, X dst, int imm12) {
2472 this->op(0b00'111'1'01'00'000000000000, dst, src, (imm12 & 12_mask) << 10);
2475 void Assembler::movs(X dst, V src, int lane) {
2477 this->op(0b0'0'0'01110000'00000'0'01'1'1'1, src, dst, (imm5 & 5_mask) << 16);
2479 void Assembler::inss(V dst, X src, int lane) {
2481 this->op(0b0'1'0'01110000'00000'0'0011'1, src, dst, (imm5 & 5_mask) << 16);
2485 void Assembler::ldrq(V dst, Label* l) {
2487 this->op(0b10'011'1'00'00000000000000, (V)0, dst, (imm19 & 19_mask) << 5);
2490 void Assembler::dup4s(V dst, X src) {
2491 this->op(0b0'1'0'01110000'00100'0'0001'1, src, dst);
2494 void Assembler::ld1r4s(V dst, X src) {
2495 this->op(0b0'1'0011010'1'0'00000'110'0'10, src, dst);
2497 void Assembler::ld1r8h(V dst, X src) {
2498 this->op(0b0'1'0011010'1'0'00000'110'0'01, src, dst);
2500 void Assembler::ld1r16b(V dst, X src) {
2501 this->op(0b0'1'0011010'1'0'00000'110'0'00, src, dst);
2504 void Assembler::ld24s(V dst, X src) { this->op(0b0'1'0011000'1'000000'1000'10, src, dst); }
2505 void Assembler::ld44s(V dst, X src) { this->op(0b0'1'0011000'1'000000'0000'10, src, dst); }
2506 void Assembler::st24s(V src, X dst) { this->op(0b0'1'0011000'0'000000'1000'10, dst, src); }
2507 void Assembler::st44s(V src, X dst) { this->op(0b0'1'0011000'0'000000'0000'10, dst, src); }
2509 void Assembler::ld24s(V dst, X src, int lane) {
2513 this->op(0b0'0'0011010'1'1'00000'100'0'00, src, dst, (Q<<30)|(S<<12));
2515 void Assembler::ld44s(V dst, X src, int lane) {
2518 this->op(0b0'0'0011010'1'1'00000'101'0'00, src, dst, (Q<<30)|(S<<12));
2676 auto S = [&](llvm::Type* dst, llvm::Value* v) { return b->CreateSExt(v, dst); };
3019 fImpl->llvm_compiling = std::async(std::launch::async, [dst = &fImpl->jit_entry,
3022 // std::atomic<void*>* dst;
3025 dst->store( (void*)ee->getFunctionAddress(name.c_str()) );
3491 // Which register holds dst,x,y,z,w for this instruction? NA if none does yet.
3551 // Alias dst() to r(v) if dies_here(v).
3563 // but with a hint, try to alias dst() to r(v) if dies_here(v).
3564 auto dst = [&](Val hint1 = NA, Val hint2 = NA) -> Reg {
3575 SkASSERT(dst() == tmp);
3626 a->vpackusdw(dst(x), r(x), r(x));
3627 a->vpermq (dst(), dst(), 0xd8);
3628 a->vpackuswb(dst(), dst(), dst());
3629 a->vmovq (A::Mem{arg[immA]}, (A::Xmm)dst());
3636 a->vpackusdw(dst(x), r(x), r(x));
3637 a->vpermq (dst(), dst(), 0xd8);
3638 a->vmovups (A::Mem{arg[immA]}, (A::Xmm)dst());
3656 a->vperm2f128(dst(), L,H, 0x20); // = {a,i,b,j|c,k,d,l}
3657 a->vmovups(A::Mem{arg[immA], 0}, dst());
3658 a->vperm2f128(dst(), L,H, 0x31); // = {e,m,f,n|g,o,h,p}
3659 a->vmovups(A::Mem{arg[immA],32}, dst());
3688 a->vextracti128(dst(), r(x), 1);
3689 a->vmovd (A::Mem{arg[immA], 4*16 + 0}, (A::Xmm)dst() );
3690 a->vpextrd(A::Mem{arg[immA], 5*16 + 0}, (A::Xmm)dst(), 1);
3691 a->vpextrd(A::Mem{arg[immA], 6*16 + 0}, (A::Xmm)dst(), 2);
3692 a->vpextrd(A::Mem{arg[immA], 7*16 + 0}, (A::Xmm)dst(), 3);
3694 a->vextracti128(dst(), r(y), 1);
3695 a->vmovd (A::Mem{arg[immA], 4*16 + 4}, (A::Xmm)dst() );
3696 a->vpextrd(A::Mem{arg[immA], 5*16 + 4}, (A::Xmm)dst(), 1);
3697 a->vpextrd(A::Mem{arg[immA], 6*16 + 4}, (A::Xmm)dst(), 2);
3698 a->vpextrd(A::Mem{arg[immA], 7*16 + 4}, (A::Xmm)dst(), 3);
3700 a->vextracti128(dst(), r(z), 1);
3701 a->vmovd (A::Mem{arg[immA], 4*16 + 8}, (A::Xmm)dst() );
3702 a->vpextrd(A::Mem{arg[immA], 5*16 + 8}, (A::Xmm)dst(), 1);
3703 a->vpextrd(A::Mem{arg[immA], 6*16 + 8}, (A::Xmm)dst(), 2);
3704 a->vpextrd(A::Mem{arg[immA], 7*16 + 8}, (A::Xmm)dst(), 3);
3706 a->vextracti128(dst(), r(w), 1);
3707 a->vmovd (A::Mem{arg[immA], 4*16 + 12}, (A::Xmm)dst() );
3708 a->vpextrd(A::Mem{arg[immA], 5*16 + 12}, (A::Xmm)dst(), 1);
3709 a->vpextrd(A::Mem{arg[immA], 6*16 + 12}, (A::Xmm)dst(), 2);
3710 a->vpextrd(A::Mem{arg[immA], 7*16 + 12}, (A::Xmm)dst(), 3);
3714 a->vpxor (dst(), dst(), dst());
3715 a->vpinsrb((A::Xmm)dst(), (A::Xmm)dst(), A::Mem{arg[immA]}, 0);
3717 a->vpmovzxbd(dst(), A::Mem{arg[immA]});
3721 a->vpxor (dst(), dst(), dst());
3722 a->vpinsrw((A::Xmm)dst(), (A::Xmm)dst(), A::Mem{arg[immA]}, 0);
3724 a->vpmovzxwd(dst(), A::Mem{arg[immA]});
3727 case Op::load32: if (scalar) { a->vmovd ((A::Xmm)dst(), A::Mem{arg[immA]}); }
3728 else { a->vmovups( dst(), A::Mem{arg[immA]}); }
3732 a->vmovd((A::Xmm)dst(), A::Mem{arg[immA], 4*immB});
3736 a->vpermps(dst(), tmp, A::Mem{arg[immA], 0});
3739 a->vperm2f128(dst(), dst(),tmp, immB ? 0x31 : 0x20);
3744 a->vmovd((A::Xmm)dst(), A::Mem{arg[immA], 4*immB});
3754 // Load 4 high values into xmm dst(),
3755 A::Xmm d = (A::Xmm)dst();
3761 // Merge the two, ymm dst() = {xmm tmp|xmm dst()}
3762 a->vperm2f128(dst(), tmp,dst(), 0x20);
3780 a->vpinsrb((A::Xmm)dst(), (A::Xmm)dst(), A::Mem{GP0,0,GP1,A::ONE}, i);
3782 a->vpmovzxbd(dst(), dst());
3798 a->vpinsrw((A::Xmm)dst(), (A::Xmm)dst(), A::Mem{GP0,0,GP1,A::TWO}, i);
3800 a->vpmovzxwd(dst(), dst());
3812 // dst = *(base + 4*index)
3813 a->vmovd((A::Xmm)dst(x), A::Mem{GP0, 0, GP1, A::FOUR});
3820 a->vgatherdps(dst(), A::FOUR, r(x), GP0, mask);
3825 case Op::uniform32: a->vbroadcastss(dst(), A::Mem{arg[immA], immB});
3829 a->vbroadcastss(dst(), A::Mem{GP0, immC});
3832 case Op::index: a->vmovd((A::Xmm)dst(), N);
3833 a->vbroadcastss(dst(), dst());
3834 a->vpsubd(dst(), dst(), &iota);
3839 if (in_reg(x)) { a->vaddps(dst(x), r(x), any(y)); }
3840 else { a->vaddps(dst(y), r(y), any(x)); }
3844 if (in_reg(x)) { a->vmulps(dst(x), r(x), any(y)); }
3845 else { a->vmulps(dst(y), r(y), any(x)); }
3848 case Op::sub_f32: a->vsubps(dst(x), r(x), any(y)); break;
3849 case Op::div_f32: a->vdivps(dst(x), r(x), any(y)); break;
3850 case Op::min_f32: a->vminps(dst(y), r(y), any(x)); break; // Order matters,
3851 case Op::max_f32: a->vmaxps(dst(y), r(y), any(x)); break; // see test SkVM_min_max.
3854 if (try_alias(x)) { a->vfmadd132ps(dst(x), r(z), any(y)); } else
3855 if (try_alias(y)) { a->vfmadd213ps(dst(y), r(x), any(z)); } else
3856 if (try_alias(z)) { a->vfmadd231ps(dst(z), r(x), any(y)); } else
3857 { a->vmovups (dst(), any(x));
3858 a->vfmadd132ps(dst(), r(z), any(y)); }
3862 if (try_alias(x)) { a->vfmsub132ps(dst(x), r(z), any(y)); } else
3863 if (try_alias(y)) { a->vfmsub213ps(dst(y), r(x), any(z)); } else
3864 if (try_alias(z)) { a->vfmsub231ps(dst(z), r(x), any(y)); } else
3865 { a->vmovups (dst(), any(x));
3866 a->vfmsub132ps(dst(), r(z), any(y)); }
3870 if (try_alias(x)) { a->vfnmadd132ps(dst(x), r(z), any(y)); } else
3871 if (try_alias(y)) { a->vfnmadd213ps(dst(y), r(x), any(z)); } else
3872 if (try_alias(z)) { a->vfnmadd231ps(dst(z), r(x), any(y)); } else
3873 { a->vmovups (dst(), any(x));
3874 a->vfnmadd132ps(dst(), r(z), any(y)); }
3877 // In situations like this we want to try aliasing dst(x) when x is
3881 if (in_reg(x)) { a->vsqrtps(dst(x), r(x)); }
3882 else { a->vsqrtps(dst(), any(x)); }
3886 if (in_reg(x)) { a->vpaddd(dst(x), r(x), any(y)); }
3887 else { a->vpaddd(dst(y), r(y), any(x)); }
3891 if (in_reg(x)) { a->vpmulld(dst(x), r(x), any(y)); }
3892 else { a->vpmulld(dst(y), r(y), any(x)); }
3895 case Op::sub_i32: a->vpsubd(dst(x), r(x), any(y)); break;
3898 if (in_reg(x)) { a->vpand(dst(x), r(x), any(y)); }
3899 else { a->vpand(dst(y), r(y), any(x)); }
3902 if (in_reg(x)) { a->vpor(dst(x), r(x), any(y)); }
3903 else { a->vpor(dst(y), r(y), any(x)); }
3906 if (in_reg(x)) { a->vpxor(dst(x), r(x), any(y)); }
3907 else { a->vpxor(dst(y), r(y), any(x)); }
3910 case Op::bit_clear: a->vpandn(dst(y), r(y), any(x)); break; // Notice, y then x.
3913 if (try_alias(z)) { a->vpblendvb(dst(z), r(z), any(y), r(x)); }
3914 else { a->vpblendvb(dst(x), r(z), any(y), r(x)); }
3917 case Op::shl_i32: a->vpslld(dst(x), r(x), immA); break;
3918 case Op::shr_i32: a->vpsrld(dst(x), r(x), immA); break;
3919 case Op::sra_i32: a->vpsrad(dst(x), r(x), immA); break;
3922 if (in_reg(x)) { a->vpcmpeqd(dst(x), r(x), any(y)); }
3923 else { a->vpcmpeqd(dst(y), r(y), any(x)); }
3926 case Op::gt_i32: a->vpcmpgtd(dst(), r(x), any(y)); break;
3929 if (in_reg(x)) { a->vcmpeqps(dst(x), r(x), any(y)); }
3930 else { a->vcmpeqps(dst(y), r(y), any(x)); }
3933 if (in_reg(x)) { a->vcmpneqps(dst(x), r(x), any(y)); }
3934 else { a->vcmpneqps(dst(y), r(y), any(x)); }
3937 case Op:: gt_f32: a->vcmpltps (dst(y), r(y), any(x)); break;
3938 case Op::gte_f32: a->vcmpleps (dst(y), r(y), any(x)); break;
3941 if (in_reg(x)) { a->vroundps(dst(x), r(x), Assembler::CEIL); }
3942 else { a->vroundps(dst(), any(x), Assembler::CEIL); }
3946 if (in_reg(x)) { a->vroundps(dst(x), r(x), Assembler::FLOOR); }
3947 else { a->vroundps(dst(), any(x), Assembler::FLOOR); }
3951 if (in_reg(x)) { a->vcvtdq2ps(dst(x), r(x)); }
3952 else { a->vcvtdq2ps(dst(), any(x)); }
3956 if (in_reg(x)) { a->vcvttps2dq(dst(x), r(x)); }
3957 else { a->vcvttps2dq(dst(), any(x)); }
3961 if (in_reg(x)) { a->vcvtps2dq(dst(x), r(x)); }
3962 else { a->vcvtps2dq(dst(), any(x)); }
3966 a->vcvtps2ph(dst(x), r(x), A::CURRENT); // f32 ymm -> f16 xmm
3967 a->vpmovzxwd(dst(), dst()); // f16 xmm -> f16 ymm
3971 a->vpackusdw(dst(x), r(x), r(x)); // f16 ymm -> f16 xmm
3972 a->vpermq (dst(), dst(), 0xd8); // swap middle two 64-bit lanes
3973 a->vcvtph2ps(dst(), dst()); // f16 xmm -> f32 ymm
3978 a->uminv4s(dst(), r(x)); // uminv acts like an all() across the vector.
3979 a->movs(GP0, dst(), 0);
3995 a->dup4s(dst(), N);
3996 a->sub4s(dst(), dst(), tmp);
4000 case Op::store8: a->xtns2h(dst(x), r(x));
4001 a->xtnh2b(dst(), dst());
4002 if (scalar) { a->strb (dst(), arg[immA]); }
4003 else { a->strs (dst(), arg[immA]); }
4006 case Op::store16: a->xtns2h(dst(x), r(x));
4007 if (scalar) { a->strh (dst(), arg[immA]); }
4008 else { a->strd (dst(), arg[immA]); }
4057 case Op::load8: if (scalar) { a->ldrb(dst(), arg[immA]); }
4058 else { a->ldrs(dst(), arg[immA]); }
4059 a->uxtlb2h(dst(), dst());
4060 a->uxtlh2s(dst(), dst());
4063 case Op::load16: if (scalar) { a->ldrh(dst(), arg[immA]); }
4064 else { a->ldrd(dst(), arg[immA]); }
4065 a->uxtlh2s(dst(), dst());
4068 case Op::load32: if (scalar) { a->ldrs(dst(), arg[immA]); }
4069 else { a->ldrq(dst(), arg[immA]); }
4073 a->ldrs(dst(), arg[immA], immB);
4086 a->ldrs(dst(), arg[immA], immB);
4107 a->ld1r4s(dst(), GP0);
4113 a->ld1r4s(dst(), GP0);
4125 a->inss(dst(x), GP1, i); // Insert it into dst() lane i.
4137 a->inss(dst(x), GP1, i);
4149 a->inss(dst(x), GP1, i);
4153 case Op::add_f32: a->fadd4s(dst(x,y), r(x), r(y)); break;
4154 case Op::sub_f32: a->fsub4s(dst(x,y), r(x), r(y)); break;
4155 case Op::mul_f32: a->fmul4s(dst(x,y), r(x), r(y)); break;
4156 case Op::div_f32: a->fdiv4s(dst(x,y), r(x), r(y)); break;
4158 case Op::sqrt_f32: a->fsqrt4s(dst(x), r(x)); break;
4162 else { a->orr16b(dst(), r(z), r(z));
4163 a->fmla4s(dst(), r(x), r(y)); }
4168 else { a->orr16b(dst(), r(z), r(z));
4169 a->fmls4s(dst(), r(x), r(y)); }
4174 else { a->orr16b(dst(), r(z), r(z));
4175 a->fmls4s(dst(), r(x), r(y)); }
4176 a->fneg4s(dst(), dst());
4179 case Op:: gt_f32: a->fcmgt4s (dst(x,y), r(x), r(y)); break;
4180 case Op::gte_f32: a->fcmge4s (dst(x,y), r(x), r(y)); break;
4181 case Op:: eq_f32: a->fcmeq4s (dst(x,y), r(x), r(y)); break;
4182 case Op::neq_f32: a->fcmeq4s (dst(x,y), r(x), r(y));
4183 a->not16b (dst(), dst()); break;
4186 case Op::add_i32: a->add4s(dst(x,y), r(x), r(y)); break;
4187 case Op::sub_i32: a->sub4s(dst(x,y), r(x), r(y)); break;
4188 case Op::mul_i32: a->mul4s(dst(x,y), r(x), r(y)); break;
4190 case Op::bit_and : a->and16b(dst(x,y), r(x), r(y)); break;
4191 case Op::bit_or : a->orr16b(dst(x,y), r(x), r(y)); break;
4192 case Op::bit_xor : a->eor16b(dst(x,y), r(x), r(y)); break;
4193 case Op::bit_clear: a->bic16b(dst(x,y), r(x), r(y)); break;
4197 else { a->orr16b(dst(), r(x), r(x));
4198 a->bsl16b(dst(), r(y), r(z)); }
4204 a->fcmgt4s(dst(), r(x), r(y));
4205 a->bsl16b (dst(), r(y), r(x));
4209 a->fcmgt4s(dst(), r(y), r(x));
4210 a->bsl16b (dst(), r(y), r(x));
4213 case Op::shl_i32: a-> shl4s(dst(x), r(x), immA); break;
4214 case Op::shr_i32: a->ushr4s(dst(x), r(x), immA); break;
4215 case Op::sra_i32: a->sshr4s(dst(x), r(x), immA); break;
4217 case Op::eq_i32: a->cmeq4s(dst(x,y), r(x), r(y)); break;
4218 case Op::gt_i32: a->cmgt4s(dst(x,y), r(x), r(y)); break;
4220 case Op::to_f32: a->scvtf4s (dst(x), r(x)); break;
4221 case Op::trunc: a->fcvtzs4s(dst(x), r(x)); break;
4222 case Op::round: a->fcvtns4s(dst(x), r(x)); break;
4223 case Op::ceil: a->frintp4s(dst(x), r(x)); break;
4224 case Op::floor: a->frintm4s(dst(x), r(x)); break;
4227 a->fcvtn (dst(x), r(x)); // 4x f32 -> 4x f16 in bottom four lanes
4228 a->uxtlh2s(dst(), dst()); // expand to 4x f16 in even 16-bit lanes
4232 a->xtns2h(dst(x), r(x)); // pack even 16-bit lanes into bottom four lanes
4233 a->fcvtl (dst(), dst()); // 4x f16 -> 4x f32