1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2019 Google LLC 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include "include/core/SkColorPriv.h" 9cb93a386Sopenharmony_ci#include "include/private/SkColorData.h" 10cb93a386Sopenharmony_ci#include "src/core/SkCpu.h" 11cb93a386Sopenharmony_ci#include "src/core/SkMSAN.h" 12cb93a386Sopenharmony_ci#include "src/core/SkVM.h" 13cb93a386Sopenharmony_ci#include "tests/Test.h" 14cb93a386Sopenharmony_ci 15cb93a386Sopenharmony_citemplate <typename Fn> 16cb93a386Sopenharmony_cistatic void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) { 17cb93a386Sopenharmony_ci skvm::Program p = b.done(); 18cb93a386Sopenharmony_ci test(p); 19cb93a386Sopenharmony_ci if (p.hasJIT()) { 20cb93a386Sopenharmony_ci test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false)); 21cb93a386Sopenharmony_ci } 22cb93a386Sopenharmony_ci} 23cb93a386Sopenharmony_ci 24cb93a386Sopenharmony_ciDEF_TEST(SkVM_eliminate_dead_code, r) { 25cb93a386Sopenharmony_ci skvm::Builder b; 26cb93a386Sopenharmony_ci { 27cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<int>(); 28cb93a386Sopenharmony_ci skvm::I32 l = b.load32(arg); 29cb93a386Sopenharmony_ci skvm::I32 a = b.add(l, l); 30cb93a386Sopenharmony_ci b.add(a, b.splat(7)); 31cb93a386Sopenharmony_ci } 32cb93a386Sopenharmony_ci 33cb93a386Sopenharmony_ci std::vector<skvm::Instruction> program = b.program(); 34cb93a386Sopenharmony_ci REPORTER_ASSERT(r, program.size() == 4); 35cb93a386Sopenharmony_ci 36cb93a386Sopenharmony_ci program = skvm::eliminate_dead_code(program); 37cb93a386Sopenharmony_ci REPORTER_ASSERT(r, program.size() == 0); 38cb93a386Sopenharmony_ci} 39cb93a386Sopenharmony_ci 40cb93a386Sopenharmony_ciDEF_TEST(SkVM_Pointless, r) { 41cb93a386Sopenharmony_ci // Let's build a program 
with no memory arguments. 42cb93a386Sopenharmony_ci // It should all be pegged as dead code, but we should be able to "run" it. 43cb93a386Sopenharmony_ci skvm::Builder b; 44cb93a386Sopenharmony_ci { 45cb93a386Sopenharmony_ci b.add(b.splat(5.0f), 46cb93a386Sopenharmony_ci b.splat(4.0f)); 47cb93a386Sopenharmony_ci } 48cb93a386Sopenharmony_ci 49cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 50cb93a386Sopenharmony_ci for (int N = 0; N < 64; N++) { 51cb93a386Sopenharmony_ci program.eval(N); 52cb93a386Sopenharmony_ci } 53cb93a386Sopenharmony_ci }); 54cb93a386Sopenharmony_ci 55cb93a386Sopenharmony_ci for (const skvm::OptimizedInstruction& inst : b.optimize()) { 56cb93a386Sopenharmony_ci REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true); 57cb93a386Sopenharmony_ci } 58cb93a386Sopenharmony_ci} 59cb93a386Sopenharmony_ci 60cb93a386Sopenharmony_ciDEF_TEST(SkVM_memset, r) { 61cb93a386Sopenharmony_ci skvm::Builder b; 62cb93a386Sopenharmony_ci b.store32(b.varying<int>(), b.splat(42)); 63cb93a386Sopenharmony_ci 64cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& p) { 65cb93a386Sopenharmony_ci int buf[18]; 66cb93a386Sopenharmony_ci buf[17] = 47; 67cb93a386Sopenharmony_ci 68cb93a386Sopenharmony_ci p.eval(17, buf); 69cb93a386Sopenharmony_ci for (int i = 0; i < 17; i++) { 70cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 42); 71cb93a386Sopenharmony_ci } 72cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[17] == 47); 73cb93a386Sopenharmony_ci }); 74cb93a386Sopenharmony_ci} 75cb93a386Sopenharmony_ci 76cb93a386Sopenharmony_ciDEF_TEST(SkVM_memcpy, r) { 77cb93a386Sopenharmony_ci skvm::Builder b; 78cb93a386Sopenharmony_ci { 79cb93a386Sopenharmony_ci auto src = b.varying<int>(), 80cb93a386Sopenharmony_ci dst = b.varying<int>(); 81cb93a386Sopenharmony_ci b.store32(dst, b.load32(src)); 82cb93a386Sopenharmony_ci } 83cb93a386Sopenharmony_ci 84cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const 
skvm::Program& p) { 85cb93a386Sopenharmony_ci int src[] = {1,2,3,4,5,6,7,8,9}, 86cb93a386Sopenharmony_ci dst[] = {0,0,0,0,0,0,0,0,0}; 87cb93a386Sopenharmony_ci 88cb93a386Sopenharmony_ci p.eval(SK_ARRAY_COUNT(src)-1, src, dst); 89cb93a386Sopenharmony_ci for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) { 90cb93a386Sopenharmony_ci REPORTER_ASSERT(r, dst[i] == src[i]); 91cb93a386Sopenharmony_ci } 92cb93a386Sopenharmony_ci size_t i = SK_ARRAY_COUNT(src)-1; 93cb93a386Sopenharmony_ci REPORTER_ASSERT(r, dst[i] == 0); 94cb93a386Sopenharmony_ci }); 95cb93a386Sopenharmony_ci} 96cb93a386Sopenharmony_ci 97cb93a386Sopenharmony_ciDEF_TEST(SkVM_allow_jit, r) { 98cb93a386Sopenharmony_ci skvm::Builder b; 99cb93a386Sopenharmony_ci { 100cb93a386Sopenharmony_ci auto src = b.varying<int>(), 101cb93a386Sopenharmony_ci dst = b.varying<int>(); 102cb93a386Sopenharmony_ci b.store32(dst, b.load32(src)); 103cb93a386Sopenharmony_ci } 104cb93a386Sopenharmony_ci 105cb93a386Sopenharmony_ci if (b.done("test-allow_jit", /*allow_jit=*/true).hasJIT()) { 106cb93a386Sopenharmony_ci REPORTER_ASSERT(r, !b.done("", false).hasJIT()); 107cb93a386Sopenharmony_ci } 108cb93a386Sopenharmony_ci} 109cb93a386Sopenharmony_ci 110cb93a386Sopenharmony_ciDEF_TEST(SkVM_LoopCounts, r) { 111cb93a386Sopenharmony_ci // Make sure we cover all the exact N we want. 
112cb93a386Sopenharmony_ci 113cb93a386Sopenharmony_ci // buf[i] += 1 114cb93a386Sopenharmony_ci skvm::Builder b; 115cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<int>(); 116cb93a386Sopenharmony_ci b.store32(arg, 117cb93a386Sopenharmony_ci b.add(b.splat(1), 118cb93a386Sopenharmony_ci b.load32(arg))); 119cb93a386Sopenharmony_ci 120cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 121cb93a386Sopenharmony_ci int buf[64]; 122cb93a386Sopenharmony_ci for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) { 123cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) { 124cb93a386Sopenharmony_ci buf[i] = i; 125cb93a386Sopenharmony_ci } 126cb93a386Sopenharmony_ci program.eval(N, buf); 127cb93a386Sopenharmony_ci 128cb93a386Sopenharmony_ci for (int i = 0; i < N; i++) { 129cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == i+1); 130cb93a386Sopenharmony_ci } 131cb93a386Sopenharmony_ci for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) { 132cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == i); 133cb93a386Sopenharmony_ci } 134cb93a386Sopenharmony_ci } 135cb93a386Sopenharmony_ci }); 136cb93a386Sopenharmony_ci} 137cb93a386Sopenharmony_ci 138cb93a386Sopenharmony_ciDEF_TEST(SkVM_gather32, r) { 139cb93a386Sopenharmony_ci skvm::Builder b; 140cb93a386Sopenharmony_ci { 141cb93a386Sopenharmony_ci skvm::UPtr uniforms = b.uniform(); 142cb93a386Sopenharmony_ci skvm::Ptr buf = b.varying<int>(); 143cb93a386Sopenharmony_ci skvm::I32 x = b.load32(buf); 144cb93a386Sopenharmony_ci b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7)))); 145cb93a386Sopenharmony_ci } 146cb93a386Sopenharmony_ci 147cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 148cb93a386Sopenharmony_ci const int img[] = {12,34,56,78, 90,98,76,54}; 149cb93a386Sopenharmony_ci 150cb93a386Sopenharmony_ci int buf[20]; 151cb93a386Sopenharmony_ci for (int i = 0; i < 20; i++) { 152cb93a386Sopenharmony_ci buf[i] = i; 
153cb93a386Sopenharmony_ci } 154cb93a386Sopenharmony_ci 155cb93a386Sopenharmony_ci struct Uniforms { 156cb93a386Sopenharmony_ci const int* img; 157cb93a386Sopenharmony_ci } uniforms{img}; 158cb93a386Sopenharmony_ci 159cb93a386Sopenharmony_ci program.eval(20, &uniforms, buf); 160cb93a386Sopenharmony_ci int i = 0; 161cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 12); i++; 162cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 34); i++; 163cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 56); i++; 164cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 78); i++; 165cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 90); i++; 166cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 98); i++; 167cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 76); i++; 168cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 54); i++; 169cb93a386Sopenharmony_ci 170cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 12); i++; 171cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 34); i++; 172cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 56); i++; 173cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 78); i++; 174cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 90); i++; 175cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 98); i++; 176cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 76); i++; 177cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 54); i++; 178cb93a386Sopenharmony_ci 179cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 12); i++; 180cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 34); i++; 181cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 56); i++; 182cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == 78); i++; 183cb93a386Sopenharmony_ci }); 184cb93a386Sopenharmony_ci} 185cb93a386Sopenharmony_ci 186cb93a386Sopenharmony_ciDEF_TEST(SkVM_gathers, r) { 187cb93a386Sopenharmony_ci skvm::Builder b; 188cb93a386Sopenharmony_ci { 189cb93a386Sopenharmony_ci skvm::UPtr uniforms = b.uniform(); 190cb93a386Sopenharmony_ci skvm::Ptr 
buf32 = b.varying<int>(), 191cb93a386Sopenharmony_ci buf16 = b.varying<uint16_t>(), 192cb93a386Sopenharmony_ci buf8 = b.varying<uint8_t>(); 193cb93a386Sopenharmony_ci 194cb93a386Sopenharmony_ci skvm::I32 x = b.load32(buf32); 195cb93a386Sopenharmony_ci 196cb93a386Sopenharmony_ci b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7)))); 197cb93a386Sopenharmony_ci b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15)))); 198cb93a386Sopenharmony_ci b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31)))); 199cb93a386Sopenharmony_ci } 200cb93a386Sopenharmony_ci 201cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 202cb93a386Sopenharmony_ci const int img[] = {12,34,56,78, 90,98,76,54}; 203cb93a386Sopenharmony_ci 204cb93a386Sopenharmony_ci constexpr int N = 20; 205cb93a386Sopenharmony_ci int buf32[N]; 206cb93a386Sopenharmony_ci uint16_t buf16[N]; 207cb93a386Sopenharmony_ci uint8_t buf8 [N]; 208cb93a386Sopenharmony_ci 209cb93a386Sopenharmony_ci for (int i = 0; i < 20; i++) { 210cb93a386Sopenharmony_ci buf32[i] = i; 211cb93a386Sopenharmony_ci } 212cb93a386Sopenharmony_ci 213cb93a386Sopenharmony_ci struct Uniforms { 214cb93a386Sopenharmony_ci const int* img; 215cb93a386Sopenharmony_ci } uniforms{img}; 216cb93a386Sopenharmony_ci 217cb93a386Sopenharmony_ci program.eval(N, &uniforms, buf32, buf16, buf8); 218cb93a386Sopenharmony_ci int i = 0; 219cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++; 220cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++; 221cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++; 222cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++; 223cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++; 224cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 98 && 
buf16[i] == 0 && buf8[i] == 0); i++; 225cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++; 226cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++; 227cb93a386Sopenharmony_ci 228cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++; 229cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++; 230cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++; 231cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++; 232cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++; 233cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++; 234cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++; 235cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++; 236cb93a386Sopenharmony_ci 237cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++; 238cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++; 239cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++; 240cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++; 241cb93a386Sopenharmony_ci }); 242cb93a386Sopenharmony_ci} 243cb93a386Sopenharmony_ci 244cb93a386Sopenharmony_ciDEF_TEST(SkVM_gathers2, r) { 245cb93a386Sopenharmony_ci skvm::Builder b; 246cb93a386Sopenharmony_ci { 247cb93a386Sopenharmony_ci skvm::UPtr uniforms = b.uniform(); 248cb93a386Sopenharmony_ci skvm::Ptr buf32 = b.varying<int>(), 249cb93a386Sopenharmony_ci buf16 = b.varying<uint16_t>(), 250cb93a386Sopenharmony_ci buf8 = b.varying<uint8_t>(); 251cb93a386Sopenharmony_ci 
252cb93a386Sopenharmony_ci skvm::I32 x = b.load32(buf32); 253cb93a386Sopenharmony_ci 254cb93a386Sopenharmony_ci b.store32(buf32, b.gather32(uniforms,0, x)); 255cb93a386Sopenharmony_ci b.store16(buf16, b.gather16(uniforms,0, x)); 256cb93a386Sopenharmony_ci b.store8 (buf8 , b.gather8 (uniforms,0, x)); 257cb93a386Sopenharmony_ci } 258cb93a386Sopenharmony_ci 259cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 260cb93a386Sopenharmony_ci uint8_t img[256]; 261cb93a386Sopenharmony_ci for (int i = 0; i < 256; i++) { 262cb93a386Sopenharmony_ci img[i] = i; 263cb93a386Sopenharmony_ci } 264cb93a386Sopenharmony_ci 265cb93a386Sopenharmony_ci int buf32[64]; 266cb93a386Sopenharmony_ci uint16_t buf16[64]; 267cb93a386Sopenharmony_ci uint8_t buf8 [64]; 268cb93a386Sopenharmony_ci 269cb93a386Sopenharmony_ci for (int i = 0; i < 64; i++) { 270cb93a386Sopenharmony_ci buf32[i] = (i*47)&63; 271cb93a386Sopenharmony_ci buf16[i] = 0; 272cb93a386Sopenharmony_ci buf8 [i] = 0; 273cb93a386Sopenharmony_ci } 274cb93a386Sopenharmony_ci 275cb93a386Sopenharmony_ci struct Uniforms { 276cb93a386Sopenharmony_ci const uint8_t* img; 277cb93a386Sopenharmony_ci } uniforms{img}; 278cb93a386Sopenharmony_ci 279cb93a386Sopenharmony_ci program.eval(64, &uniforms, buf32, buf16, buf8); 280cb93a386Sopenharmony_ci 281cb93a386Sopenharmony_ci for (int i = 0; i < 64; i++) { 282cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,... 
283cb93a386Sopenharmony_ci } 284cb93a386Sopenharmony_ci 285cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf16[ 0] == 0x0100); 286cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf16[63] == 0x2322); 287cb93a386Sopenharmony_ci 288cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[ 0] == 0x03020100); 289cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf32[63] == 0x47464544); 290cb93a386Sopenharmony_ci }); 291cb93a386Sopenharmony_ci} 292cb93a386Sopenharmony_ci 293cb93a386Sopenharmony_ciDEF_TEST(SkVM_bitops, r) { 294cb93a386Sopenharmony_ci skvm::Builder b; 295cb93a386Sopenharmony_ci { 296cb93a386Sopenharmony_ci skvm::Ptr ptr = b.varying<int>(); 297cb93a386Sopenharmony_ci 298cb93a386Sopenharmony_ci skvm::I32 x = b.load32(ptr); 299cb93a386Sopenharmony_ci 300cb93a386Sopenharmony_ci x = b.bit_and (x, b.splat(0xf1)); // 0x40 301cb93a386Sopenharmony_ci x = b.bit_or (x, b.splat(0x80)); // 0xc0 302cb93a386Sopenharmony_ci x = b.bit_xor (x, b.splat(0xfe)); // 0x3e 303cb93a386Sopenharmony_ci x = b.bit_clear(x, b.splat(0x30)); // 0x0e 304cb93a386Sopenharmony_ci 305cb93a386Sopenharmony_ci x = b.shl(x, 28); // 0xe000'0000 306cb93a386Sopenharmony_ci x = b.sra(x, 28); // 0xffff'fffe 307cb93a386Sopenharmony_ci x = b.shr(x, 1); // 0x7fff'ffff 308cb93a386Sopenharmony_ci 309cb93a386Sopenharmony_ci b.store32(ptr, x); 310cb93a386Sopenharmony_ci } 311cb93a386Sopenharmony_ci 312cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 313cb93a386Sopenharmony_ci int x = 0x42; 314cb93a386Sopenharmony_ci program.eval(1, &x); 315cb93a386Sopenharmony_ci REPORTER_ASSERT(r, x == 0x7fff'ffff); 316cb93a386Sopenharmony_ci }); 317cb93a386Sopenharmony_ci} 318cb93a386Sopenharmony_ci 319cb93a386Sopenharmony_ciDEF_TEST(SkVM_select_is_NaN, r) { 320cb93a386Sopenharmony_ci skvm::Builder b; 321cb93a386Sopenharmony_ci { 322cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(), 323cb93a386Sopenharmony_ci dst = b.varying<float>(); 324cb93a386Sopenharmony_ci 325cb93a386Sopenharmony_ci 
skvm::F32 x = b.loadF(src); 326cb93a386Sopenharmony_ci x = select(is_NaN(x), b.splat(0.0f) 327cb93a386Sopenharmony_ci , x); 328cb93a386Sopenharmony_ci b.storeF(dst, x); 329cb93a386Sopenharmony_ci } 330cb93a386Sopenharmony_ci 331cb93a386Sopenharmony_ci std::vector<skvm::OptimizedInstruction> program = b.optimize(); 332cb93a386Sopenharmony_ci REPORTER_ASSERT(r, program.size() == 4); 333cb93a386Sopenharmony_ci REPORTER_ASSERT(r, program[0].op == skvm::Op::load32); 334cb93a386Sopenharmony_ci REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32); 335cb93a386Sopenharmony_ci REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear); 336cb93a386Sopenharmony_ci REPORTER_ASSERT(r, program[3].op == skvm::Op::store32); 337cb93a386Sopenharmony_ci 338cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 339cb93a386Sopenharmony_ci // ±NaN, ±0, ±1, ±inf 340cb93a386Sopenharmony_ci uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000, 341cb93a386Sopenharmony_ci 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000}; 342cb93a386Sopenharmony_ci uint32_t dst[SK_ARRAY_COUNT(src)]; 343cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(src), src, dst); 344cb93a386Sopenharmony_ci 345cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) { 346cb93a386Sopenharmony_ci REPORTER_ASSERT(r, dst[i] == (i < 2 ? 
0 : src[i])); 347cb93a386Sopenharmony_ci } 348cb93a386Sopenharmony_ci }); 349cb93a386Sopenharmony_ci} 350cb93a386Sopenharmony_ci 351cb93a386Sopenharmony_ciDEF_TEST(SkVM_f32, r) { 352cb93a386Sopenharmony_ci skvm::Builder b; 353cb93a386Sopenharmony_ci { 354cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<float>(); 355cb93a386Sopenharmony_ci 356cb93a386Sopenharmony_ci skvm::F32 x = b.loadF(arg), 357cb93a386Sopenharmony_ci y = b.add(x,x), // y = 2x 358cb93a386Sopenharmony_ci z = b.sub(y,x), // z = 2x-x = x 359cb93a386Sopenharmony_ci w = b.div(z,x); // w = x/x = 1 360cb93a386Sopenharmony_ci b.storeF(arg, w); 361cb93a386Sopenharmony_ci } 362cb93a386Sopenharmony_ci 363cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 364cb93a386Sopenharmony_ci float buf[] = { 1,2,3,4,5,6,7,8,9 }; 365cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(buf), buf); 366cb93a386Sopenharmony_ci for (float v : buf) { 367cb93a386Sopenharmony_ci REPORTER_ASSERT(r, v == 1.0f); 368cb93a386Sopenharmony_ci } 369cb93a386Sopenharmony_ci }); 370cb93a386Sopenharmony_ci} 371cb93a386Sopenharmony_ci 372cb93a386Sopenharmony_ciDEF_TEST(SkVM_cmp_i32, r) { 373cb93a386Sopenharmony_ci skvm::Builder b; 374cb93a386Sopenharmony_ci { 375cb93a386Sopenharmony_ci skvm::I32 x = b.load32(b.varying<int>()); 376cb93a386Sopenharmony_ci 377cb93a386Sopenharmony_ci auto to_bit = [&](int shift, skvm::I32 mask) { 378cb93a386Sopenharmony_ci return b.shl(b.bit_and(mask, b.splat(0x1)), shift); 379cb93a386Sopenharmony_ci }; 380cb93a386Sopenharmony_ci 381cb93a386Sopenharmony_ci skvm::I32 m = b.splat(0); 382cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0)))); 383cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1)))); 384cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2)))); 385cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3)))); 386cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(4, b. 
gt(x, b.splat(4)))); 387cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5)))); 388cb93a386Sopenharmony_ci 389cb93a386Sopenharmony_ci b.store32(b.varying<int>(), m); 390cb93a386Sopenharmony_ci } 391cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 392cb93a386Sopenharmony_ci int in[] = { 0,1,2,3,4,5,6,7,8,9 }; 393cb93a386Sopenharmony_ci int out[SK_ARRAY_COUNT(in)]; 394cb93a386Sopenharmony_ci 395cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(in), in, out); 396cb93a386Sopenharmony_ci 397cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[0] == 0b001111); 398cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[1] == 0b001100); 399cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[2] == 0b001010); 400cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[3] == 0b001010); 401cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[4] == 0b000010); 402cb93a386Sopenharmony_ci for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) { 403cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[i] == 0b110010); 404cb93a386Sopenharmony_ci } 405cb93a386Sopenharmony_ci }); 406cb93a386Sopenharmony_ci} 407cb93a386Sopenharmony_ci 408cb93a386Sopenharmony_ciDEF_TEST(SkVM_cmp_f32, r) { 409cb93a386Sopenharmony_ci skvm::Builder b; 410cb93a386Sopenharmony_ci { 411cb93a386Sopenharmony_ci skvm::F32 x = b.loadF(b.varying<float>()); 412cb93a386Sopenharmony_ci 413cb93a386Sopenharmony_ci auto to_bit = [&](int shift, skvm::I32 mask) { 414cb93a386Sopenharmony_ci return b.shl(b.bit_and(mask, b.splat(0x1)), shift); 415cb93a386Sopenharmony_ci }; 416cb93a386Sopenharmony_ci 417cb93a386Sopenharmony_ci skvm::I32 m = b.splat(0); 418cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f)))); 419cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f)))); 420cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(2, b. 
lt(x, b.splat(2.0f)))); 421cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f)))); 422cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f)))); 423cb93a386Sopenharmony_ci m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f)))); 424cb93a386Sopenharmony_ci 425cb93a386Sopenharmony_ci b.store32(b.varying<int>(), m); 426cb93a386Sopenharmony_ci } 427cb93a386Sopenharmony_ci 428cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 429cb93a386Sopenharmony_ci float in[] = { 0,1,2,3,4,5,6,7,8,9 }; 430cb93a386Sopenharmony_ci int out[SK_ARRAY_COUNT(in)]; 431cb93a386Sopenharmony_ci 432cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(in), in, out); 433cb93a386Sopenharmony_ci 434cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[0] == 0b001111); 435cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[1] == 0b001100); 436cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[2] == 0b001010); 437cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[3] == 0b001010); 438cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[4] == 0b000010); 439cb93a386Sopenharmony_ci for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) { 440cb93a386Sopenharmony_ci REPORTER_ASSERT(r, out[i] == 0b110010); 441cb93a386Sopenharmony_ci } 442cb93a386Sopenharmony_ci }); 443cb93a386Sopenharmony_ci} 444cb93a386Sopenharmony_ci 445cb93a386Sopenharmony_ciDEF_TEST(SkVM_index, r) { 446cb93a386Sopenharmony_ci skvm::Builder b; 447cb93a386Sopenharmony_ci b.store32(b.varying<int>(), b.index()); 448cb93a386Sopenharmony_ci 449cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 450cb93a386Sopenharmony_ci int buf[23]; 451cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(buf), buf); 452cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) { 453cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i); 454cb93a386Sopenharmony_ci } 455cb93a386Sopenharmony_ci }); 456cb93a386Sopenharmony_ci} 
457cb93a386Sopenharmony_ci 458cb93a386Sopenharmony_ciDEF_TEST(SkVM_mad, r) { 459cb93a386Sopenharmony_ci // This program is designed to exercise the tricky corners of instruction 460cb93a386Sopenharmony_ci // and register selection for Op::mad_f32. 461cb93a386Sopenharmony_ci 462cb93a386Sopenharmony_ci skvm::Builder b; 463cb93a386Sopenharmony_ci { 464cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<int>(); 465cb93a386Sopenharmony_ci 466cb93a386Sopenharmony_ci skvm::F32 x = b.to_F32(b.load32(arg)), 467cb93a386Sopenharmony_ci y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y]. 468cb93a386Sopenharmony_ci z = b.mad(y,y,x), // y is needed in the future, but r[z] = r[x] is ok. 469cb93a386Sopenharmony_ci w = b.mad(z,z,y), // w can alias z but not y. 470cb93a386Sopenharmony_ci v = b.mad(w,y,w); // Got to stop somewhere. 471cb93a386Sopenharmony_ci b.store32(arg, b.trunc(v)); 472cb93a386Sopenharmony_ci } 473cb93a386Sopenharmony_ci 474cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 475cb93a386Sopenharmony_ci int x = 2; 476cb93a386Sopenharmony_ci program.eval(1, &x); 477cb93a386Sopenharmony_ci // x = 2 478cb93a386Sopenharmony_ci // y = 2*2 + 2 = 6 479cb93a386Sopenharmony_ci // z = 6*6 + 2 = 38 480cb93a386Sopenharmony_ci // w = 38*38 + 6 = 1450 481cb93a386Sopenharmony_ci // v = 1450*6 + 1450 = 10150 482cb93a386Sopenharmony_ci REPORTER_ASSERT(r, x == 10150); 483cb93a386Sopenharmony_ci }); 484cb93a386Sopenharmony_ci} 485cb93a386Sopenharmony_ci 486cb93a386Sopenharmony_ciDEF_TEST(SkVM_fms, r) { 487cb93a386Sopenharmony_ci // Create a pattern that can be peepholed into an Op::fms_f32. 
488cb93a386Sopenharmony_ci skvm::Builder b; 489cb93a386Sopenharmony_ci { 490cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<int>(); 491cb93a386Sopenharmony_ci 492cb93a386Sopenharmony_ci skvm::F32 x = b.to_F32(b.load32(arg)), 493cb93a386Sopenharmony_ci v = b.sub(b.mul(x, b.splat(2.0f)), 494cb93a386Sopenharmony_ci b.splat(1.0f)); 495cb93a386Sopenharmony_ci b.store32(arg, b.trunc(v)); 496cb93a386Sopenharmony_ci } 497cb93a386Sopenharmony_ci 498cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 499cb93a386Sopenharmony_ci int buf[] = {0,1,2,3,4,5,6,7,8,9,10}; 500cb93a386Sopenharmony_ci program.eval((int)SK_ARRAY_COUNT(buf), &buf); 501cb93a386Sopenharmony_ci 502cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) { 503cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] = 2*i-1); 504cb93a386Sopenharmony_ci } 505cb93a386Sopenharmony_ci }); 506cb93a386Sopenharmony_ci} 507cb93a386Sopenharmony_ci 508cb93a386Sopenharmony_ciDEF_TEST(SkVM_fnma, r) { 509cb93a386Sopenharmony_ci // Create a pattern that can be peepholed into an Op::fnma_f32. 
510cb93a386Sopenharmony_ci skvm::Builder b; 511cb93a386Sopenharmony_ci { 512cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<int>(); 513cb93a386Sopenharmony_ci 514cb93a386Sopenharmony_ci skvm::F32 x = b.to_F32(b.load32(arg)), 515cb93a386Sopenharmony_ci v = b.sub(b.splat(1.0f), 516cb93a386Sopenharmony_ci b.mul(x, b.splat(2.0f))); 517cb93a386Sopenharmony_ci b.store32(arg, b.trunc(v)); 518cb93a386Sopenharmony_ci } 519cb93a386Sopenharmony_ci 520cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 521cb93a386Sopenharmony_ci int buf[] = {0,1,2,3,4,5,6,7,8,9,10}; 522cb93a386Sopenharmony_ci program.eval((int)SK_ARRAY_COUNT(buf), &buf); 523cb93a386Sopenharmony_ci 524cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) { 525cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] = 1-2*i); 526cb93a386Sopenharmony_ci } 527cb93a386Sopenharmony_ci }); 528cb93a386Sopenharmony_ci} 529cb93a386Sopenharmony_ci 530cb93a386Sopenharmony_ciDEF_TEST(SkVM_madder, r) { 531cb93a386Sopenharmony_ci skvm::Builder b; 532cb93a386Sopenharmony_ci { 533cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<float>(); 534cb93a386Sopenharmony_ci 535cb93a386Sopenharmony_ci skvm::F32 x = b.loadF(arg), 536cb93a386Sopenharmony_ci y = b.mad(x,x,x), // x is needed in the future, so r[x] != r[y]. 537cb93a386Sopenharmony_ci z = b.mad(y,x,y), // r[x] can be reused after this instruction, but not r[y]. 
538cb93a386Sopenharmony_ci w = b.mad(y,y,z); 539cb93a386Sopenharmony_ci b.storeF(arg, w); 540cb93a386Sopenharmony_ci } 541cb93a386Sopenharmony_ci 542cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 543cb93a386Sopenharmony_ci float x = 2.0f; 544cb93a386Sopenharmony_ci // y = 2*2 + 2 = 6 545cb93a386Sopenharmony_ci // z = 6*2 + 6 = 18 546cb93a386Sopenharmony_ci // w = 6*6 + 18 = 54 547cb93a386Sopenharmony_ci program.eval(1, &x); 548cb93a386Sopenharmony_ci REPORTER_ASSERT(r, x == 54.0f); 549cb93a386Sopenharmony_ci }); 550cb93a386Sopenharmony_ci} 551cb93a386Sopenharmony_ci 552cb93a386Sopenharmony_ciDEF_TEST(SkVM_floor, r) { 553cb93a386Sopenharmony_ci skvm::Builder b; 554cb93a386Sopenharmony_ci { 555cb93a386Sopenharmony_ci skvm::Ptr arg = b.varying<float>(); 556cb93a386Sopenharmony_ci b.storeF(arg, b.floor(b.loadF(arg))); 557cb93a386Sopenharmony_ci } 558cb93a386Sopenharmony_ci 559cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 560cb93a386Sopenharmony_ci float buf[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f }; 561cb93a386Sopenharmony_ci float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f }; 562cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(buf), buf); 563cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) { 564cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == want[i]); 565cb93a386Sopenharmony_ci } 566cb93a386Sopenharmony_ci }); 567cb93a386Sopenharmony_ci} 568cb93a386Sopenharmony_ci 569cb93a386Sopenharmony_ciDEF_TEST(SkVM_round, r) { 570cb93a386Sopenharmony_ci skvm::Builder b; 571cb93a386Sopenharmony_ci { 572cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(); 573cb93a386Sopenharmony_ci skvm::Ptr dst = b.varying<int>(); 574cb93a386Sopenharmony_ci b.store32(dst, b.round(b.loadF(src))); 575cb93a386Sopenharmony_ci } 576cb93a386Sopenharmony_ci 577cb93a386Sopenharmony_ci // The test cases on exact 0.5f boundaries assume the current rounding mode is 
nearest even. 578cb93a386Sopenharmony_ci // We haven't explicitly guaranteed that here... it just probably is. 579cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 580cb93a386Sopenharmony_ci float buf[] = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f }; 581cb93a386Sopenharmony_ci int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 }; 582cb93a386Sopenharmony_ci int dst[SK_ARRAY_COUNT(buf)]; 583cb93a386Sopenharmony_ci 584cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(buf), buf, dst); 585cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) { 586cb93a386Sopenharmony_ci REPORTER_ASSERT(r, dst[i] == want[i]); 587cb93a386Sopenharmony_ci } 588cb93a386Sopenharmony_ci }); 589cb93a386Sopenharmony_ci} 590cb93a386Sopenharmony_ci 591cb93a386Sopenharmony_ciDEF_TEST(SkVM_min, r) { 592cb93a386Sopenharmony_ci skvm::Builder b; 593cb93a386Sopenharmony_ci { 594cb93a386Sopenharmony_ci skvm::Ptr src1 = b.varying<float>(); 595cb93a386Sopenharmony_ci skvm::Ptr src2 = b.varying<float>(); 596cb93a386Sopenharmony_ci skvm::Ptr dst = b.varying<float>(); 597cb93a386Sopenharmony_ci 598cb93a386Sopenharmony_ci b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2))); 599cb93a386Sopenharmony_ci } 600cb93a386Sopenharmony_ci 601cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program) { 602cb93a386Sopenharmony_ci float s1[] = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f}; 603cb93a386Sopenharmony_ci float s2[] = { 0.0f, 2.0f, 3.0f, 1.0f, -2.0f}; 604cb93a386Sopenharmony_ci float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f}; 605cb93a386Sopenharmony_ci float d[SK_ARRAY_COUNT(s1)]; 606cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(d), s1, s2, d); 607cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) { 608cb93a386Sopenharmony_ci REPORTER_ASSERT(r, d[i] == want[i]); 609cb93a386Sopenharmony_ci } 610cb93a386Sopenharmony_ci }); 611cb93a386Sopenharmony_ci} 612cb93a386Sopenharmony_ci 
// Builds a three-argument program storing max(src1, src2) to dst and checks
// it lane by lane, using the same inputs as the min test.
DEF_TEST(SkVM_max, r) {
    skvm::Builder b;
    {
        skvm::Ptr src1 = b.varying<float>();
        skvm::Ptr src2 = b.varying<float>();
        skvm::Ptr dst = b.varying<float>();

        b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float s1[]   = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
        float s2[]   = { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f};
        float want[] = { 0.0f, 2.0f, 4.0f,  1.0f, -1.0f};
        float d[SK_ARRAY_COUNT(s1)];
        // Argument order matches the order the varyings were declared: src1, src2, dst.
        program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
        for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
            REPORTER_ASSERT(r, d[i] == want[i]);
        }
    });
}

DEF_TEST(SkVM_hoist, r) {
    // This program uses enough constants that it will fail to JIT if we hoist them.
    // The JIT will try again without hoisting, and that'll just need 2 registers.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();
        skvm::I32 x = b.load32(arg);
        // Adds the 32 distinct constants 0..31 to x, one splat per iteration.
        for (int i = 0; i < 32; i++) {
            x = b.add(x, b.splat(i));
        }
        b.store32(arg, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int x = 4;
        program.eval(1, &x);
        // x += 0 + 1 + 2 + 3 + ... + 30 + 31
        // x += 496
        REPORTER_ASSERT(r, x == 500);
    });
}

// Builds a program that keeps values greater than 4 and replaces all others
// with 42, then checks both sides of the threshold.
DEF_TEST(SkVM_select, r) {
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();

        skvm::I32 x = b.load32(buf);

        x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );

        b.store32(buf, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // buf[i] == i on entry, so the expected value can be computed from the index.
        int buf[] = { 0,1,2,3,4,5,6,7,8 };
        program.eval(SK_ARRAY_COUNT(buf), buf);
        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
        }
    });
}

// Swaps the contents of two buffers; the expectations below only hold if both
// loads happen before either store.
DEF_TEST(SkVM_swap, r) {
    skvm::Builder b;
    {
        // This program is the equivalent of
        //    x = *X
        //    y = *Y
        //    *X = y
        //    *Y = x
        // One rescheduling of the program based only on data flow of Op arguments is
        //    x = *X
        //    *Y = x
        //    y = *Y
        //    *X = y
        // but this reordering does not produce the same results and is invalid.
        skvm::Ptr X = b.varying<int>(),
                  Y = b.varying<int>();

        skvm::I32 x = b.load32(X),
                  y = b.load32(Y);

        b.store32(X, y);
        b.store32(Y, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int b1[] = { 0,1,2,3 };
        int b2[] = { 4,5,6,7 };
        program.eval(SK_ARRAY_COUNT(b1), b1, b2);
        // After the swap b1 holds b2's old contents (4..7) and b2 holds b1's (0..3).
        for (int i = 0; i < (int)SK_ARRAY_COUNT(b1); i++) {
            REPORTER_ASSERT(r, b1[i] == 4 + i);
            REPORTER_ASSERT(r, b2[i] == i);
        }
    });
}

DEF_TEST(SkVM_NewOps, r) {
    // Exercise a somewhat arbitrary set of new ops.
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int16_t>();
        skvm::UPtr uniforms = b.uniform();

        skvm::I32 x = b.load16(buf);

        // The uniforms struct passed to eval() below starts with a pointer;
        // kPtr is used as the offset of the first int that follows it.
        const size_t kPtr = sizeof(const int*);

        x = b.add(x, b.uniform32(uniforms, kPtr+0));
        x = b.mul(x, b.uniform32(uniforms, kPtr+4));
        x = b.sub(x, b.uniform32(uniforms, kPtr+8));

        // Clamp x to [0, limit] before using it as a gather index.
        skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
        x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
        x = b.select(b.gt(x, limit     ), limit     , x);

        x = b.gather8(uniforms,0, x);

        b.store16(buf, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int N = 31;
        int16_t buf[N];
        for (int i = 0; i < N; i++) {
            buf[i] = i;
        }

        // img[i] == i*i; the largest entry is 15*15 == 225.
        const int M = 16;
        uint8_t img[M];
        for (int i = 0; i < M; i++) {
            img[i] = i*i;
        }

        // Field order matters: the program reads these ints at kPtr+0, +4, +8, and +12.
        struct {
            const uint8_t* img;
            int      add   = 5;
            int      mul   = 3;
            int      sub   = 18;
            int      limit = M-1;
        } uniforms{img};

        program.eval(N, buf, &uniforms);

        for (int i = 0; i < N; i++) {
            // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
            int x = 3*(i-1);

            // Then that's pinned to the limits of img.
            if (i < 2) { x =  0; }  // Notice i == 1 hits x == 0 exactly...
            if (i > 5) { x = 15; }  // ...and i == 6 hits x == 15 exactly
            REPORTER_ASSERT(r, buf[i] == img[x]);
        }
    });
}

// Pushes an int array and a float array into a skvm::Uniforms, reads their
// elements back with array32()/arrayF(), and checks the values stored to three
// output buffers. The arrays are then modified and the program is run again.
DEF_TEST(SKVM_array32, r) {



    skvm::Builder b;
    skvm::Uniforms uniforms(b.uniform(), 0);
    // Take up the first slot, so other uniforms are not at 0 offset.
    uniforms.push(0);
    int i[] = {3, 7};
    skvm::Uniform array = uniforms.pushArray(i);
    float f[] = {5, 9};
    skvm::Uniform arrayF = uniforms.pushArrayF(f);
    {
        skvm::Ptr buf0 = b.varying<int32_t>(),
                  buf1 = b.varying<int32_t>(),
                  buf2 = b.varying<int32_t>();

        skvm::I32 j = b.array32(array, 0);
        b.store32(buf0, j);
        skvm::I32 k = b.array32(array, 1);
        b.store32(buf1, k);

        skvm::F32 x = b.arrayF(arrayF, 0);
        skvm::F32 y = b.arrayF(arrayF, 1);
        b.store32(buf2, b.trunc(b.add(x, y)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int K = 10;
        int32_t buf0[K],
                buf1[K],
                buf2[K];

        // Reset i[0] and f[1]: this lambda may run more than once (JIT, then
        // interpreter), and the second half below overwrites both.
        i[0] = 3;
        f[1] = 9;
        program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
        for (auto v : buf0) {
            REPORTER_ASSERT(r, v == 3);
        }
        for (auto v : buf1) {
            REPORTER_ASSERT(r, v == 7);
        }
        for (auto v : buf2) {
            REPORTER_ASSERT(r, v == 14);    // trunc(5 + 9)
        }
        // Change the caller-side arrays without rebuilding the program; the
        // assertions below expect the new values to be observed.
        i[0] = 4;
        f[1] = 10;
        program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
        for (auto v : buf0) {
            REPORTER_ASSERT(r, v == 4);
        }
        for (auto v : buf1) {
            REPORTER_ASSERT(r, v == 7);
        }
        for (auto v : buf2) {
            REPORTER_ASSERT(r, v == 15);    // trunc(5 + 10)
        }
    });
}

// Takes the square root of each float in place, using perfect squares so the
// results can be compared exactly.
DEF_TEST(SkVM_sqrt, r) {
    skvm::Builder b;
    // NOTE(review): declared as varying<int> but accessed with loadF/storeF on
    // a float buffer -- presumably only the element size matters; confirm.
    auto buf = b.varying<int>();
    b.storeF(buf, b.sqrt(b.loadF(buf)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        float buf[K];
        for (int i = 0; i < K; i++) {
            buf[i] = (float)(i*i);
        }

        // x^2 -> x
        program.eval(K, buf);

        for (int i = 0; i < K; i++) {
            REPORTER_ASSERT(r, buf[i] == (float)i);
        }
    });
}

DEF_TEST(SkVM_MSAN, r) {
    // This little memset32() program should be able to JIT, but if we run that
    // JIT code in an MSAN build, it won't see the writes initialize buf.  So
    // this tests that we're using the interpreter instead.
    skvm::Builder b;
    b.store32(b.varying<int>(), b.splat(42));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        int buf[K];                 // Intentionally uninitialized.
        program.eval(K, buf);
        sk_msan_assert_initialized(buf, buf+K);
        for (int x : buf) {
            REPORTER_ASSERT(r, x == 42);
        }
    });
}

// Builds a program asserting that every loaded value is less than 42 and runs
// it on inputs (0..9) that all satisfy the condition.
DEF_TEST(SkVM_assert, r) {
    skvm::Builder b;
    b.assert_true(b.lt(b.load32(b.varying<int>()),
                       b.splat(42)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
        program.eval(SK_ARRAY_COUNT(buf), buf);
    });
}

// Builds a program containing only a trace_line instruction and checks that it
// can be evaluated; nothing is asserted about its effect.
DEF_TEST(SkVM_trace_line, r) {
    skvm::Builder b;
    b.trace_line(b.splat(0xFFFFFFFF), 123);

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // The trace_line instruction has no behavior yet.
        program.eval(1);
    });
}

// Checks the instruction counts of programs using premul()/unpremul(), with a
// loaded alpha and with a constant alpha of 1.0f. These programs are only
// built and counted, never evaluated.
DEF_TEST(SkVM_premul, reporter) {
    // Test that premul is short-circuited when alpha is known opaque.
    {
        skvm::Builder p;
        auto rptr = p.varying<int>(),
             aptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.loadF(aptr);

        p.premul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, load alpha, red *= alpha, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
    }

    {
        skvm::Builder p;
        auto rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.splat(1.0f);

        p.premul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
    }

    // Same deal for unpremul.
    {
        skvm::Builder p;
        auto rptr = p.varying<int>(),
             aptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.loadF(aptr);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, load alpha, a bunch of unpremul instructions, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
    }

    {
        skvm::Builder p;
        auto rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.splat(1.0f);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
    }
}

// Runs fn against a fresh skvm::Assembler writing into a local 4096-byte
// buffer, then compares the emitted bytes with `expected`.
//
//   r        - reporter that receives assertion failures
//   fn       - callable taking skvm::Assembler&; emits the instructions under test
//   expected - the exact byte encoding fn should produce
//
// A size mismatch is reported once; bytes are then compared only over the
// shorter of the two lengths, so neither buffer is read past its end.
template <typename Fn>
static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
    uint8_t buf[4096];
    skvm::Assembler a{buf};
    fn(a);

    REPORTER_ASSERT(r, a.size() == expected.size());

    auto got = (const uint8_t*)buf,
         want = expected.begin();
    for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
        REPORTER_ASSERT(r, got[i] == want[i],
                        "byte %d was %02x, want %02x", i, got[i], want[i]);
    }
}

DEF_TEST(SkVM_Assembler, r) {
    // Easiest way to generate test cases is
    //
    //   echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
    //
    // The -x86-asm-syntax=intel bit is optional, controlling the
    // input syntax only; the output will always be AT&T op x,y,dst style.
    // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
    // that a bit easier to use here, despite maybe favoring AT&T overall.

    using A = skvm::Assembler;
    // Our exit strategy from AVX code.
991cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 992cb93a386Sopenharmony_ci a.int3(); 993cb93a386Sopenharmony_ci a.vzeroupper(); 994cb93a386Sopenharmony_ci a.ret(); 995cb93a386Sopenharmony_ci },{ 996cb93a386Sopenharmony_ci 0xcc, 997cb93a386Sopenharmony_ci 0xc5, 0xf8, 0x77, 998cb93a386Sopenharmony_ci 0xc3, 999cb93a386Sopenharmony_ci }); 1000cb93a386Sopenharmony_ci 1001cb93a386Sopenharmony_ci // Align should pad with zero 1002cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1003cb93a386Sopenharmony_ci a.ret(); 1004cb93a386Sopenharmony_ci a.align(4); 1005cb93a386Sopenharmony_ci },{ 1006cb93a386Sopenharmony_ci 0xc3, 1007cb93a386Sopenharmony_ci 0x00, 0x00, 0x00, 1008cb93a386Sopenharmony_ci }); 1009cb93a386Sopenharmony_ci 1010cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1011cb93a386Sopenharmony_ci a.add(A::rax, 8); // Always good to test rax. 1012cb93a386Sopenharmony_ci a.sub(A::rax, 32); 1013cb93a386Sopenharmony_ci 1014cb93a386Sopenharmony_ci a.add(A::rdi, 12); // Last 0x48 REX 1015cb93a386Sopenharmony_ci a.sub(A::rdi, 8); 1016cb93a386Sopenharmony_ci 1017cb93a386Sopenharmony_ci a.add(A::r8 , 7); // First 0x49 REX 1018cb93a386Sopenharmony_ci a.sub(A::r8 , 4); 1019cb93a386Sopenharmony_ci 1020cb93a386Sopenharmony_ci a.add(A::rsi, 128); // Requires 4 byte immediate. 
1021cb93a386Sopenharmony_ci a.sub(A::r8 , 1000000); 1022cb93a386Sopenharmony_ci 1023cb93a386Sopenharmony_ci a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi) 1024cb93a386Sopenharmony_ci a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi) 1025cb93a386Sopenharmony_ci a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp) 1026cb93a386Sopenharmony_ci a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12) 1027cb93a386Sopenharmony_ci a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4) 1028cb93a386Sopenharmony_ci a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4) 1029cb93a386Sopenharmony_ci a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4) 1030cb93a386Sopenharmony_ci a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2) 1031cb93a386Sopenharmony_ci a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax) 1032cb93a386Sopenharmony_ci a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11) 1033cb93a386Sopenharmony_ci 1034cb93a386Sopenharmony_ci a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11) 1035cb93a386Sopenharmony_ci 1036cb93a386Sopenharmony_ci a.add( A::rax , A::rcx); // addq %rcx, %rax 1037cb93a386Sopenharmony_ci a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax) 1038cb93a386Sopenharmony_ci a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax) 1039cb93a386Sopenharmony_ci a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx 1040cb93a386Sopenharmony_ci 1041cb93a386Sopenharmony_ci a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx 1042cb93a386Sopenharmony_ci },{ 1043cb93a386Sopenharmony_ci 0x48, 0x83, 0b11'000'000, 0x08, 1044cb93a386Sopenharmony_ci 0x48, 0x83, 0b11'101'000, 0x20, 1045cb93a386Sopenharmony_ci 1046cb93a386Sopenharmony_ci 0x48, 0x83, 0b11'000'111, 0x0c, 1047cb93a386Sopenharmony_ci 0x48, 0x83, 0b11'101'111, 0x08, 1048cb93a386Sopenharmony_ci 1049cb93a386Sopenharmony_ci 0x49, 0x83, 0b11'000'000, 0x07, 1050cb93a386Sopenharmony_ci 0x49, 
0x83, 0b11'101'000, 0x04, 1051cb93a386Sopenharmony_ci 1052cb93a386Sopenharmony_ci 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00, 1053cb93a386Sopenharmony_ci 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00, 1054cb93a386Sopenharmony_ci 1055cb93a386Sopenharmony_ci 0x48,0x83,0x06,0x07, 1056cb93a386Sopenharmony_ci 0x48,0x83,0x46,0x0c,0x07, 1057cb93a386Sopenharmony_ci 0x48,0x83,0x44,0x24,0x0c,0x07, 1058cb93a386Sopenharmony_ci 0x49,0x83,0x44,0x24,0x0c,0x07, 1059cb93a386Sopenharmony_ci 0x48,0x83,0x44,0x84,0x0c,0x07, 1060cb93a386Sopenharmony_ci 0x49,0x83,0x44,0x84,0x0c,0x07, 1061cb93a386Sopenharmony_ci 0x4a,0x83,0x44,0xa0,0x0c,0x07, 1062cb93a386Sopenharmony_ci 0x4b,0x83,0x44,0x43,0x0c,0x07, 1063cb93a386Sopenharmony_ci 0x49,0x83,0x44,0x03,0x0c,0x07, 1064cb93a386Sopenharmony_ci 0x4a,0x83,0x44,0x18,0x0c,0x07, 1065cb93a386Sopenharmony_ci 1066cb93a386Sopenharmony_ci 0x4a,0x83,0x6c,0x18,0x0c,0x07, 1067cb93a386Sopenharmony_ci 1068cb93a386Sopenharmony_ci 0x48,0x01,0xc8, 1069cb93a386Sopenharmony_ci 0x48,0x01,0x08, 1070cb93a386Sopenharmony_ci 0x48,0x01,0x48,0x0c, 1071cb93a386Sopenharmony_ci 0x48,0x03,0x48,0x0c, 1072cb93a386Sopenharmony_ci 0x48,0x2b,0x48,0x0c, 1073cb93a386Sopenharmony_ci }); 1074cb93a386Sopenharmony_ci 1075cb93a386Sopenharmony_ci 1076cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1077cb93a386Sopenharmony_ci a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX. 1078cb93a386Sopenharmony_ci a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX. 1079cb93a386Sopenharmony_ci a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX. 1080cb93a386Sopenharmony_ci a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX. 1081cb93a386Sopenharmony_ci a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX. 1082cb93a386Sopenharmony_ci a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right. 
1083cb93a386Sopenharmony_ci },{ 1084cb93a386Sopenharmony_ci /* VEX */ /*op*/ /*modRM*/ 1085cb93a386Sopenharmony_ci 0xc5, 0xf5, 0xfe, 0xc2, 1086cb93a386Sopenharmony_ci 0xc5, 0x75, 0xfe, 0xc2, 1087cb93a386Sopenharmony_ci 0xc5, 0xbd, 0xfe, 0xc2, 1088cb93a386Sopenharmony_ci 0xc4, 0xc1, 0x75, 0xfe, 0xc0, 1089cb93a386Sopenharmony_ci 0xc4, 0xe2, 0x75, 0x40, 0xc2, 1090cb93a386Sopenharmony_ci 0xc5, 0xf5, 0xfa, 0xc2, 1091cb93a386Sopenharmony_ci }); 1092cb93a386Sopenharmony_ci 1093cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1094cb93a386Sopenharmony_ci a.vpaddw (A::ymm4, A::ymm3, A::ymm2); 1095cb93a386Sopenharmony_ci a.vpavgw (A::ymm4, A::ymm3, A::ymm2); 1096cb93a386Sopenharmony_ci a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2); 1097cb93a386Sopenharmony_ci a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2); 1098cb93a386Sopenharmony_ci 1099cb93a386Sopenharmony_ci a.vpminsw (A::ymm4, A::ymm3, A::ymm2); 1100cb93a386Sopenharmony_ci a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2); 1101cb93a386Sopenharmony_ci a.vpminuw (A::ymm4, A::ymm3, A::ymm2); 1102cb93a386Sopenharmony_ci a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2); 1103cb93a386Sopenharmony_ci 1104cb93a386Sopenharmony_ci a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2); 1105cb93a386Sopenharmony_ci a.vpabsw (A::ymm4, A::ymm3); 1106cb93a386Sopenharmony_ci a.vpsllw (A::ymm4, A::ymm3, 12); 1107cb93a386Sopenharmony_ci a.vpsraw (A::ymm4, A::ymm3, 12); 1108cb93a386Sopenharmony_ci },{ 1109cb93a386Sopenharmony_ci 0xc5, 0xe5, 0xfd, 0xe2, 1110cb93a386Sopenharmony_ci 0xc5, 0xe5, 0xe3, 0xe2, 1111cb93a386Sopenharmony_ci 0xc5, 0xe5, 0x75, 0xe2, 1112cb93a386Sopenharmony_ci 0xc5, 0xe5, 0x65, 0xe2, 1113cb93a386Sopenharmony_ci 1114cb93a386Sopenharmony_ci 0xc5, 0xe5, 0xea, 0xe2, 1115cb93a386Sopenharmony_ci 0xc5, 0xe5, 0xee, 0xe2, 1116cb93a386Sopenharmony_ci 0xc4,0xe2,0x65, 0x3a, 0xe2, 1117cb93a386Sopenharmony_ci 0xc4,0xe2,0x65, 0x3e, 0xe2, 1118cb93a386Sopenharmony_ci 1119cb93a386Sopenharmony_ci 0xc4,0xe2,0x65, 0x0b, 0xe2, 1120cb93a386Sopenharmony_ci 0xc4,0xe2,0x7d, 0x1d, 0xe3, 
1121cb93a386Sopenharmony_ci 0xc5,0xdd,0x71, 0xf3, 0x0c, 1122cb93a386Sopenharmony_ci 0xc5,0xdd,0x71, 0xe3, 0x0c, 1123cb93a386Sopenharmony_ci }); 1124cb93a386Sopenharmony_ci 1125cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1126cb93a386Sopenharmony_ci A::Label l; 1127cb93a386Sopenharmony_ci a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0 1128cb93a386Sopenharmony_ci a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2); 1129cb93a386Sopenharmony_ci a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2); 1130cb93a386Sopenharmony_ci a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2); 1131cb93a386Sopenharmony_ci a.vcmpltps (A::ymm0, A::ymm1, A::ymm2); 1132cb93a386Sopenharmony_ci a.vcmpleps (A::ymm0, A::ymm1, A::ymm2); 1133cb93a386Sopenharmony_ci a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2); 1134cb93a386Sopenharmony_ci a.label(&l); // 28 bytes after the vcmpeqps that uses it. 1135cb93a386Sopenharmony_ci },{ 1136cb93a386Sopenharmony_ci 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00, 1137cb93a386Sopenharmony_ci 0xc5,0xf5,0x76,0xc2, 1138cb93a386Sopenharmony_ci 0xc5,0xf5,0x66,0xc2, 1139cb93a386Sopenharmony_ci 0xc5,0xf4,0xc2,0xc2,0x00, 1140cb93a386Sopenharmony_ci 0xc5,0xf4,0xc2,0xc2,0x01, 1141cb93a386Sopenharmony_ci 0xc5,0xf4,0xc2,0xc2,0x02, 1142cb93a386Sopenharmony_ci 0xc5,0xf4,0xc2,0xc2,0x04, 1143cb93a386Sopenharmony_ci }); 1144cb93a386Sopenharmony_ci 1145cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1146cb93a386Sopenharmony_ci a.vminps(A::ymm0, A::ymm1, A::ymm2); 1147cb93a386Sopenharmony_ci a.vmaxps(A::ymm0, A::ymm1, A::ymm2); 1148cb93a386Sopenharmony_ci },{ 1149cb93a386Sopenharmony_ci 0xc5,0xf4,0x5d,0xc2, 1150cb93a386Sopenharmony_ci 0xc5,0xf4,0x5f,0xc2, 1151cb93a386Sopenharmony_ci }); 1152cb93a386Sopenharmony_ci 1153cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1154cb93a386Sopenharmony_ci a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3); 1155cb93a386Sopenharmony_ci },{ 1156cb93a386Sopenharmony_ci 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30, 1157cb93a386Sopenharmony_ci }); 1158cb93a386Sopenharmony_ci 
1159cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1160cb93a386Sopenharmony_ci a.vpsrld(A::ymm15, A::ymm2, 8); 1161cb93a386Sopenharmony_ci a.vpsrld(A::ymm0 , A::ymm8, 5); 1162cb93a386Sopenharmony_ci },{ 1163cb93a386Sopenharmony_ci 0xc5, 0x85, 0x72,0xd2, 0x08, 1164cb93a386Sopenharmony_ci 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05, 1165cb93a386Sopenharmony_ci }); 1166cb93a386Sopenharmony_ci 1167cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1168cb93a386Sopenharmony_ci A::Label l; 1169cb93a386Sopenharmony_ci a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32}); 1170cb93a386Sopenharmony_ci a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20); 1171cb93a386Sopenharmony_ci a.vpermq(A::ymm1, A::ymm2, 5); 1172cb93a386Sopenharmony_ci a.label(&l); // 6 bytes after vperm2f128 1173cb93a386Sopenharmony_ci },{ 1174cb93a386Sopenharmony_ci 0xc4,0xe2,0x6d,0x16,0x4f,0x20, 1175cb93a386Sopenharmony_ci 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20, 1176cb93a386Sopenharmony_ci 0xc4,0xe3,0xfd, 0x00,0xca, 0x05, 1177cb93a386Sopenharmony_ci }); 1178cb93a386Sopenharmony_ci 1179cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1180cb93a386Sopenharmony_ci a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi}); 1181cb93a386Sopenharmony_ci a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3); 1182cb93a386Sopenharmony_ci },{ 1183cb93a386Sopenharmony_ci 0xc5,0xed,0x62,0x0f, 1184cb93a386Sopenharmony_ci 0xc5,0xed,0x6a,0xcb, 1185cb93a386Sopenharmony_ci }); 1186cb93a386Sopenharmony_ci 1187cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1188cb93a386Sopenharmony_ci a.vroundps(A::ymm1, A::ymm2, A::NEAREST); 1189cb93a386Sopenharmony_ci a.vroundps(A::ymm1, A::ymm2, A::FLOOR); 1190cb93a386Sopenharmony_ci a.vroundps(A::ymm1, A::ymm2, A::CEIL); 1191cb93a386Sopenharmony_ci a.vroundps(A::ymm1, A::ymm2, A::TRUNC); 1192cb93a386Sopenharmony_ci },{ 1193cb93a386Sopenharmony_ci 0xc4,0xe3,0x7d,0x08,0xca,0x00, 1194cb93a386Sopenharmony_ci 0xc4,0xe3,0x7d,0x08,0xca,0x01, 1195cb93a386Sopenharmony_ci 0xc4,0xe3,0x7d,0x08,0xca,0x02, 1196cb93a386Sopenharmony_ci 
0xc4,0xe3,0x7d,0x08,0xca,0x03, 1197cb93a386Sopenharmony_ci }); 1198cb93a386Sopenharmony_ci 1199cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1200cb93a386Sopenharmony_ci A::Label l; 1201cb93a386Sopenharmony_ci a.label(&l); 1202cb93a386Sopenharmony_ci a.byte(1); 1203cb93a386Sopenharmony_ci a.byte(2); 1204cb93a386Sopenharmony_ci a.byte(3); 1205cb93a386Sopenharmony_ci a.byte(4); 1206cb93a386Sopenharmony_ci 1207cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm0 , &l); 1208cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm1 , &l); 1209cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm8 , &l); 1210cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm15, &l); 1211cb93a386Sopenharmony_ci 1212cb93a386Sopenharmony_ci a.vpshufb(A::ymm4, A::ymm3, &l); 1213cb93a386Sopenharmony_ci a.vpaddd (A::ymm4, A::ymm3, &l); 1214cb93a386Sopenharmony_ci a.vpsubd (A::ymm4, A::ymm3, &l); 1215cb93a386Sopenharmony_ci 1216cb93a386Sopenharmony_ci a.vptest(A::ymm4, &l); 1217cb93a386Sopenharmony_ci 1218cb93a386Sopenharmony_ci a.vmulps (A::ymm4, A::ymm3, &l); 1219cb93a386Sopenharmony_ci },{ 1220cb93a386Sopenharmony_ci 0x01, 0x02, 0x03, 0x4, 1221cb93a386Sopenharmony_ci 1222cb93a386Sopenharmony_ci /* VEX */ /*op*/ /* ModRM */ /* offset */ 1223cb93a386Sopenharmony_ci 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13 1224cb93a386Sopenharmony_ci 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22 1225cb93a386Sopenharmony_ci 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31 1226cb93a386Sopenharmony_ci 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40 1227cb93a386Sopenharmony_ci 1228cb93a386Sopenharmony_ci 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49 1229cb93a386Sopenharmony_ci 1230cb93a386Sopenharmony_ci 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57 1231cb93a386Sopenharmony_ci 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65 
1232cb93a386Sopenharmony_ci 1233cb93a386Sopenharmony_ci 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74 1234cb93a386Sopenharmony_ci 1235cb93a386Sopenharmony_ci 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82 1236cb93a386Sopenharmony_ci }); 1237cb93a386Sopenharmony_ci 1238cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1239cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0}); 1240cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7}); 1241cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12}); 1242cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400}); 1243cb93a386Sopenharmony_ci 1244cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm8, A::xmm0); 1245cb93a386Sopenharmony_ci a.vbroadcastss(A::ymm0, A::xmm13); 1246cb93a386Sopenharmony_ci },{ 1247cb93a386Sopenharmony_ci /* VEX */ /*op*/ /*ModRM*/ /*offset*/ 1248cb93a386Sopenharmony_ci 0xc4,0xe2,0x7d, 0x18, 0b00'000'111, 1249cb93a386Sopenharmony_ci 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07, 1250cb93a386Sopenharmony_ci 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4, 1251cb93a386Sopenharmony_ci 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00, 1252cb93a386Sopenharmony_ci 1253cb93a386Sopenharmony_ci 0xc4,0x62,0x7d, 0x18, 0b11'000'000, 1254cb93a386Sopenharmony_ci 0xc4,0xc2,0x7d, 0x18, 0b11'000'101, 1255cb93a386Sopenharmony_ci }); 1256cb93a386Sopenharmony_ci 1257cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1258cb93a386Sopenharmony_ci A::Label l; 1259cb93a386Sopenharmony_ci a.label(&l); 1260cb93a386Sopenharmony_ci a.jne(&l); 1261cb93a386Sopenharmony_ci a.jne(&l); 1262cb93a386Sopenharmony_ci a.je (&l); 1263cb93a386Sopenharmony_ci a.jmp(&l); 1264cb93a386Sopenharmony_ci a.jl (&l); 1265cb93a386Sopenharmony_ci a.jc (&l); 1266cb93a386Sopenharmony_ci 1267cb93a386Sopenharmony_ci a.cmp(A::rdx, 1); 1268cb93a386Sopenharmony_ci a.cmp(A::rax, 12); 1269cb93a386Sopenharmony_ci a.cmp(A::r14, 2000000000); 1270cb93a386Sopenharmony_ci 
},{ 1271cb93a386Sopenharmony_ci 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes 1272cb93a386Sopenharmony_ci 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes 1273cb93a386Sopenharmony_ci 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes 1274cb93a386Sopenharmony_ci 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes 1275cb93a386Sopenharmony_ci 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes 1276cb93a386Sopenharmony_ci 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes 1277cb93a386Sopenharmony_ci 1278cb93a386Sopenharmony_ci 0x48,0x83,0xfa,0x01, 1279cb93a386Sopenharmony_ci 0x48,0x83,0xf8,0x0c, 1280cb93a386Sopenharmony_ci 0x49,0x81,0xfe,0x00,0x94,0x35,0x77, 1281cb93a386Sopenharmony_ci }); 1282cb93a386Sopenharmony_ci 1283cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1284cb93a386Sopenharmony_ci a.vmovups(A::ymm5, A::Mem{A::rsi}); 1285cb93a386Sopenharmony_ci a.vmovups(A::Mem{A::rsi}, A::ymm5); 1286cb93a386Sopenharmony_ci 1287cb93a386Sopenharmony_ci a.vmovups(A::xmm5, A::Mem{A::rsi}); 1288cb93a386Sopenharmony_ci a.vmovups(A::Mem{A::rsi}, A::xmm5); 1289cb93a386Sopenharmony_ci 1290cb93a386Sopenharmony_ci a.vpmovzxwd(A::ymm4, A::Mem{A::rsi}); 1291cb93a386Sopenharmony_ci a.vpmovzxbd(A::ymm4, A::Mem{A::rsi}); 1292cb93a386Sopenharmony_ci 1293cb93a386Sopenharmony_ci a.vmovq(A::Mem{A::rdx}, A::xmm15); 1294cb93a386Sopenharmony_ci },{ 1295cb93a386Sopenharmony_ci /* VEX */ /*Op*/ /* ModRM */ 1296cb93a386Sopenharmony_ci 0xc5, 0xfc, 0x10, 0b00'101'110, 1297cb93a386Sopenharmony_ci 0xc5, 0xfc, 0x11, 0b00'101'110, 1298cb93a386Sopenharmony_ci 1299cb93a386Sopenharmony_ci 0xc5, 0xf8, 0x10, 0b00'101'110, 1300cb93a386Sopenharmony_ci 0xc5, 0xf8, 0x11, 0b00'101'110, 1301cb93a386Sopenharmony_ci 1302cb93a386Sopenharmony_ci 0xc4,0xe2,0x7d, 0x33, 0b00'100'110, 1303cb93a386Sopenharmony_ci 0xc4,0xe2,0x7d, 0x31, 0b00'100'110, 1304cb93a386Sopenharmony_ci 1305cb93a386Sopenharmony_ci 0xc5, 0x79, 0xd6, 0b00'111'010, 1306cb93a386Sopenharmony_ci }); 1307cb93a386Sopenharmony_ci 
1308cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1309cb93a386Sopenharmony_ci a.vmovups(A::ymm5, A::Mem{A::rsp, 0}); 1310cb93a386Sopenharmony_ci a.vmovups(A::ymm5, A::Mem{A::rsp, 64}); 1311cb93a386Sopenharmony_ci a.vmovups(A::ymm5, A::Mem{A::rsp,128}); 1312cb93a386Sopenharmony_ci 1313cb93a386Sopenharmony_ci a.vmovups(A::Mem{A::rsp, 0}, A::ymm5); 1314cb93a386Sopenharmony_ci a.vmovups(A::Mem{A::rsp, 64}, A::ymm5); 1315cb93a386Sopenharmony_ci a.vmovups(A::Mem{A::rsp,128}, A::ymm5); 1316cb93a386Sopenharmony_ci },{ 1317cb93a386Sopenharmony_ci 0xc5,0xfc,0x10,0x2c,0x24, 1318cb93a386Sopenharmony_ci 0xc5,0xfc,0x10,0x6c,0x24,0x40, 1319cb93a386Sopenharmony_ci 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00, 1320cb93a386Sopenharmony_ci 1321cb93a386Sopenharmony_ci 0xc5,0xfc,0x11,0x2c,0x24, 1322cb93a386Sopenharmony_ci 0xc5,0xfc,0x11,0x6c,0x24,0x40, 1323cb93a386Sopenharmony_ci 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00, 1324cb93a386Sopenharmony_ci }); 1325cb93a386Sopenharmony_ci 1326cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1327cb93a386Sopenharmony_ci a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst. 1328cb93a386Sopenharmony_ci a.movzbq(A::rax, A::Mem{A::r8,}); // High src register. 1329cb93a386Sopenharmony_ci a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register. 1330cb93a386Sopenharmony_ci a.movzbq(A::r8, A::Mem{A::rsi, 12}); 1331cb93a386Sopenharmony_ci a.movzbq(A::r8, A::Mem{A::rsi, 400}); 1332cb93a386Sopenharmony_ci 1333cb93a386Sopenharmony_ci a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst. 1334cb93a386Sopenharmony_ci a.movzwq(A::rax, A::Mem{A::r8,}); // High src register. 1335cb93a386Sopenharmony_ci a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register. 
1336cb93a386Sopenharmony_ci a.movzwq(A::r8, A::Mem{A::rsi, 12}); 1337cb93a386Sopenharmony_ci a.movzwq(A::r8, A::Mem{A::rsi, 400}); 1338cb93a386Sopenharmony_ci 1339cb93a386Sopenharmony_ci a.vmovd(A::Mem{A::rax}, A::xmm0); 1340cb93a386Sopenharmony_ci a.vmovd(A::Mem{A::rax}, A::xmm8); 1341cb93a386Sopenharmony_ci a.vmovd(A::Mem{A::r8 }, A::xmm0); 1342cb93a386Sopenharmony_ci 1343cb93a386Sopenharmony_ci a.vmovd(A::xmm0, A::Mem{A::rax}); 1344cb93a386Sopenharmony_ci a.vmovd(A::xmm8, A::Mem{A::rax}); 1345cb93a386Sopenharmony_ci a.vmovd(A::xmm0, A::Mem{A::r8 }); 1346cb93a386Sopenharmony_ci 1347cb93a386Sopenharmony_ci a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR}); 1348cb93a386Sopenharmony_ci a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO }); 1349cb93a386Sopenharmony_ci a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx}); 1350cb93a386Sopenharmony_ci 1351cb93a386Sopenharmony_ci a.vmovd(A::rax, A::xmm0); 1352cb93a386Sopenharmony_ci a.vmovd(A::rax, A::xmm8); 1353cb93a386Sopenharmony_ci a.vmovd(A::r8 , A::xmm0); 1354cb93a386Sopenharmony_ci 1355cb93a386Sopenharmony_ci a.vmovd(A::xmm0, A::rax); 1356cb93a386Sopenharmony_ci a.vmovd(A::xmm8, A::rax); 1357cb93a386Sopenharmony_ci a.vmovd(A::xmm0, A::r8 ); 1358cb93a386Sopenharmony_ci 1359cb93a386Sopenharmony_ci a.movb(A::Mem{A::rdx}, A::rax); 1360cb93a386Sopenharmony_ci a.movb(A::Mem{A::rdx}, A::r8 ); 1361cb93a386Sopenharmony_ci a.movb(A::Mem{A::r8 }, A::rax); 1362cb93a386Sopenharmony_ci 1363cb93a386Sopenharmony_ci a.movb(A::rdx, A::Mem{A::rax}); 1364cb93a386Sopenharmony_ci a.movb(A::rdx, A::Mem{A::r8 }); 1365cb93a386Sopenharmony_ci a.movb(A::r8 , A::Mem{A::rax}); 1366cb93a386Sopenharmony_ci 1367cb93a386Sopenharmony_ci a.movb(A::rdx, 12); 1368cb93a386Sopenharmony_ci a.movb(A::rax, 4); 1369cb93a386Sopenharmony_ci a.movb(A::r8 , -1); 1370cb93a386Sopenharmony_ci 1371cb93a386Sopenharmony_ci a.movb(A::Mem{A::rdx}, 12); 1372cb93a386Sopenharmony_ci a.movb(A::Mem{A::rax}, 4); 1373cb93a386Sopenharmony_ci a.movb(A::Mem{A::r8 }, -1); 
1374cb93a386Sopenharmony_ci },{ 1375cb93a386Sopenharmony_ci 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax 1376cb93a386Sopenharmony_ci 0x49,0x0f,0xb6,0x00, 1377cb93a386Sopenharmony_ci 0x4c,0x0f,0xb6,0x06, 1378cb93a386Sopenharmony_ci 0x4c,0x0f,0xb6,0x46, 12, 1379cb93a386Sopenharmony_ci 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00, 1380cb93a386Sopenharmony_ci 1381cb93a386Sopenharmony_ci 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax 1382cb93a386Sopenharmony_ci 0x49,0x0f,0xb7,0x00, 1383cb93a386Sopenharmony_ci 0x4c,0x0f,0xb7,0x06, 1384cb93a386Sopenharmony_ci 0x4c,0x0f,0xb7,0x46, 12, 1385cb93a386Sopenharmony_ci 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00, 1386cb93a386Sopenharmony_ci 1387cb93a386Sopenharmony_ci 0xc5,0xf9,0x7e,0x00, 1388cb93a386Sopenharmony_ci 0xc5,0x79,0x7e,0x00, 1389cb93a386Sopenharmony_ci 0xc4,0xc1,0x79,0x7e,0x00, 1390cb93a386Sopenharmony_ci 1391cb93a386Sopenharmony_ci 0xc5,0xf9,0x6e,0x00, 1392cb93a386Sopenharmony_ci 0xc5,0x79,0x6e,0x00, 1393cb93a386Sopenharmony_ci 0xc4,0xc1,0x79,0x6e,0x00, 1394cb93a386Sopenharmony_ci 1395cb93a386Sopenharmony_ci 0xc5,0xf9,0x6e,0x04,0x88, 1396cb93a386Sopenharmony_ci 0xc4,0x21,0x79,0x6e,0x3c,0x40, 1397cb93a386Sopenharmony_ci 0xc4,0xc1,0x79,0x6e,0x04,0x08, 1398cb93a386Sopenharmony_ci 1399cb93a386Sopenharmony_ci 0xc5,0xf9,0x7e,0xc0, 1400cb93a386Sopenharmony_ci 0xc5,0x79,0x7e,0xc0, 1401cb93a386Sopenharmony_ci 0xc4,0xc1,0x79,0x7e,0xc0, 1402cb93a386Sopenharmony_ci 1403cb93a386Sopenharmony_ci 0xc5,0xf9,0x6e,0xc0, 1404cb93a386Sopenharmony_ci 0xc5,0x79,0x6e,0xc0, 1405cb93a386Sopenharmony_ci 0xc4,0xc1,0x79,0x6e,0xc0, 1406cb93a386Sopenharmony_ci 1407cb93a386Sopenharmony_ci 0x48 ,0x88, 0x02, 1408cb93a386Sopenharmony_ci 0x4c, 0x88, 0x02, 1409cb93a386Sopenharmony_ci 0x49, 0x88, 0x00, 1410cb93a386Sopenharmony_ci 1411cb93a386Sopenharmony_ci 0x48 ,0x8a, 0x10, 1412cb93a386Sopenharmony_ci 0x49, 0x8a, 0x10, 1413cb93a386Sopenharmony_ci 0x4c, 0x8a, 0x00, 1414cb93a386Sopenharmony_ci 1415cb93a386Sopenharmony_ci 0x48, 0xc6, 0xc2, 0x0c, 
1416cb93a386Sopenharmony_ci 0x48, 0xc6, 0xc0, 0x04, 1417cb93a386Sopenharmony_ci 0x49, 0xc6, 0xc0, 0xff, 1418cb93a386Sopenharmony_ci 1419cb93a386Sopenharmony_ci 0x48, 0xc6, 0x02, 0x0c, 1420cb93a386Sopenharmony_ci 0x48, 0xc6, 0x00, 0x04, 1421cb93a386Sopenharmony_ci 0x49, 0xc6, 0x00, 0xff, 1422cb93a386Sopenharmony_ci }); 1423cb93a386Sopenharmony_ci 1424cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1425cb93a386Sopenharmony_ci a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1 1426cb93a386Sopenharmony_ci a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8 1427cb93a386Sopenharmony_ci 1428cb93a386Sopenharmony_ci a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1 1429cb93a386Sopenharmony_ci a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8 1430cb93a386Sopenharmony_ci 1431cb93a386Sopenharmony_ci a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1 1432cb93a386Sopenharmony_ci a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8 1433cb93a386Sopenharmony_ci 1434cb93a386Sopenharmony_ci a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1 1435cb93a386Sopenharmony_ci a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8 1436cb93a386Sopenharmony_ci 1437cb93a386Sopenharmony_ci a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi) 1438cb93a386Sopenharmony_ci a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8) 1439cb93a386Sopenharmony_ci 1440cb93a386Sopenharmony_ci a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7); 1441cb93a386Sopenharmony_ci a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15); 1442cb93a386Sopenharmony_ci 1443cb93a386Sopenharmony_ci a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7); 1444cb93a386Sopenharmony_ci a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15); 1445cb93a386Sopenharmony_ci },{ 1446cb93a386Sopenharmony_ci 0xc4,0xe3,0x39, 0x22, 0x0e, 1, 
1447cb93a386Sopenharmony_ci 0xc4,0x43,0x71, 0x22, 0x00, 3, 1448cb93a386Sopenharmony_ci 1449cb93a386Sopenharmony_ci 0xc5,0xb9, 0xc4, 0x0e, 4, 1450cb93a386Sopenharmony_ci 0xc4,0x41,0x71, 0xc4, 0x00, 12, 1451cb93a386Sopenharmony_ci 1452cb93a386Sopenharmony_ci 0xc4,0xe3,0x39, 0x20, 0x0e, 4, 1453cb93a386Sopenharmony_ci 0xc4,0x43,0x71, 0x20, 0x00, 12, 1454cb93a386Sopenharmony_ci 1455cb93a386Sopenharmony_ci 0xc4,0x63,0x7d,0x39,0xc1, 1, 1456cb93a386Sopenharmony_ci 0xc4,0xc3,0x7d,0x39,0xc8, 0, 1457cb93a386Sopenharmony_ci 1458cb93a386Sopenharmony_ci 0xc4,0x63,0x79,0x16,0x06, 3, 1459cb93a386Sopenharmony_ci 0xc4,0xc3,0x79,0x16,0x08, 2, 1460cb93a386Sopenharmony_ci 1461cb93a386Sopenharmony_ci 0xc4,0x63,0x79, 0x15, 0x06, 7, 1462cb93a386Sopenharmony_ci 0xc4,0xc3,0x79, 0x15, 0x08, 15, 1463cb93a386Sopenharmony_ci 1464cb93a386Sopenharmony_ci 0xc4,0x63,0x79, 0x14, 0x06, 7, 1465cb93a386Sopenharmony_ci 0xc4,0xc3,0x79, 0x14, 0x08, 15, 1466cb93a386Sopenharmony_ci }); 1467cb93a386Sopenharmony_ci 1468cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1469cb93a386Sopenharmony_ci a.vpandn(A::ymm3, A::ymm12, A::ymm2); 1470cb93a386Sopenharmony_ci },{ 1471cb93a386Sopenharmony_ci 0xc5, 0x9d, 0xdf, 0xda, 1472cb93a386Sopenharmony_ci }); 1473cb93a386Sopenharmony_ci 1474cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1475cb93a386Sopenharmony_ci A::Label l; 1476cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3 1477cb93a386Sopenharmony_ci 1478cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3 1479cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3 1480cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3 1481cb93a386Sopenharmony_ci 1482cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3 1483cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3 1484cb93a386Sopenharmony_ci 1485cb93a386Sopenharmony_ci 
a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3 1486cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3 1487cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3 1488cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3 1489cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3 1490cb93a386Sopenharmony_ci 1491cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3 1492cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3 1493cb93a386Sopenharmony_ci a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3 1494cb93a386Sopenharmony_ci 1495cb93a386Sopenharmony_ci a.vcvttps2dq(A::ymm3, A::ymm2); 1496cb93a386Sopenharmony_ci a.vcvtdq2ps (A::ymm3, A::ymm2); 1497cb93a386Sopenharmony_ci a.vcvtps2dq (A::ymm3, A::ymm2); 1498cb93a386Sopenharmony_ci a.vsqrtps (A::ymm3, A::ymm2); 1499cb93a386Sopenharmony_ci a.label(&l); 1500cb93a386Sopenharmony_ci },{ 1501cb93a386Sopenharmony_ci 0xc5,0xfd,0x6f,0xda, 1502cb93a386Sopenharmony_ci 1503cb93a386Sopenharmony_ci 0xc5,0xfd,0x6f,0x1e, 1504cb93a386Sopenharmony_ci 0xc5,0xfd,0x6f,0x1c,0x24, 1505cb93a386Sopenharmony_ci 0xc4,0xc1,0x7d,0x6f,0x1b, 1506cb93a386Sopenharmony_ci 1507cb93a386Sopenharmony_ci 0xc5,0xfd,0x6f,0x5e,0x04, 1508cb93a386Sopenharmony_ci 0xc5,0xfd,0x6f,0x5c,0x24,0x04, 1509cb93a386Sopenharmony_ci 1510cb93a386Sopenharmony_ci 0xc5,0xfd,0x6f,0x5c,0xc6,0x04, 1511cb93a386Sopenharmony_ci 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04, 1512cb93a386Sopenharmony_ci 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04, 1513cb93a386Sopenharmony_ci 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04, 1514cb93a386Sopenharmony_ci 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04, 1515cb93a386Sopenharmony_ci 1516cb93a386Sopenharmony_ci 
0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40, 1517cb93a386Sopenharmony_ci 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00, 1518cb93a386Sopenharmony_ci 1519cb93a386Sopenharmony_ci 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00, 1520cb93a386Sopenharmony_ci 1521cb93a386Sopenharmony_ci 0xc5,0xfe,0x5b,0xda, 1522cb93a386Sopenharmony_ci 0xc5,0xfc,0x5b,0xda, 1523cb93a386Sopenharmony_ci 0xc5,0xfd,0x5b,0xda, 1524cb93a386Sopenharmony_ci 0xc5,0xfc,0x51,0xda, 1525cb93a386Sopenharmony_ci }); 1526cb93a386Sopenharmony_ci 1527cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1528cb93a386Sopenharmony_ci a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT); 1529cb93a386Sopenharmony_ci a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL); 1530cb93a386Sopenharmony_ci 1531cb93a386Sopenharmony_ci a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE}); 1532cb93a386Sopenharmony_ci a.vcvtph2ps(A::ymm2, A::xmm3); 1533cb93a386Sopenharmony_ci },{ 1534cb93a386Sopenharmony_ci 0xc4,0xe3,0x7d,0x1d,0xd3,0x04, 1535cb93a386Sopenharmony_ci 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02, 1536cb93a386Sopenharmony_ci 1537cb93a386Sopenharmony_ci 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c, 1538cb93a386Sopenharmony_ci 0xc4,0xe2,0x7d,0x13,0xd3, 1539cb93a386Sopenharmony_ci }); 1540cb93a386Sopenharmony_ci 1541cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1542cb93a386Sopenharmony_ci a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 ); 1543cb93a386Sopenharmony_ci a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 ); 1544cb93a386Sopenharmony_ci a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 ); 1545cb93a386Sopenharmony_ci a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 ); 1546cb93a386Sopenharmony_ci a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 ); 1547cb93a386Sopenharmony_ci a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12); 1548cb93a386Sopenharmony_ci a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12); 1549cb93a386Sopenharmony_ci },{ 1550cb93a386Sopenharmony_ci 
0xc4,0xe2,0x6d,0x92,0x0c,0x87, 1551cb93a386Sopenharmony_ci 0xc4,0xe2,0x75,0x92,0x04,0x10, 1552cb93a386Sopenharmony_ci 0xc4,0x62,0x75,0x92,0x14,0x10, 1553cb93a386Sopenharmony_ci 0xc4,0xa2,0x75,0x92,0x04,0x20, 1554cb93a386Sopenharmony_ci 0xc4,0xc2,0x75,0x92,0x04,0x11, 1555cb93a386Sopenharmony_ci 0xc4,0xe2,0x1d,0x92,0x04,0x10, 1556cb93a386Sopenharmony_ci 0xc4,0xe2,0x1d,0x92,0x04,0xd0, 1557cb93a386Sopenharmony_ci }); 1558cb93a386Sopenharmony_ci 1559cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1560cb93a386Sopenharmony_ci a.mov(A::rax, A::Mem{A::rdi, 0}); 1561cb93a386Sopenharmony_ci a.mov(A::rax, A::Mem{A::rdi, 1}); 1562cb93a386Sopenharmony_ci a.mov(A::rax, A::Mem{A::rdi, 512}); 1563cb93a386Sopenharmony_ci a.mov(A::r15, A::Mem{A::r13, 42}); 1564cb93a386Sopenharmony_ci a.mov(A::rax, A::Mem{A::r13, 42}); 1565cb93a386Sopenharmony_ci a.mov(A::r15, A::Mem{A::rax, 42}); 1566cb93a386Sopenharmony_ci a.mov(A::rax, 1); 1567cb93a386Sopenharmony_ci a.mov(A::rax, A::rcx); 1568cb93a386Sopenharmony_ci },{ 1569cb93a386Sopenharmony_ci 0x48, 0x8b, 0x07, 1570cb93a386Sopenharmony_ci 0x48, 0x8b, 0x47, 0x01, 1571cb93a386Sopenharmony_ci 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00, 1572cb93a386Sopenharmony_ci 0x4d, 0x8b, 0x7d, 0x2a, 1573cb93a386Sopenharmony_ci 0x49, 0x8b, 0x45, 0x2a, 1574cb93a386Sopenharmony_ci 0x4c, 0x8b, 0x78, 0x2a, 1575cb93a386Sopenharmony_ci 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00, 1576cb93a386Sopenharmony_ci 0x48, 0x89, 0xc8, 1577cb93a386Sopenharmony_ci }); 1578cb93a386Sopenharmony_ci 1579cb93a386Sopenharmony_ci // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64 1580cb93a386Sopenharmony_ci 1581cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1582cb93a386Sopenharmony_ci a.and16b(A::v4, A::v3, A::v1); 1583cb93a386Sopenharmony_ci a.orr16b(A::v4, A::v3, A::v1); 1584cb93a386Sopenharmony_ci a.eor16b(A::v4, A::v3, A::v1); 1585cb93a386Sopenharmony_ci a.bic16b(A::v4, A::v3, A::v1); 1586cb93a386Sopenharmony_ci a.bsl16b(A::v4, A::v3, A::v1); 1587cb93a386Sopenharmony_ci 
a.not16b(A::v4, A::v3); 1588cb93a386Sopenharmony_ci 1589cb93a386Sopenharmony_ci a.add4s(A::v4, A::v3, A::v1); 1590cb93a386Sopenharmony_ci a.sub4s(A::v4, A::v3, A::v1); 1591cb93a386Sopenharmony_ci a.mul4s(A::v4, A::v3, A::v1); 1592cb93a386Sopenharmony_ci 1593cb93a386Sopenharmony_ci a.cmeq4s(A::v4, A::v3, A::v1); 1594cb93a386Sopenharmony_ci a.cmgt4s(A::v4, A::v3, A::v1); 1595cb93a386Sopenharmony_ci 1596cb93a386Sopenharmony_ci a.sub8h(A::v4, A::v3, A::v1); 1597cb93a386Sopenharmony_ci a.mul8h(A::v4, A::v3, A::v1); 1598cb93a386Sopenharmony_ci 1599cb93a386Sopenharmony_ci a.fadd4s(A::v4, A::v3, A::v1); 1600cb93a386Sopenharmony_ci a.fsub4s(A::v4, A::v3, A::v1); 1601cb93a386Sopenharmony_ci a.fmul4s(A::v4, A::v3, A::v1); 1602cb93a386Sopenharmony_ci a.fdiv4s(A::v4, A::v3, A::v1); 1603cb93a386Sopenharmony_ci a.fmin4s(A::v4, A::v3, A::v1); 1604cb93a386Sopenharmony_ci a.fmax4s(A::v4, A::v3, A::v1); 1605cb93a386Sopenharmony_ci 1606cb93a386Sopenharmony_ci a.fneg4s (A::v4, A::v3); 1607cb93a386Sopenharmony_ci a.fsqrt4s(A::v4, A::v3); 1608cb93a386Sopenharmony_ci 1609cb93a386Sopenharmony_ci a.fmla4s(A::v4, A::v3, A::v1); 1610cb93a386Sopenharmony_ci a.fmls4s(A::v4, A::v3, A::v1); 1611cb93a386Sopenharmony_ci 1612cb93a386Sopenharmony_ci a.fcmeq4s(A::v4, A::v3, A::v1); 1613cb93a386Sopenharmony_ci a.fcmgt4s(A::v4, A::v3, A::v1); 1614cb93a386Sopenharmony_ci a.fcmge4s(A::v4, A::v3, A::v1); 1615cb93a386Sopenharmony_ci },{ 1616cb93a386Sopenharmony_ci 0x64,0x1c,0x21,0x4e, 1617cb93a386Sopenharmony_ci 0x64,0x1c,0xa1,0x4e, 1618cb93a386Sopenharmony_ci 0x64,0x1c,0x21,0x6e, 1619cb93a386Sopenharmony_ci 0x64,0x1c,0x61,0x4e, 1620cb93a386Sopenharmony_ci 0x64,0x1c,0x61,0x6e, 1621cb93a386Sopenharmony_ci 0x64,0x58,0x20,0x6e, 1622cb93a386Sopenharmony_ci 1623cb93a386Sopenharmony_ci 0x64,0x84,0xa1,0x4e, 1624cb93a386Sopenharmony_ci 0x64,0x84,0xa1,0x6e, 1625cb93a386Sopenharmony_ci 0x64,0x9c,0xa1,0x4e, 1626cb93a386Sopenharmony_ci 1627cb93a386Sopenharmony_ci 0x64,0x8c,0xa1,0x6e, 1628cb93a386Sopenharmony_ci 
0x64,0x34,0xa1,0x4e, 1629cb93a386Sopenharmony_ci 1630cb93a386Sopenharmony_ci 0x64,0x84,0x61,0x6e, 1631cb93a386Sopenharmony_ci 0x64,0x9c,0x61,0x4e, 1632cb93a386Sopenharmony_ci 1633cb93a386Sopenharmony_ci 0x64,0xd4,0x21,0x4e, 1634cb93a386Sopenharmony_ci 0x64,0xd4,0xa1,0x4e, 1635cb93a386Sopenharmony_ci 0x64,0xdc,0x21,0x6e, 1636cb93a386Sopenharmony_ci 0x64,0xfc,0x21,0x6e, 1637cb93a386Sopenharmony_ci 0x64,0xf4,0xa1,0x4e, 1638cb93a386Sopenharmony_ci 0x64,0xf4,0x21,0x4e, 1639cb93a386Sopenharmony_ci 1640cb93a386Sopenharmony_ci 0x64,0xf8,0xa0,0x6e, 1641cb93a386Sopenharmony_ci 0x64,0xf8,0xa1,0x6e, 1642cb93a386Sopenharmony_ci 1643cb93a386Sopenharmony_ci 0x64,0xcc,0x21,0x4e, 1644cb93a386Sopenharmony_ci 0x64,0xcc,0xa1,0x4e, 1645cb93a386Sopenharmony_ci 1646cb93a386Sopenharmony_ci 0x64,0xe4,0x21,0x4e, 1647cb93a386Sopenharmony_ci 0x64,0xe4,0xa1,0x6e, 1648cb93a386Sopenharmony_ci 0x64,0xe4,0x21,0x6e, 1649cb93a386Sopenharmony_ci }); 1650cb93a386Sopenharmony_ci 1651cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1652cb93a386Sopenharmony_ci a.shl4s(A::v4, A::v3, 0); 1653cb93a386Sopenharmony_ci a.shl4s(A::v4, A::v3, 1); 1654cb93a386Sopenharmony_ci a.shl4s(A::v4, A::v3, 8); 1655cb93a386Sopenharmony_ci a.shl4s(A::v4, A::v3, 16); 1656cb93a386Sopenharmony_ci a.shl4s(A::v4, A::v3, 31); 1657cb93a386Sopenharmony_ci 1658cb93a386Sopenharmony_ci a.sshr4s(A::v4, A::v3, 1); 1659cb93a386Sopenharmony_ci a.sshr4s(A::v4, A::v3, 8); 1660cb93a386Sopenharmony_ci a.sshr4s(A::v4, A::v3, 31); 1661cb93a386Sopenharmony_ci 1662cb93a386Sopenharmony_ci a.ushr4s(A::v4, A::v3, 1); 1663cb93a386Sopenharmony_ci a.ushr4s(A::v4, A::v3, 8); 1664cb93a386Sopenharmony_ci a.ushr4s(A::v4, A::v3, 31); 1665cb93a386Sopenharmony_ci 1666cb93a386Sopenharmony_ci a.ushr8h(A::v4, A::v3, 1); 1667cb93a386Sopenharmony_ci a.ushr8h(A::v4, A::v3, 8); 1668cb93a386Sopenharmony_ci a.ushr8h(A::v4, A::v3, 15); 1669cb93a386Sopenharmony_ci },{ 1670cb93a386Sopenharmony_ci 0x64,0x54,0x20,0x4f, 1671cb93a386Sopenharmony_ci 0x64,0x54,0x21,0x4f, 
1672cb93a386Sopenharmony_ci 0x64,0x54,0x28,0x4f, 1673cb93a386Sopenharmony_ci 0x64,0x54,0x30,0x4f, 1674cb93a386Sopenharmony_ci 0x64,0x54,0x3f,0x4f, 1675cb93a386Sopenharmony_ci 1676cb93a386Sopenharmony_ci 0x64,0x04,0x3f,0x4f, 1677cb93a386Sopenharmony_ci 0x64,0x04,0x38,0x4f, 1678cb93a386Sopenharmony_ci 0x64,0x04,0x21,0x4f, 1679cb93a386Sopenharmony_ci 1680cb93a386Sopenharmony_ci 0x64,0x04,0x3f,0x6f, 1681cb93a386Sopenharmony_ci 0x64,0x04,0x38,0x6f, 1682cb93a386Sopenharmony_ci 0x64,0x04,0x21,0x6f, 1683cb93a386Sopenharmony_ci 1684cb93a386Sopenharmony_ci 0x64,0x04,0x1f,0x6f, 1685cb93a386Sopenharmony_ci 0x64,0x04,0x18,0x6f, 1686cb93a386Sopenharmony_ci 0x64,0x04,0x11,0x6f, 1687cb93a386Sopenharmony_ci }); 1688cb93a386Sopenharmony_ci 1689cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1690cb93a386Sopenharmony_ci a.sli4s(A::v4, A::v3, 0); 1691cb93a386Sopenharmony_ci a.sli4s(A::v4, A::v3, 1); 1692cb93a386Sopenharmony_ci a.sli4s(A::v4, A::v3, 8); 1693cb93a386Sopenharmony_ci a.sli4s(A::v4, A::v3, 16); 1694cb93a386Sopenharmony_ci a.sli4s(A::v4, A::v3, 31); 1695cb93a386Sopenharmony_ci },{ 1696cb93a386Sopenharmony_ci 0x64,0x54,0x20,0x6f, 1697cb93a386Sopenharmony_ci 0x64,0x54,0x21,0x6f, 1698cb93a386Sopenharmony_ci 0x64,0x54,0x28,0x6f, 1699cb93a386Sopenharmony_ci 0x64,0x54,0x30,0x6f, 1700cb93a386Sopenharmony_ci 0x64,0x54,0x3f,0x6f, 1701cb93a386Sopenharmony_ci }); 1702cb93a386Sopenharmony_ci 1703cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1704cb93a386Sopenharmony_ci a.scvtf4s (A::v4, A::v3); 1705cb93a386Sopenharmony_ci a.fcvtzs4s(A::v4, A::v3); 1706cb93a386Sopenharmony_ci a.fcvtns4s(A::v4, A::v3); 1707cb93a386Sopenharmony_ci a.frintp4s(A::v4, A::v3); 1708cb93a386Sopenharmony_ci a.frintm4s(A::v4, A::v3); 1709cb93a386Sopenharmony_ci a.fcvtn (A::v4, A::v3); 1710cb93a386Sopenharmony_ci a.fcvtl (A::v4, A::v3); 1711cb93a386Sopenharmony_ci },{ 1712cb93a386Sopenharmony_ci 0x64,0xd8,0x21,0x4e, 1713cb93a386Sopenharmony_ci 0x64,0xb8,0xa1,0x4e, 1714cb93a386Sopenharmony_ci 0x64,0xa8,0x21,0x4e, 
1715cb93a386Sopenharmony_ci 0x64,0x88,0xa1,0x4e, 1716cb93a386Sopenharmony_ci 0x64,0x98,0x21,0x4e, 1717cb93a386Sopenharmony_ci 0x64,0x68,0x21,0x0e, 1718cb93a386Sopenharmony_ci 0x64,0x78,0x21,0x0e, 1719cb93a386Sopenharmony_ci }); 1720cb93a386Sopenharmony_ci 1721cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1722cb93a386Sopenharmony_ci a.sub (A::sp, A::sp, 32); // sub sp, sp, #32 1723cb93a386Sopenharmony_ci a.strq(A::v0, A::sp, 1); // str q0, [sp, #16] 1724cb93a386Sopenharmony_ci a.strq(A::v1, A::sp); // str q1, [sp] 1725cb93a386Sopenharmony_ci a.strd(A::v0, A::sp, 6); // str d0, [sp, #48] 1726cb93a386Sopenharmony_ci a.strs(A::v0, A::sp, 6); // str s0, [sp, #24] 1727cb93a386Sopenharmony_ci a.strh(A::v0, A::sp, 10); // str h0, [sp, #20] 1728cb93a386Sopenharmony_ci a.strb(A::v0, A::sp, 47); // str b0, [sp, #47] 1729cb93a386Sopenharmony_ci a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42] 1730cb93a386Sopenharmony_ci a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94] 1731cb93a386Sopenharmony_ci a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40] 1732cb93a386Sopenharmony_ci a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8] 1733cb93a386Sopenharmony_ci a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048] 1734cb93a386Sopenharmony_ci a.add (A::sp, A::sp, 32); // add sp, sp, #32 1735cb93a386Sopenharmony_ci },{ 1736cb93a386Sopenharmony_ci 0xff,0x83,0x00,0xd1, 1737cb93a386Sopenharmony_ci 0xe0,0x07,0x80,0x3d, 1738cb93a386Sopenharmony_ci 0xe1,0x03,0x80,0x3d, 1739cb93a386Sopenharmony_ci 0xe0,0x1b,0x00,0xfd, 1740cb93a386Sopenharmony_ci 0xe0,0x1b,0x00,0xbd, 1741cb93a386Sopenharmony_ci 0xe0,0x2b,0x00,0x7d, 1742cb93a386Sopenharmony_ci 0xe0,0xbf,0x00,0x3d, 1743cb93a386Sopenharmony_ci 0xe9,0xab,0x40,0x3d, 1744cb93a386Sopenharmony_ci 0xe9,0xbf,0x40,0x7d, 1745cb93a386Sopenharmony_ci 0xe7,0x2b,0x40,0xbd, 1746cb93a386Sopenharmony_ci 0xe7,0x07,0x40,0xfd, 1747cb93a386Sopenharmony_ci 0xe5,0x03,0xc2,0x3d, 1748cb93a386Sopenharmony_ci 0xff,0x83,0x00,0x91, 1749cb93a386Sopenharmony_ci }); 1750cb93a386Sopenharmony_ci 
1751cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1752cb93a386Sopenharmony_ci a.brk(0); 1753cb93a386Sopenharmony_ci a.brk(65535); 1754cb93a386Sopenharmony_ci 1755cb93a386Sopenharmony_ci a.ret(A::x30); // Conventional ret using link register. 1756cb93a386Sopenharmony_ci a.ret(A::x13); // Can really return using any register if we like. 1757cb93a386Sopenharmony_ci 1758cb93a386Sopenharmony_ci a.add(A::x2, A::x2, 4); 1759cb93a386Sopenharmony_ci a.add(A::x3, A::x2, 32); 1760cb93a386Sopenharmony_ci 1761cb93a386Sopenharmony_ci a.sub(A::x2, A::x2, 4); 1762cb93a386Sopenharmony_ci a.sub(A::x3, A::x2, 32); 1763cb93a386Sopenharmony_ci 1764cb93a386Sopenharmony_ci a.subs(A::x2, A::x2, 4); 1765cb93a386Sopenharmony_ci a.subs(A::x3, A::x2, 32); 1766cb93a386Sopenharmony_ci 1767cb93a386Sopenharmony_ci a.subs(A::xzr, A::x2, 4); // These are actually the same instruction! 1768cb93a386Sopenharmony_ci a.cmp(A::x2, 4); 1769cb93a386Sopenharmony_ci 1770cb93a386Sopenharmony_ci A::Label l; 1771cb93a386Sopenharmony_ci a.label(&l); 1772cb93a386Sopenharmony_ci a.bne(&l); 1773cb93a386Sopenharmony_ci a.bne(&l); 1774cb93a386Sopenharmony_ci a.blt(&l); 1775cb93a386Sopenharmony_ci a.b(&l); 1776cb93a386Sopenharmony_ci a.cbnz(A::x2, &l); 1777cb93a386Sopenharmony_ci a.cbz(A::x2, &l); 1778cb93a386Sopenharmony_ci 1779cb93a386Sopenharmony_ci a.add(A::x3, A::x2, A::x1); // add x3,x2,x1 1780cb93a386Sopenharmony_ci a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3 1781cb93a386Sopenharmony_ci },{ 1782cb93a386Sopenharmony_ci 0x00,0x00,0x20,0xd4, 1783cb93a386Sopenharmony_ci 0xe0,0xff,0x3f,0xd4, 1784cb93a386Sopenharmony_ci 1785cb93a386Sopenharmony_ci 0xc0,0x03,0x5f,0xd6, 1786cb93a386Sopenharmony_ci 0xa0,0x01,0x5f,0xd6, 1787cb93a386Sopenharmony_ci 1788cb93a386Sopenharmony_ci 0x42,0x10,0x00,0x91, 1789cb93a386Sopenharmony_ci 0x43,0x80,0x00,0x91, 1790cb93a386Sopenharmony_ci 1791cb93a386Sopenharmony_ci 0x42,0x10,0x00,0xd1, 1792cb93a386Sopenharmony_ci 0x43,0x80,0x00,0xd1, 1793cb93a386Sopenharmony_ci 
1794cb93a386Sopenharmony_ci 0x42,0x10,0x00,0xf1, 1795cb93a386Sopenharmony_ci 0x43,0x80,0x00,0xf1, 1796cb93a386Sopenharmony_ci 1797cb93a386Sopenharmony_ci 0x5f,0x10,0x00,0xf1, 1798cb93a386Sopenharmony_ci 0x5f,0x10,0x00,0xf1, 1799cb93a386Sopenharmony_ci 1800cb93a386Sopenharmony_ci 0x01,0x00,0x00,0x54, // b.ne #0 1801cb93a386Sopenharmony_ci 0xe1,0xff,0xff,0x54, // b.ne #-4 1802cb93a386Sopenharmony_ci 0xcb,0xff,0xff,0x54, // b.lt #-8 1803cb93a386Sopenharmony_ci 0xae,0xff,0xff,0x54, // b.al #-12 1804cb93a386Sopenharmony_ci 0x82,0xff,0xff,0xb5, // cbnz x2, #-16 1805cb93a386Sopenharmony_ci 0x62,0xff,0xff,0xb4, // cbz x2, #-20 1806cb93a386Sopenharmony_ci 1807cb93a386Sopenharmony_ci 0x43,0x00,0x01,0x8b, 1808cb93a386Sopenharmony_ci 0x43,0x0c,0x81,0x8b, 1809cb93a386Sopenharmony_ci }); 1810cb93a386Sopenharmony_ci 1811cb93a386Sopenharmony_ci // Can we cbz() to a not-yet-defined label? 1812cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1813cb93a386Sopenharmony_ci A::Label l; 1814cb93a386Sopenharmony_ci a.cbz(A::x2, &l); 1815cb93a386Sopenharmony_ci a.add(A::x3, A::x2, 32); 1816cb93a386Sopenharmony_ci a.label(&l); 1817cb93a386Sopenharmony_ci a.ret(A::x30); 1818cb93a386Sopenharmony_ci },{ 1819cb93a386Sopenharmony_ci 0x42,0x00,0x00,0xb4, // cbz x2, #8 1820cb93a386Sopenharmony_ci 0x43,0x80,0x00,0x91, // add x3, x2, #32 1821cb93a386Sopenharmony_ci 0xc0,0x03,0x5f,0xd6, // ret 1822cb93a386Sopenharmony_ci }); 1823cb93a386Sopenharmony_ci 1824cb93a386Sopenharmony_ci // If we start a label as a backward label, 1825cb93a386Sopenharmony_ci // can we redefine it to be a future label? 1826cb93a386Sopenharmony_ci // (Not sure this is useful... just want to test it works.) 1827cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1828cb93a386Sopenharmony_ci A::Label l1; 1829cb93a386Sopenharmony_ci a.label(&l1); 1830cb93a386Sopenharmony_ci a.add(A::x3, A::x2, 32); 1831cb93a386Sopenharmony_ci a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky. 
1832cb93a386Sopenharmony_ci 1833cb93a386Sopenharmony_ci A::Label l2; // Start off the same... 1834cb93a386Sopenharmony_ci a.label(&l2); 1835cb93a386Sopenharmony_ci a.add(A::x3, A::x2, 32); 1836cb93a386Sopenharmony_ci a.cbz(A::x2, &l2); // Looks like this will go backward... 1837cb93a386Sopenharmony_ci a.add(A::x2, A::x2, 4); 1838cb93a386Sopenharmony_ci a.add(A::x3, A::x2, 32); 1839cb93a386Sopenharmony_ci a.label(&l2); // But no... actually forward! What a switcheroo! 1840cb93a386Sopenharmony_ci },{ 1841cb93a386Sopenharmony_ci 0x43,0x80,0x00,0x91, // add x3, x2, #32 1842cb93a386Sopenharmony_ci 0xe2,0xff,0xff,0xb4, // cbz x2, #-4 1843cb93a386Sopenharmony_ci 1844cb93a386Sopenharmony_ci 0x43,0x80,0x00,0x91, // add x3, x2, #32 1845cb93a386Sopenharmony_ci 0x62,0x00,0x00,0xb4, // cbz x2, #12 1846cb93a386Sopenharmony_ci 0x42,0x10,0x00,0x91, // add x2, x2, #4 1847cb93a386Sopenharmony_ci 0x43,0x80,0x00,0x91, // add x3, x2, #32 1848cb93a386Sopenharmony_ci }); 1849cb93a386Sopenharmony_ci 1850cb93a386Sopenharmony_ci // Loading from a label on ARM. 
1851cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1852cb93a386Sopenharmony_ci A::Label fore,aft; 1853cb93a386Sopenharmony_ci a.label(&fore); 1854cb93a386Sopenharmony_ci a.word(0x01234567); 1855cb93a386Sopenharmony_ci a.ldrq(A::v1, &fore); 1856cb93a386Sopenharmony_ci a.ldrq(A::v2, &aft); 1857cb93a386Sopenharmony_ci a.label(&aft); 1858cb93a386Sopenharmony_ci a.word(0x76543210); 1859cb93a386Sopenharmony_ci },{ 1860cb93a386Sopenharmony_ci 0x67,0x45,0x23,0x01, 1861cb93a386Sopenharmony_ci 0xe1,0xff,0xff,0x9c, // ldr q1, #-4 1862cb93a386Sopenharmony_ci 0x22,0x00,0x00,0x9c, // ldr q2, #4 1863cb93a386Sopenharmony_ci 0x10,0x32,0x54,0x76, 1864cb93a386Sopenharmony_ci }); 1865cb93a386Sopenharmony_ci 1866cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1867cb93a386Sopenharmony_ci a.ldrq(A::v0, A::x8); 1868cb93a386Sopenharmony_ci a.strq(A::v0, A::x8); 1869cb93a386Sopenharmony_ci },{ 1870cb93a386Sopenharmony_ci 0x00,0x01,0xc0,0x3d, 1871cb93a386Sopenharmony_ci 0x00,0x01,0x80,0x3d, 1872cb93a386Sopenharmony_ci }); 1873cb93a386Sopenharmony_ci 1874cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1875cb93a386Sopenharmony_ci a.dup4s (A::v0, A::x8); 1876cb93a386Sopenharmony_ci a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding 1877cb93a386Sopenharmony_ci a.ld1r8h (A::v0, A::x8); 1878cb93a386Sopenharmony_ci a.ld1r16b(A::v0, A::x8); 1879cb93a386Sopenharmony_ci },{ 1880cb93a386Sopenharmony_ci 0x00,0x0d,0x04,0x4e, 1881cb93a386Sopenharmony_ci 0x00,0xc9,0x40,0x4d, 1882cb93a386Sopenharmony_ci 0x00,0xc5,0x40,0x4d, 1883cb93a386Sopenharmony_ci 0x00,0xc1,0x40,0x4d, 1884cb93a386Sopenharmony_ci }); 1885cb93a386Sopenharmony_ci 1886cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1887cb93a386Sopenharmony_ci a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding 1888cb93a386Sopenharmony_ci a.ld44s(A::v0, A::x8); 1889cb93a386Sopenharmony_ci a.st24s(A::v0, A::x8); 1890cb93a386Sopenharmony_ci a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | 
llvm-mc --show-encoding 1891cb93a386Sopenharmony_ci 1892cb93a386Sopenharmony_ci a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding 1893cb93a386Sopenharmony_ci a.ld24s(A::v0, A::x8, 1); 1894cb93a386Sopenharmony_ci a.ld24s(A::v0, A::x8, 2); 1895cb93a386Sopenharmony_ci a.ld24s(A::v0, A::x8, 3); 1896cb93a386Sopenharmony_ci 1897cb93a386Sopenharmony_ci a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8] 1898cb93a386Sopenharmony_ci a.ld44s(A::v0, A::x8, 1); 1899cb93a386Sopenharmony_ci a.ld44s(A::v0, A::x8, 2); 1900cb93a386Sopenharmony_ci a.ld44s(A::v0, A::x8, 3); 1901cb93a386Sopenharmony_ci },{ 1902cb93a386Sopenharmony_ci 0x00,0x89,0x40,0x4c, 1903cb93a386Sopenharmony_ci 0x00,0x09,0x40,0x4c, 1904cb93a386Sopenharmony_ci 0x00,0x89,0x00,0x4c, 1905cb93a386Sopenharmony_ci 0x00,0x09,0x00,0x4c, 1906cb93a386Sopenharmony_ci 1907cb93a386Sopenharmony_ci 0x00,0x81,0x60,0x0d, 1908cb93a386Sopenharmony_ci 0x00,0x91,0x60,0x0d, 1909cb93a386Sopenharmony_ci 0x00,0x81,0x60,0x4d, 1910cb93a386Sopenharmony_ci 0x00,0x91,0x60,0x4d, 1911cb93a386Sopenharmony_ci 1912cb93a386Sopenharmony_ci 0x00,0xa1,0x60,0x0d, 1913cb93a386Sopenharmony_ci 0x00,0xb1,0x60,0x0d, 1914cb93a386Sopenharmony_ci 0x00,0xa1,0x60,0x4d, 1915cb93a386Sopenharmony_ci 0x00,0xb1,0x60,0x4d, 1916cb93a386Sopenharmony_ci }); 1917cb93a386Sopenharmony_ci 1918cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1919cb93a386Sopenharmony_ci a.xtns2h(A::v0, A::v0); 1920cb93a386Sopenharmony_ci a.xtnh2b(A::v0, A::v0); 1921cb93a386Sopenharmony_ci a.strs (A::v0, A::x0); 1922cb93a386Sopenharmony_ci 1923cb93a386Sopenharmony_ci a.ldrs (A::v0, A::x0); 1924cb93a386Sopenharmony_ci a.uxtlb2h(A::v0, A::v0); 1925cb93a386Sopenharmony_ci a.uxtlh2s(A::v0, A::v0); 1926cb93a386Sopenharmony_ci 1927cb93a386Sopenharmony_ci a.uminv4s(A::v3, A::v4); 1928cb93a386Sopenharmony_ci a.movs (A::x3, A::v4,0); // mov.s w3,v4[0] 1929cb93a386Sopenharmony_ci a.movs (A::x3, A::v4,1); // mov.s w3,v4[1] 1930cb93a386Sopenharmony_ci a.inss 
(A::v4, A::x3,3); // ins.s v4[3],w3 1931cb93a386Sopenharmony_ci },{ 1932cb93a386Sopenharmony_ci 0x00,0x28,0x61,0x0e, 1933cb93a386Sopenharmony_ci 0x00,0x28,0x21,0x0e, 1934cb93a386Sopenharmony_ci 0x00,0x00,0x00,0xbd, 1935cb93a386Sopenharmony_ci 1936cb93a386Sopenharmony_ci 0x00,0x00,0x40,0xbd, 1937cb93a386Sopenharmony_ci 0x00,0xa4,0x08,0x2f, 1938cb93a386Sopenharmony_ci 0x00,0xa4,0x10,0x2f, 1939cb93a386Sopenharmony_ci 1940cb93a386Sopenharmony_ci 0x83,0xa8,0xb1,0x6e, 1941cb93a386Sopenharmony_ci 0x83,0x3c,0x04,0x0e, 1942cb93a386Sopenharmony_ci 0x83,0x3c,0x0c,0x0e, 1943cb93a386Sopenharmony_ci 0x64,0x1c,0x1c,0x4e, 1944cb93a386Sopenharmony_ci }); 1945cb93a386Sopenharmony_ci 1946cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1947cb93a386Sopenharmony_ci a.ldrb(A::v0, A::x8); 1948cb93a386Sopenharmony_ci a.strb(A::v0, A::x8); 1949cb93a386Sopenharmony_ci },{ 1950cb93a386Sopenharmony_ci 0x00,0x01,0x40,0x3d, 1951cb93a386Sopenharmony_ci 0x00,0x01,0x00,0x3d, 1952cb93a386Sopenharmony_ci }); 1953cb93a386Sopenharmony_ci 1954cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1955cb93a386Sopenharmony_ci a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24] 1956cb93a386Sopenharmony_ci a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12] 1957cb93a386Sopenharmony_ci a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6] 1958cb93a386Sopenharmony_ci a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3] 1959cb93a386Sopenharmony_ci 1960cb93a386Sopenharmony_ci a.strs(A::x0, A::x1, 3); // str w0, [x1, #12] 1961cb93a386Sopenharmony_ci },{ 1962cb93a386Sopenharmony_ci 0x20,0x0c,0x40,0xf9, 1963cb93a386Sopenharmony_ci 0x20,0x0c,0x40,0xb9, 1964cb93a386Sopenharmony_ci 0x20,0x0c,0x40,0x79, 1965cb93a386Sopenharmony_ci 0x20,0x0c,0x40,0x39, 1966cb93a386Sopenharmony_ci 1967cb93a386Sopenharmony_ci 0x20,0x0c,0x00,0xb9, 1968cb93a386Sopenharmony_ci }); 1969cb93a386Sopenharmony_ci 1970cb93a386Sopenharmony_ci test_asm(r, [&](A& a) { 1971cb93a386Sopenharmony_ci a.tbl (A::v0, A::v1, A::v2); 1972cb93a386Sopenharmony_ci a.uzp14s(A::v0, A::v1, A::v2); 
1973cb93a386Sopenharmony_ci a.uzp24s(A::v0, A::v1, A::v2); 1974cb93a386Sopenharmony_ci a.zip14s(A::v0, A::v1, A::v2); 1975cb93a386Sopenharmony_ci a.zip24s(A::v0, A::v1, A::v2); 1976cb93a386Sopenharmony_ci },{ 1977cb93a386Sopenharmony_ci 0x20,0x00,0x02,0x4e, 1978cb93a386Sopenharmony_ci 0x20,0x18,0x82,0x4e, 1979cb93a386Sopenharmony_ci 0x20,0x58,0x82,0x4e, 1980cb93a386Sopenharmony_ci 0x20,0x38,0x82,0x4e, 1981cb93a386Sopenharmony_ci 0x20,0x78,0x82,0x4e, 1982cb93a386Sopenharmony_ci }); 1983cb93a386Sopenharmony_ci} 1984cb93a386Sopenharmony_ci 1985cb93a386Sopenharmony_ciDEF_TEST(SkVM_approx_math, r) { 1986cb93a386Sopenharmony_ci auto eval = [](int N, float values[], auto fn) { 1987cb93a386Sopenharmony_ci skvm::Builder b; 1988cb93a386Sopenharmony_ci skvm::Ptr inout = b.varying<float>(); 1989cb93a386Sopenharmony_ci 1990cb93a386Sopenharmony_ci b.storeF(inout, fn(&b, b.loadF(inout))); 1991cb93a386Sopenharmony_ci 1992cb93a386Sopenharmony_ci b.done().eval(N, values); 1993cb93a386Sopenharmony_ci }; 1994cb93a386Sopenharmony_ci 1995cb93a386Sopenharmony_ci auto compare = [r](int N, const float values[], const float expected[]) { 1996cb93a386Sopenharmony_ci for (int i = 0; i < N; ++i) { 1997cb93a386Sopenharmony_ci REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f)); 1998cb93a386Sopenharmony_ci } 1999cb93a386Sopenharmony_ci }; 2000cb93a386Sopenharmony_ci 2001cb93a386Sopenharmony_ci // log2 2002cb93a386Sopenharmony_ci { 2003cb93a386Sopenharmony_ci float values[] = {0.25f, 0.5f, 1, 2, 4, 8}; 2004cb93a386Sopenharmony_ci constexpr int N = SK_ARRAY_COUNT(values); 2005cb93a386Sopenharmony_ci eval(N, values, [](skvm::Builder* b, skvm::F32 v) { 2006cb93a386Sopenharmony_ci return b->approx_log2(v); 2007cb93a386Sopenharmony_ci }); 2008cb93a386Sopenharmony_ci const float expected[] = {-2, -1, 0, 1, 2, 3}; 2009cb93a386Sopenharmony_ci compare(N, values, expected); 2010cb93a386Sopenharmony_ci } 2011cb93a386Sopenharmony_ci 2012cb93a386Sopenharmony_ci // pow2 
2013cb93a386Sopenharmony_ci { 2014cb93a386Sopenharmony_ci float values[] = {-2, -1, 0, 1, 2, 3}; 2015cb93a386Sopenharmony_ci constexpr int N = SK_ARRAY_COUNT(values); 2016cb93a386Sopenharmony_ci eval(N, values, [](skvm::Builder* b, skvm::F32 v) { 2017cb93a386Sopenharmony_ci return b->approx_pow2(v); 2018cb93a386Sopenharmony_ci }); 2019cb93a386Sopenharmony_ci const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8}; 2020cb93a386Sopenharmony_ci compare(N, values, expected); 2021cb93a386Sopenharmony_ci } 2022cb93a386Sopenharmony_ci 2023cb93a386Sopenharmony_ci // powf -- x^0.5 2024cb93a386Sopenharmony_ci { 2025cb93a386Sopenharmony_ci float bases[] = {0, 1, 4, 9, 16}; 2026cb93a386Sopenharmony_ci constexpr int N = SK_ARRAY_COUNT(bases); 2027cb93a386Sopenharmony_ci eval(N, bases, [](skvm::Builder* b, skvm::F32 base) { 2028cb93a386Sopenharmony_ci return b->approx_powf(base, b->splat(0.5f)); 2029cb93a386Sopenharmony_ci }); 2030cb93a386Sopenharmony_ci const float expected[] = {0, 1, 2, 3, 4}; 2031cb93a386Sopenharmony_ci compare(N, bases, expected); 2032cb93a386Sopenharmony_ci } 2033cb93a386Sopenharmony_ci // powf -- 3^x 2034cb93a386Sopenharmony_ci { 2035cb93a386Sopenharmony_ci float exps[] = {-2, -1, 0, 1, 2}; 2036cb93a386Sopenharmony_ci constexpr int N = SK_ARRAY_COUNT(exps); 2037cb93a386Sopenharmony_ci eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) { 2038cb93a386Sopenharmony_ci return b->approx_powf(b->splat(3.0f), exp); 2039cb93a386Sopenharmony_ci }); 2040cb93a386Sopenharmony_ci const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9}; 2041cb93a386Sopenharmony_ci compare(N, exps, expected); 2042cb93a386Sopenharmony_ci } 2043cb93a386Sopenharmony_ci 2044cb93a386Sopenharmony_ci auto test = [r](float arg, float expected, float tolerance, auto prog) { 2045cb93a386Sopenharmony_ci skvm::Builder b; 2046cb93a386Sopenharmony_ci skvm::Ptr inout = b.varying<float>(); 2047cb93a386Sopenharmony_ci b.storeF(inout, prog(b.loadF(inout))); 2048cb93a386Sopenharmony_ci float actual = arg; 
2049cb93a386Sopenharmony_ci b.done().eval(1, &actual); 2050cb93a386Sopenharmony_ci 2051cb93a386Sopenharmony_ci float err = std::abs(actual - expected); 2052cb93a386Sopenharmony_ci 2053cb93a386Sopenharmony_ci if (err > tolerance) { 2054cb93a386Sopenharmony_ci // SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual); 2055cb93a386Sopenharmony_ci REPORTER_ASSERT(r, true); 2056cb93a386Sopenharmony_ci } 2057cb93a386Sopenharmony_ci return err; 2058cb93a386Sopenharmony_ci }; 2059cb93a386Sopenharmony_ci 2060cb93a386Sopenharmony_ci auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) { 2061cb93a386Sopenharmony_ci skvm::Builder b; 2062cb93a386Sopenharmony_ci skvm::Ptr in0 = b.varying<float>(); 2063cb93a386Sopenharmony_ci skvm::Ptr in1 = b.varying<float>(); 2064cb93a386Sopenharmony_ci skvm::Ptr out = b.varying<float>(); 2065cb93a386Sopenharmony_ci b.storeF(out, prog(b.loadF(in0), b.loadF(in1))); 2066cb93a386Sopenharmony_ci float actual; 2067cb93a386Sopenharmony_ci b.done().eval(1, &arg0, &arg1, &actual); 2068cb93a386Sopenharmony_ci 2069cb93a386Sopenharmony_ci float err = std::abs(actual - expected); 2070cb93a386Sopenharmony_ci 2071cb93a386Sopenharmony_ci if (err > tolerance) { 2072cb93a386Sopenharmony_ci // SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual); 2073cb93a386Sopenharmony_ci REPORTER_ASSERT(r, true); 2074cb93a386Sopenharmony_ci } 2075cb93a386Sopenharmony_ci return err; 2076cb93a386Sopenharmony_ci }; 2077cb93a386Sopenharmony_ci 2078cb93a386Sopenharmony_ci // sine, cosine, tangent 2079cb93a386Sopenharmony_ci { 2080cb93a386Sopenharmony_ci constexpr float P = SK_ScalarPI; 2081cb93a386Sopenharmony_ci constexpr float tol = 0.00175f; 2082cb93a386Sopenharmony_ci for (float rad = -5*P; rad <= 5*P; rad += 0.1f) { 2083cb93a386Sopenharmony_ci test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) { 2084cb93a386Sopenharmony_ci return approx_sin(x); 2085cb93a386Sopenharmony_ci }); 2086cb93a386Sopenharmony_ci 
test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) { 2087cb93a386Sopenharmony_ci return approx_cos(x); 2088cb93a386Sopenharmony_ci }); 2089cb93a386Sopenharmony_ci } 2090cb93a386Sopenharmony_ci 2091cb93a386Sopenharmony_ci // Our tangent diverge more as we get near infinities (x near +- Pi/2), 2092cb93a386Sopenharmony_ci // so bring in the domain a little. 2093cb93a386Sopenharmony_ci constexpr float eps = 0.16f; 2094cb93a386Sopenharmony_ci float err = 0; 2095cb93a386Sopenharmony_ci for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) { 2096cb93a386Sopenharmony_ci err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) { 2097cb93a386Sopenharmony_ci return approx_tan(x); 2098cb93a386Sopenharmony_ci }); 2099cb93a386Sopenharmony_ci // try again with some multiples of P, to check our periodicity 2100cb93a386Sopenharmony_ci test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) { 2101cb93a386Sopenharmony_ci return approx_tan(x + 3*P); 2102cb93a386Sopenharmony_ci }); 2103cb93a386Sopenharmony_ci test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) { 2104cb93a386Sopenharmony_ci return approx_tan(x - 3*P); 2105cb93a386Sopenharmony_ci }); 2106cb93a386Sopenharmony_ci } 2107cb93a386Sopenharmony_ci if (0) { SkDebugf("tan error %g\n", err); } 2108cb93a386Sopenharmony_ci } 2109cb93a386Sopenharmony_ci 2110cb93a386Sopenharmony_ci // asin, acos, atan 2111cb93a386Sopenharmony_ci { 2112cb93a386Sopenharmony_ci constexpr float tol = 0.00175f; 2113cb93a386Sopenharmony_ci float err = 0; 2114cb93a386Sopenharmony_ci for (float x = -1; x <= 1; x += 1.0f/64) { 2115cb93a386Sopenharmony_ci err += test(x, asin(x), tol, [](skvm::F32 x) { 2116cb93a386Sopenharmony_ci return approx_asin(x); 2117cb93a386Sopenharmony_ci }); 2118cb93a386Sopenharmony_ci test(x, acos(x), tol, [](skvm::F32 x) { 2119cb93a386Sopenharmony_ci return approx_acos(x); 2120cb93a386Sopenharmony_ci }); 2121cb93a386Sopenharmony_ci } 2122cb93a386Sopenharmony_ci if (0) { SkDebugf("asin error %g\n", err); } 2123cb93a386Sopenharmony_ci 
2124cb93a386Sopenharmony_ci err = 0; 2125cb93a386Sopenharmony_ci for (float x = -10; x <= 10; x += 1.0f/16) { 2126cb93a386Sopenharmony_ci err += test(x, atan(x), tol, [](skvm::F32 x) { 2127cb93a386Sopenharmony_ci return approx_atan(x); 2128cb93a386Sopenharmony_ci }); 2129cb93a386Sopenharmony_ci } 2130cb93a386Sopenharmony_ci if (0) { SkDebugf("atan error %g\n", err); } 2131cb93a386Sopenharmony_ci 2132cb93a386Sopenharmony_ci for (float y = -3; y <= 3; y += 1) { 2133cb93a386Sopenharmony_ci for (float x = -3; x <= 3; x += 1) { 2134cb93a386Sopenharmony_ci err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) { 2135cb93a386Sopenharmony_ci return approx_atan2(y,x); 2136cb93a386Sopenharmony_ci }); 2137cb93a386Sopenharmony_ci } 2138cb93a386Sopenharmony_ci } 2139cb93a386Sopenharmony_ci if (0) { SkDebugf("atan2 error %g\n", err); } 2140cb93a386Sopenharmony_ci } 2141cb93a386Sopenharmony_ci} 2142cb93a386Sopenharmony_ci 2143cb93a386Sopenharmony_ciDEF_TEST(SkVM_min_max, r) { 2144cb93a386Sopenharmony_ci // min() and max() have subtle behavior when one argument is NaN and 2145cb93a386Sopenharmony_ci // the other isn't. It's not sound to blindly swap their arguments. 2146cb93a386Sopenharmony_ci // 2147cb93a386Sopenharmony_ci // All backends must behave like std::min() and std::max(), which are 2148cb93a386Sopenharmony_ci // 2149cb93a386Sopenharmony_ci // min(x,y) = y<x ? y : x 2150cb93a386Sopenharmony_ci // max(x,y) = x<y ? 
y : x 2151cb93a386Sopenharmony_ci 2152cb93a386Sopenharmony_ci // ±NaN, ±0, ±1, ±inf 2153cb93a386Sopenharmony_ci const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000, 2154cb93a386Sopenharmony_ci 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000}; 2155cb93a386Sopenharmony_ci 2156cb93a386Sopenharmony_ci float f[8]; 2157cb93a386Sopenharmony_ci memcpy(f, bits, sizeof(bits)); 2158cb93a386Sopenharmony_ci 2159cb93a386Sopenharmony_ci auto identical = [&](float x, float y) { 2160cb93a386Sopenharmony_ci uint32_t X,Y; 2161cb93a386Sopenharmony_ci memcpy(&X, &x, 4); 2162cb93a386Sopenharmony_ci memcpy(&Y, &y, 4); 2163cb93a386Sopenharmony_ci return X == Y; 2164cb93a386Sopenharmony_ci }; 2165cb93a386Sopenharmony_ci 2166cb93a386Sopenharmony_ci // Test min/max with non-constant x, non-constant y. 2167cb93a386Sopenharmony_ci // (Whether x and y are varying or uniform shouldn't make any difference.) 2168cb93a386Sopenharmony_ci { 2169cb93a386Sopenharmony_ci skvm::Builder b; 2170cb93a386Sopenharmony_ci { 2171cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(), 2172cb93a386Sopenharmony_ci mn = b.varying<float>(), 2173cb93a386Sopenharmony_ci mx = b.varying<float>(); 2174cb93a386Sopenharmony_ci 2175cb93a386Sopenharmony_ci skvm::F32 x = b.loadF(src), 2176cb93a386Sopenharmony_ci y = b.uniformF(b.uniform(), 0); 2177cb93a386Sopenharmony_ci 2178cb93a386Sopenharmony_ci b.storeF(mn, b.min(x,y)); 2179cb93a386Sopenharmony_ci b.storeF(mx, b.max(x,y)); 2180cb93a386Sopenharmony_ci } 2181cb93a386Sopenharmony_ci 2182cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2183cb93a386Sopenharmony_ci float mn[8], mx[8]; 2184cb93a386Sopenharmony_ci for (int i = 0; i < 8; i++) { 2185cb93a386Sopenharmony_ci // min() and max() everything with f[i]. 
2186cb93a386Sopenharmony_ci program.eval(8, f,mn,mx, &f[i]); 2187cb93a386Sopenharmony_ci 2188cb93a386Sopenharmony_ci for (int j = 0; j < 8; j++) { 2189cb93a386Sopenharmony_ci REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i]))); 2190cb93a386Sopenharmony_ci REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i]))); 2191cb93a386Sopenharmony_ci } 2192cb93a386Sopenharmony_ci } 2193cb93a386Sopenharmony_ci }); 2194cb93a386Sopenharmony_ci } 2195cb93a386Sopenharmony_ci 2196cb93a386Sopenharmony_ci // Test each with constant on the right. 2197cb93a386Sopenharmony_ci for (int i = 0; i < 8; i++) { 2198cb93a386Sopenharmony_ci skvm::Builder b; 2199cb93a386Sopenharmony_ci { 2200cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(), 2201cb93a386Sopenharmony_ci mn = b.varying<float>(), 2202cb93a386Sopenharmony_ci mx = b.varying<float>(); 2203cb93a386Sopenharmony_ci 2204cb93a386Sopenharmony_ci skvm::F32 x = b.loadF(src), 2205cb93a386Sopenharmony_ci y = b.splat(f[i]); 2206cb93a386Sopenharmony_ci 2207cb93a386Sopenharmony_ci b.storeF(mn, b.min(x,y)); 2208cb93a386Sopenharmony_ci b.storeF(mx, b.max(x,y)); 2209cb93a386Sopenharmony_ci } 2210cb93a386Sopenharmony_ci 2211cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2212cb93a386Sopenharmony_ci float mn[8], mx[8]; 2213cb93a386Sopenharmony_ci program.eval(8, f,mn,mx); 2214cb93a386Sopenharmony_ci for (int j = 0; j < 8; j++) { 2215cb93a386Sopenharmony_ci REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i]))); 2216cb93a386Sopenharmony_ci REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i]))); 2217cb93a386Sopenharmony_ci } 2218cb93a386Sopenharmony_ci }); 2219cb93a386Sopenharmony_ci } 2220cb93a386Sopenharmony_ci 2221cb93a386Sopenharmony_ci // Test each with constant on the left. 
2222cb93a386Sopenharmony_ci for (int i = 0; i < 8; i++) { 2223cb93a386Sopenharmony_ci skvm::Builder b; 2224cb93a386Sopenharmony_ci { 2225cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(), 2226cb93a386Sopenharmony_ci mn = b.varying<float>(), 2227cb93a386Sopenharmony_ci mx = b.varying<float>(); 2228cb93a386Sopenharmony_ci 2229cb93a386Sopenharmony_ci skvm::F32 x = b.splat(f[i]), 2230cb93a386Sopenharmony_ci y = b.loadF(src); 2231cb93a386Sopenharmony_ci 2232cb93a386Sopenharmony_ci b.storeF(mn, b.min(x,y)); 2233cb93a386Sopenharmony_ci b.storeF(mx, b.max(x,y)); 2234cb93a386Sopenharmony_ci } 2235cb93a386Sopenharmony_ci 2236cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2237cb93a386Sopenharmony_ci float mn[8], mx[8]; 2238cb93a386Sopenharmony_ci program.eval(8, f,mn,mx); 2239cb93a386Sopenharmony_ci for (int j = 0; j < 8; j++) { 2240cb93a386Sopenharmony_ci REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j]))); 2241cb93a386Sopenharmony_ci REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j]))); 2242cb93a386Sopenharmony_ci } 2243cb93a386Sopenharmony_ci }); 2244cb93a386Sopenharmony_ci } 2245cb93a386Sopenharmony_ci} 2246cb93a386Sopenharmony_ci 2247cb93a386Sopenharmony_ciDEF_TEST(SkVM_halfs, r) { 2248cb93a386Sopenharmony_ci const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000, 2249cb93a386Sopenharmony_ci 0xc400,0xb800,0xbc00,0xc000}; 2250cb93a386Sopenharmony_ci const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f, 2251cb93a386Sopenharmony_ci -4.0f,-0.5f,-1.0f,-2.0f}; 2252cb93a386Sopenharmony_ci { 2253cb93a386Sopenharmony_ci skvm::Builder b; 2254cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<uint16_t>(), 2255cb93a386Sopenharmony_ci dst = b.varying<float>(); 2256cb93a386Sopenharmony_ci b.storeF(dst, b.from_fp16(b.load16(src))); 2257cb93a386Sopenharmony_ci 2258cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2259cb93a386Sopenharmony_ci float dst[8]; 2260cb93a386Sopenharmony_ci program.eval(8, hs, 
dst); 2261cb93a386Sopenharmony_ci for (int i = 0; i < 8; i++) { 2262cb93a386Sopenharmony_ci REPORTER_ASSERT(r, dst[i] == fs[i]); 2263cb93a386Sopenharmony_ci } 2264cb93a386Sopenharmony_ci }); 2265cb93a386Sopenharmony_ci } 2266cb93a386Sopenharmony_ci { 2267cb93a386Sopenharmony_ci skvm::Builder b; 2268cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(), 2269cb93a386Sopenharmony_ci dst = b.varying<uint16_t>(); 2270cb93a386Sopenharmony_ci b.store16(dst, b.to_fp16(b.loadF(src))); 2271cb93a386Sopenharmony_ci 2272cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2273cb93a386Sopenharmony_ci uint16_t dst[8]; 2274cb93a386Sopenharmony_ci program.eval(8, fs, dst); 2275cb93a386Sopenharmony_ci for (int i = 0; i < 8; i++) { 2276cb93a386Sopenharmony_ci REPORTER_ASSERT(r, dst[i] == hs[i]); 2277cb93a386Sopenharmony_ci } 2278cb93a386Sopenharmony_ci }); 2279cb93a386Sopenharmony_ci } 2280cb93a386Sopenharmony_ci} 2281cb93a386Sopenharmony_ci 2282cb93a386Sopenharmony_ciDEF_TEST(SkVM_64bit, r) { 2283cb93a386Sopenharmony_ci uint32_t lo[65], 2284cb93a386Sopenharmony_ci hi[65]; 2285cb93a386Sopenharmony_ci uint64_t wide[65]; 2286cb93a386Sopenharmony_ci for (int i = 0; i < 65; i++) { 2287cb93a386Sopenharmony_ci lo[i] = 2*i+0; 2288cb93a386Sopenharmony_ci hi[i] = 2*i+1; 2289cb93a386Sopenharmony_ci wide[i] = ((uint64_t)lo[i] << 0) 2290cb93a386Sopenharmony_ci | ((uint64_t)hi[i] << 32); 2291cb93a386Sopenharmony_ci } 2292cb93a386Sopenharmony_ci 2293cb93a386Sopenharmony_ci { 2294cb93a386Sopenharmony_ci skvm::Builder b; 2295cb93a386Sopenharmony_ci { 2296cb93a386Sopenharmony_ci skvm::Ptr widePtr = b.varying<uint64_t>(), 2297cb93a386Sopenharmony_ci loPtr = b.varying<int>(), 2298cb93a386Sopenharmony_ci hiPtr = b.varying<int>(); 2299cb93a386Sopenharmony_ci b.store32(loPtr, b.load64(widePtr, 0)); 2300cb93a386Sopenharmony_ci b.store32(hiPtr, b.load64(widePtr, 1)); 2301cb93a386Sopenharmony_ci } 2302cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const 
skvm::Program& program){ 2303cb93a386Sopenharmony_ci uint32_t l[65], h[65]; 2304cb93a386Sopenharmony_ci program.eval(65, wide,l,h); 2305cb93a386Sopenharmony_ci for (int i = 0; i < 65; i++) { 2306cb93a386Sopenharmony_ci REPORTER_ASSERT(r, l[i] == lo[i]); 2307cb93a386Sopenharmony_ci REPORTER_ASSERT(r, h[i] == hi[i]); 2308cb93a386Sopenharmony_ci } 2309cb93a386Sopenharmony_ci }); 2310cb93a386Sopenharmony_ci } 2311cb93a386Sopenharmony_ci 2312cb93a386Sopenharmony_ci { 2313cb93a386Sopenharmony_ci skvm::Builder b; 2314cb93a386Sopenharmony_ci { 2315cb93a386Sopenharmony_ci skvm::Ptr widePtr = b.varying<uint64_t>(), 2316cb93a386Sopenharmony_ci loPtr = b.varying<int>(), 2317cb93a386Sopenharmony_ci hiPtr = b.varying<int>(); 2318cb93a386Sopenharmony_ci b.store64(widePtr, b.load32(loPtr), b.load32(hiPtr)); 2319cb93a386Sopenharmony_ci } 2320cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2321cb93a386Sopenharmony_ci uint64_t w[65]; 2322cb93a386Sopenharmony_ci program.eval(65, w,lo,hi); 2323cb93a386Sopenharmony_ci for (int i = 0; i < 65; i++) { 2324cb93a386Sopenharmony_ci REPORTER_ASSERT(r, w[i] == wide[i]); 2325cb93a386Sopenharmony_ci } 2326cb93a386Sopenharmony_ci }); 2327cb93a386Sopenharmony_ci } 2328cb93a386Sopenharmony_ci} 2329cb93a386Sopenharmony_ci 2330cb93a386Sopenharmony_ciDEF_TEST(SkVM_128bit, r) { 2331cb93a386Sopenharmony_ci float floats[4*63]; 2332cb93a386Sopenharmony_ci uint8_t packed[4*63]; 2333cb93a386Sopenharmony_ci 2334cb93a386Sopenharmony_ci for (int i = 0; i < 4*63; i++) { 2335cb93a386Sopenharmony_ci floats[i] = i * (1/255.0f); 2336cb93a386Sopenharmony_ci } 2337cb93a386Sopenharmony_ci 2338cb93a386Sopenharmony_ci skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType), 2339cb93a386Sopenharmony_ci rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType); 2340cb93a386Sopenharmony_ci 2341cb93a386Sopenharmony_ci { // Convert RGBA F32 to RGBA 8888, testing 128-bit loads. 
2342cb93a386Sopenharmony_ci skvm::Builder b; 2343cb93a386Sopenharmony_ci { 2344cb93a386Sopenharmony_ci skvm::Ptr dst = b.varying(4), 2345cb93a386Sopenharmony_ci src = b.varying(16); 2346cb93a386Sopenharmony_ci 2347cb93a386Sopenharmony_ci skvm::Color c = b.load(rgba_ffff, src); 2348cb93a386Sopenharmony_ci b.store(rgba_8888, dst, c); 2349cb93a386Sopenharmony_ci } 2350cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2351cb93a386Sopenharmony_ci memset(packed, 0, sizeof(packed)); 2352cb93a386Sopenharmony_ci program.eval(63, packed, floats); 2353cb93a386Sopenharmony_ci for (int i = 0; i < 4*63; i++) { 2354cb93a386Sopenharmony_ci REPORTER_ASSERT(r, packed[i] == i); 2355cb93a386Sopenharmony_ci } 2356cb93a386Sopenharmony_ci }); 2357cb93a386Sopenharmony_ci } 2358cb93a386Sopenharmony_ci 2359cb93a386Sopenharmony_ci 2360cb93a386Sopenharmony_ci { // Convert RGBA 8888 to RGBA F32, testing 128-bit stores. 2361cb93a386Sopenharmony_ci skvm::Builder b; 2362cb93a386Sopenharmony_ci { 2363cb93a386Sopenharmony_ci skvm::Ptr dst = b.varying(16), 2364cb93a386Sopenharmony_ci src = b.varying(4); 2365cb93a386Sopenharmony_ci 2366cb93a386Sopenharmony_ci skvm::Color c = b.load(rgba_8888, src); 2367cb93a386Sopenharmony_ci b.store(rgba_ffff, dst, c); 2368cb93a386Sopenharmony_ci } 2369cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2370cb93a386Sopenharmony_ci memset(floats, 0, sizeof(floats)); 2371cb93a386Sopenharmony_ci program.eval(63, floats, packed); 2372cb93a386Sopenharmony_ci for (int i = 0; i < 4*63; i++) { 2373cb93a386Sopenharmony_ci REPORTER_ASSERT(r, floats[i] == i * (1/255.0f)); 2374cb93a386Sopenharmony_ci } 2375cb93a386Sopenharmony_ci }); 2376cb93a386Sopenharmony_ci } 2377cb93a386Sopenharmony_ci 2378cb93a386Sopenharmony_ci} 2379cb93a386Sopenharmony_ci 2380cb93a386Sopenharmony_ciDEF_TEST(SkVM_is_NaN_is_finite, r) { 2381cb93a386Sopenharmony_ci skvm::Builder b; 2382cb93a386Sopenharmony_ci { 
2383cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(), 2384cb93a386Sopenharmony_ci nan = b.varying<int>(), 2385cb93a386Sopenharmony_ci fin = b.varying<int>(); 2386cb93a386Sopenharmony_ci b.store32(nan, is_NaN (b.loadF(src))); 2387cb93a386Sopenharmony_ci b.store32(fin, is_finite(b.loadF(src))); 2388cb93a386Sopenharmony_ci } 2389cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2390cb93a386Sopenharmony_ci // ±NaN, ±0, ±1, ±inf 2391cb93a386Sopenharmony_ci const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000, 2392cb93a386Sopenharmony_ci 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000}; 2393cb93a386Sopenharmony_ci uint32_t nan[8], fin[8]; 2394cb93a386Sopenharmony_ci program.eval(8, bits, nan,fin); 2395cb93a386Sopenharmony_ci 2396cb93a386Sopenharmony_ci for (int i = 0; i < 8; i++) { 2397cb93a386Sopenharmony_ci REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0)); 2398cb93a386Sopenharmony_ci REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 || 2399cb93a386Sopenharmony_ci i == 4 || i == 5) ? 0xffffffff : 0)); 2400cb93a386Sopenharmony_ci } 2401cb93a386Sopenharmony_ci }); 2402cb93a386Sopenharmony_ci} 2403cb93a386Sopenharmony_ci 2404cb93a386Sopenharmony_ciDEF_TEST(SkVM_args, r) { 2405cb93a386Sopenharmony_ci // Test we can handle at least six arguments. 
2406cb93a386Sopenharmony_ci skvm::Builder b; 2407cb93a386Sopenharmony_ci { 2408cb93a386Sopenharmony_ci skvm::Ptr dst = b.varying<float>(), 2409cb93a386Sopenharmony_ci A = b.varying<float>(), 2410cb93a386Sopenharmony_ci B = b.varying<float>(), 2411cb93a386Sopenharmony_ci C = b.varying<float>(), 2412cb93a386Sopenharmony_ci D = b.varying<float>(), 2413cb93a386Sopenharmony_ci E = b.varying<float>(); 2414cb93a386Sopenharmony_ci storeF(dst, b.loadF(A) 2415cb93a386Sopenharmony_ci + b.loadF(B) 2416cb93a386Sopenharmony_ci + b.loadF(C) 2417cb93a386Sopenharmony_ci + b.loadF(D) 2418cb93a386Sopenharmony_ci + b.loadF(E)); 2419cb93a386Sopenharmony_ci } 2420cb93a386Sopenharmony_ci 2421cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2422cb93a386Sopenharmony_ci float dst[17],A[17],B[17],C[17],D[17],E[17]; 2423cb93a386Sopenharmony_ci for (int i = 0; i < 17; i++) { 2424cb93a386Sopenharmony_ci A[i] = B[i] = C[i] = D[i] = E[i] = (float)i; 2425cb93a386Sopenharmony_ci } 2426cb93a386Sopenharmony_ci program.eval(17, dst,A,B,C,D,E); 2427cb93a386Sopenharmony_ci for (int i = 0; i < 17; i++) { 2428cb93a386Sopenharmony_ci REPORTER_ASSERT(r, dst[i] == 5.0f*i); 2429cb93a386Sopenharmony_ci } 2430cb93a386Sopenharmony_ci }); 2431cb93a386Sopenharmony_ci} 2432cb93a386Sopenharmony_ci 2433cb93a386Sopenharmony_ciDEF_TEST(SkVM_badpack, reporter) { 2434cb93a386Sopenharmony_ci // Test case distilled from actual failing draw, 2435cb93a386Sopenharmony_ci // originally with a bad arm64 implementation of pack(). 
2436cb93a386Sopenharmony_ci skvm::Builder p; 2437cb93a386Sopenharmony_ci { 2438cb93a386Sopenharmony_ci skvm::UPtr uniforms = p.uniform(); 2439cb93a386Sopenharmony_ci skvm::Ptr dst = p.varying<uint16_t>(); 2440cb93a386Sopenharmony_ci 2441cb93a386Sopenharmony_ci skvm::I32 r = round(p.uniformF(uniforms, 8) * 15), 2442cb93a386Sopenharmony_ci a = p.splat(0xf); 2443cb93a386Sopenharmony_ci 2444cb93a386Sopenharmony_ci skvm::I32 _4444 = p.splat(0); 2445cb93a386Sopenharmony_ci _4444 = pack(_4444, r, 12); 2446cb93a386Sopenharmony_ci _4444 = pack(_4444, a, 0); 2447cb93a386Sopenharmony_ci store16(dst, _4444); 2448cb93a386Sopenharmony_ci } 2449cb93a386Sopenharmony_ci 2450cb93a386Sopenharmony_ci test_jit_and_interpreter(p, [&](const skvm::Program& program){ 2451cb93a386Sopenharmony_ci const float uniforms[] = { 0.0f, 0.0f, 2452cb93a386Sopenharmony_ci 1.0f, 0.0f, 0.0f, 1.0f }; 2453cb93a386Sopenharmony_ci 2454cb93a386Sopenharmony_ci uint16_t dst[17] = {0}; 2455cb93a386Sopenharmony_ci program.eval(17, uniforms,dst); 2456cb93a386Sopenharmony_ci for (int i = 0; i < 17; i++) { 2457cb93a386Sopenharmony_ci REPORTER_ASSERT(reporter, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f); 2458cb93a386Sopenharmony_ci } 2459cb93a386Sopenharmony_ci }); 2460cb93a386Sopenharmony_ci} 2461cb93a386Sopenharmony_ci 2462cb93a386Sopenharmony_ciDEF_TEST(SkVM_features, r) { 2463cb93a386Sopenharmony_ci auto build_program = [](skvm::Builder* b) { 2464cb93a386Sopenharmony_ci skvm::F32 x = b->loadF(b->varying<float>()); 2465cb93a386Sopenharmony_ci b->storeF(b->varying<float>(), x*x+x); 2466cb93a386Sopenharmony_ci }; 2467cb93a386Sopenharmony_ci 2468cb93a386Sopenharmony_ci { // load-fma-store with FMA available. 
2469cb93a386Sopenharmony_ci skvm::Features features; 2470cb93a386Sopenharmony_ci features.fma = true; 2471cb93a386Sopenharmony_ci skvm::Builder b(features); 2472cb93a386Sopenharmony_ci build_program(&b); 2473cb93a386Sopenharmony_ci REPORTER_ASSERT(r, b.optimize().size() == 3); 2474cb93a386Sopenharmony_ci } 2475cb93a386Sopenharmony_ci 2476cb93a386Sopenharmony_ci { // load-mul-add-store without FMA. 2477cb93a386Sopenharmony_ci skvm::Features features; 2478cb93a386Sopenharmony_ci features.fma = false; 2479cb93a386Sopenharmony_ci skvm::Builder b(features); 2480cb93a386Sopenharmony_ci build_program(&b); 2481cb93a386Sopenharmony_ci REPORTER_ASSERT(r, b.optimize().size() == 4); 2482cb93a386Sopenharmony_ci } 2483cb93a386Sopenharmony_ci 2484cb93a386Sopenharmony_ci { // Auto-detected, could be either. 2485cb93a386Sopenharmony_ci skvm::Builder b; 2486cb93a386Sopenharmony_ci build_program(&b); 2487cb93a386Sopenharmony_ci REPORTER_ASSERT(r, b.optimize().size() == 3 2488cb93a386Sopenharmony_ci || b.optimize().size() == 4); 2489cb93a386Sopenharmony_ci } 2490cb93a386Sopenharmony_ci} 2491cb93a386Sopenharmony_ci 2492cb93a386Sopenharmony_ciDEF_TEST(SkVM_gather_can_hoist, r) { 2493cb93a386Sopenharmony_ci // A gather instruction isn't necessarily varying... it's whatever its index is. 2494cb93a386Sopenharmony_ci // First a typical gather scenario with varying index. 2495cb93a386Sopenharmony_ci { 2496cb93a386Sopenharmony_ci skvm::Builder b; 2497cb93a386Sopenharmony_ci skvm::UPtr uniforms = b.uniform(); 2498cb93a386Sopenharmony_ci skvm::Ptr buf = b.varying<int>(); 2499cb93a386Sopenharmony_ci skvm::I32 ix = b.load32(buf); 2500cb93a386Sopenharmony_ci b.store32(buf, b.gather32(uniforms,0, ix)); 2501cb93a386Sopenharmony_ci 2502cb93a386Sopenharmony_ci skvm::Program p = b.done(); 2503cb93a386Sopenharmony_ci 2504cb93a386Sopenharmony_ci // ix is varying, so the gather is too. 
2505cb93a386Sopenharmony_ci // 2506cb93a386Sopenharmony_ci // loop: 2507cb93a386Sopenharmony_ci // v0 = load32 buf 2508cb93a386Sopenharmony_ci // v1 = gather32 uniforms+0 v0 2509cb93a386Sopenharmony_ci // store32 buf v1 2510cb93a386Sopenharmony_ci REPORTER_ASSERT(r, p.instructions().size() == 3); 2511cb93a386Sopenharmony_ci REPORTER_ASSERT(r, p.loop() == 0); 2512cb93a386Sopenharmony_ci } 2513cb93a386Sopenharmony_ci 2514cb93a386Sopenharmony_ci // Now the same but with a uniform index instead. 2515cb93a386Sopenharmony_ci { 2516cb93a386Sopenharmony_ci skvm::Builder b; 2517cb93a386Sopenharmony_ci skvm::UPtr uniforms = b.uniform(); 2518cb93a386Sopenharmony_ci skvm::Ptr buf = b.varying<int>(); 2519cb93a386Sopenharmony_ci skvm::I32 ix = b.uniform32(uniforms,8); 2520cb93a386Sopenharmony_ci b.store32(buf, b.gather32(uniforms,0, ix)); 2521cb93a386Sopenharmony_ci 2522cb93a386Sopenharmony_ci skvm::Program p = b.done(); 2523cb93a386Sopenharmony_ci 2524cb93a386Sopenharmony_ci // ix is uniform, so the gather is too. 2525cb93a386Sopenharmony_ci // 2526cb93a386Sopenharmony_ci // v0 = uniform32 uniforms+8 2527cb93a386Sopenharmony_ci // v1 = gather32 uniforms+0 v0 2528cb93a386Sopenharmony_ci // loop: 2529cb93a386Sopenharmony_ci // store32 buf v1 2530cb93a386Sopenharmony_ci REPORTER_ASSERT(r, p.instructions().size() == 3); 2531cb93a386Sopenharmony_ci REPORTER_ASSERT(r, p.loop() == 2); 2532cb93a386Sopenharmony_ci } 2533cb93a386Sopenharmony_ci} 2534cb93a386Sopenharmony_ci 2535cb93a386Sopenharmony_ciDEF_TEST(SkVM_dont_dedup_loads, r) { 2536cb93a386Sopenharmony_ci // We've been assuming that all Ops with the same arguments produce the same value 2537cb93a386Sopenharmony_ci // and deduplicating them, which results in a simple common subexpression eliminator. 2538cb93a386Sopenharmony_ci // 2539cb93a386Sopenharmony_ci // But we can't soundly dedup two identical loads with a store between. 
2540cb93a386Sopenharmony_ci // If we dedup the loads in this test program it will always increment by 1, not K. 2541cb93a386Sopenharmony_ci constexpr int K = 2; 2542cb93a386Sopenharmony_ci skvm::Builder b; 2543cb93a386Sopenharmony_ci { 2544cb93a386Sopenharmony_ci skvm::Ptr buf = b.varying<int>(); 2545cb93a386Sopenharmony_ci for (int i = 0; i < K; i++) { 2546cb93a386Sopenharmony_ci b.store32(buf, b.load32(buf) + 1); 2547cb93a386Sopenharmony_ci } 2548cb93a386Sopenharmony_ci } 2549cb93a386Sopenharmony_ci 2550cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2551cb93a386Sopenharmony_ci int buf[] = { 0,1,2,3,4 }; 2552cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(buf), buf); 2553cb93a386Sopenharmony_ci for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) { 2554cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[i] == i+K); 2555cb93a386Sopenharmony_ci } 2556cb93a386Sopenharmony_ci }); 2557cb93a386Sopenharmony_ci} 2558cb93a386Sopenharmony_ci 2559cb93a386Sopenharmony_ciDEF_TEST(SkVM_dont_dedup_stores, r) { 2560cb93a386Sopenharmony_ci // Following a similar line of reasoning to SkVM_dont_dedup_loads, 2561cb93a386Sopenharmony_ci // we cannot dedup stores either. A different store between two identical stores 2562cb93a386Sopenharmony_ci // will invalidate the first store, meaning we do need to reissue that store operation. 2563cb93a386Sopenharmony_ci skvm::Builder b; 2564cb93a386Sopenharmony_ci { 2565cb93a386Sopenharmony_ci skvm::Ptr buf = b.varying<int>(); 2566cb93a386Sopenharmony_ci b.store32(buf, b.splat(4)); 2567cb93a386Sopenharmony_ci b.store32(buf, b.splat(5)); 2568cb93a386Sopenharmony_ci b.store32(buf, b.splat(4)); // If we dedup'd, we'd skip this store. 
2569cb93a386Sopenharmony_ci } 2570cb93a386Sopenharmony_ci 2571cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2572cb93a386Sopenharmony_ci int buf[42]; 2573cb93a386Sopenharmony_ci program.eval(SK_ARRAY_COUNT(buf), buf); 2574cb93a386Sopenharmony_ci for (int x : buf) { 2575cb93a386Sopenharmony_ci REPORTER_ASSERT(r, x == 4); 2576cb93a386Sopenharmony_ci } 2577cb93a386Sopenharmony_ci }); 2578cb93a386Sopenharmony_ci} 2579cb93a386Sopenharmony_ci 2580cb93a386Sopenharmony_ciDEF_TEST(SkVM_fast_mul, r) { 2581cb93a386Sopenharmony_ci skvm::Builder b; 2582cb93a386Sopenharmony_ci { 2583cb93a386Sopenharmony_ci skvm::Ptr src = b.varying<float>(), 2584cb93a386Sopenharmony_ci fast = b.varying<float>(), 2585cb93a386Sopenharmony_ci slow = b.varying<float>(); 2586cb93a386Sopenharmony_ci skvm::F32 x = b.loadF(src); 2587cb93a386Sopenharmony_ci b.storeF(fast, fast_mul(0.0f, x)); 2588cb93a386Sopenharmony_ci b.storeF(slow, 0.0f * x); 2589cb93a386Sopenharmony_ci } 2590cb93a386Sopenharmony_ci test_jit_and_interpreter(b, [&](const skvm::Program& program){ 2591cb93a386Sopenharmony_ci const uint32_t bits[] = { 2592cb93a386Sopenharmony_ci 0x0000'0000, 0x8000'0000, //±0 2593cb93a386Sopenharmony_ci 0x3f80'0000, 0xbf80'0000, //±1 2594cb93a386Sopenharmony_ci 0x7f80'0000, 0xff80'0000, //±inf 2595cb93a386Sopenharmony_ci 0x7f80'0001, 0xff80'0001, //±NaN 2596cb93a386Sopenharmony_ci }; 2597cb93a386Sopenharmony_ci float fast[8], 2598cb93a386Sopenharmony_ci slow[8]; 2599cb93a386Sopenharmony_ci program.eval(8,bits,fast,slow); 2600cb93a386Sopenharmony_ci 2601cb93a386Sopenharmony_ci for (int i = 0; i < 8; i++) { 2602cb93a386Sopenharmony_ci REPORTER_ASSERT(r, fast[i] == 0.0f); 2603cb93a386Sopenharmony_ci 2604cb93a386Sopenharmony_ci if (i < 4) { 2605cb93a386Sopenharmony_ci REPORTER_ASSERT(r, slow[i] == 0.0f); 2606cb93a386Sopenharmony_ci } else { 2607cb93a386Sopenharmony_ci REPORTER_ASSERT(r, isnan(slow[i])); 2608cb93a386Sopenharmony_ci } 2609cb93a386Sopenharmony_ci } 
2610cb93a386Sopenharmony_ci }); 2611cb93a386Sopenharmony_ci} 2612