xref: /third_party/skia/tests/SkVMTest.cpp (revision cb93a386)
1cb93a386Sopenharmony_ci/*
2cb93a386Sopenharmony_ci * Copyright 2019 Google LLC
3cb93a386Sopenharmony_ci *
4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be
5cb93a386Sopenharmony_ci * found in the LICENSE file.
6cb93a386Sopenharmony_ci */
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include "include/core/SkColorPriv.h"
9cb93a386Sopenharmony_ci#include "include/private/SkColorData.h"
10cb93a386Sopenharmony_ci#include "src/core/SkCpu.h"
11cb93a386Sopenharmony_ci#include "src/core/SkMSAN.h"
12cb93a386Sopenharmony_ci#include "src/core/SkVM.h"
13cb93a386Sopenharmony_ci#include "tests/Test.h"
14cb93a386Sopenharmony_ci
15cb93a386Sopenharmony_citemplate <typename Fn>
16cb93a386Sopenharmony_cistatic void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) {
17cb93a386Sopenharmony_ci    skvm::Program p = b.done();
18cb93a386Sopenharmony_ci    test(p);
19cb93a386Sopenharmony_ci    if (p.hasJIT()) {
20cb93a386Sopenharmony_ci        test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false));
21cb93a386Sopenharmony_ci    }
22cb93a386Sopenharmony_ci}
23cb93a386Sopenharmony_ci
24cb93a386Sopenharmony_ciDEF_TEST(SkVM_eliminate_dead_code, r) {
25cb93a386Sopenharmony_ci    skvm::Builder b;
26cb93a386Sopenharmony_ci    {
27cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<int>();
28cb93a386Sopenharmony_ci        skvm::I32 l = b.load32(arg);
29cb93a386Sopenharmony_ci        skvm::I32 a = b.add(l, l);
30cb93a386Sopenharmony_ci        b.add(a, b.splat(7));
31cb93a386Sopenharmony_ci    }
32cb93a386Sopenharmony_ci
33cb93a386Sopenharmony_ci    std::vector<skvm::Instruction> program = b.program();
34cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, program.size() == 4);
35cb93a386Sopenharmony_ci
36cb93a386Sopenharmony_ci    program = skvm::eliminate_dead_code(program);
37cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, program.size() == 0);
38cb93a386Sopenharmony_ci}
39cb93a386Sopenharmony_ci
40cb93a386Sopenharmony_ciDEF_TEST(SkVM_Pointless, r) {
41cb93a386Sopenharmony_ci    // Let's build a program with no memory arguments.
42cb93a386Sopenharmony_ci    // It should all be pegged as dead code, but we should be able to "run" it.
43cb93a386Sopenharmony_ci    skvm::Builder b;
44cb93a386Sopenharmony_ci    {
45cb93a386Sopenharmony_ci        b.add(b.splat(5.0f),
46cb93a386Sopenharmony_ci              b.splat(4.0f));
47cb93a386Sopenharmony_ci    }
48cb93a386Sopenharmony_ci
49cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
50cb93a386Sopenharmony_ci        for (int N = 0; N < 64; N++) {
51cb93a386Sopenharmony_ci            program.eval(N);
52cb93a386Sopenharmony_ci        }
53cb93a386Sopenharmony_ci    });
54cb93a386Sopenharmony_ci
55cb93a386Sopenharmony_ci    for (const skvm::OptimizedInstruction& inst : b.optimize()) {
56cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
57cb93a386Sopenharmony_ci    }
58cb93a386Sopenharmony_ci}
59cb93a386Sopenharmony_ci
60cb93a386Sopenharmony_ciDEF_TEST(SkVM_memset, r) {
61cb93a386Sopenharmony_ci    skvm::Builder b;
62cb93a386Sopenharmony_ci    b.store32(b.varying<int>(), b.splat(42));
63cb93a386Sopenharmony_ci
64cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
65cb93a386Sopenharmony_ci        int buf[18];
66cb93a386Sopenharmony_ci        buf[17] = 47;
67cb93a386Sopenharmony_ci
68cb93a386Sopenharmony_ci        p.eval(17, buf);
69cb93a386Sopenharmony_ci        for (int i = 0; i < 17; i++) {
70cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] == 42);
71cb93a386Sopenharmony_ci        }
72cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[17] == 47);
73cb93a386Sopenharmony_ci    });
74cb93a386Sopenharmony_ci}
75cb93a386Sopenharmony_ci
76cb93a386Sopenharmony_ciDEF_TEST(SkVM_memcpy, r) {
77cb93a386Sopenharmony_ci    skvm::Builder b;
78cb93a386Sopenharmony_ci    {
79cb93a386Sopenharmony_ci        auto src = b.varying<int>(),
80cb93a386Sopenharmony_ci             dst = b.varying<int>();
81cb93a386Sopenharmony_ci        b.store32(dst, b.load32(src));
82cb93a386Sopenharmony_ci    }
83cb93a386Sopenharmony_ci
84cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
85cb93a386Sopenharmony_ci        int src[] = {1,2,3,4,5,6,7,8,9},
86cb93a386Sopenharmony_ci            dst[] = {0,0,0,0,0,0,0,0,0};
87cb93a386Sopenharmony_ci
88cb93a386Sopenharmony_ci        p.eval(SK_ARRAY_COUNT(src)-1, src, dst);
89cb93a386Sopenharmony_ci        for (size_t i = 0; i < SK_ARRAY_COUNT(src)-1; i++) {
90cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, dst[i] == src[i]);
91cb93a386Sopenharmony_ci        }
92cb93a386Sopenharmony_ci        size_t i = SK_ARRAY_COUNT(src)-1;
93cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, dst[i] == 0);
94cb93a386Sopenharmony_ci    });
95cb93a386Sopenharmony_ci}
96cb93a386Sopenharmony_ci
97cb93a386Sopenharmony_ciDEF_TEST(SkVM_allow_jit, r) {
98cb93a386Sopenharmony_ci    skvm::Builder b;
99cb93a386Sopenharmony_ci    {
100cb93a386Sopenharmony_ci        auto src = b.varying<int>(),
101cb93a386Sopenharmony_ci             dst = b.varying<int>();
102cb93a386Sopenharmony_ci        b.store32(dst, b.load32(src));
103cb93a386Sopenharmony_ci    }
104cb93a386Sopenharmony_ci
105cb93a386Sopenharmony_ci    if (b.done("test-allow_jit", /*allow_jit=*/true).hasJIT()) {
106cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, !b.done("", false).hasJIT());
107cb93a386Sopenharmony_ci    }
108cb93a386Sopenharmony_ci}
109cb93a386Sopenharmony_ci
110cb93a386Sopenharmony_ciDEF_TEST(SkVM_LoopCounts, r) {
111cb93a386Sopenharmony_ci    // Make sure we cover all the exact N we want.
112cb93a386Sopenharmony_ci
113cb93a386Sopenharmony_ci    // buf[i] += 1
114cb93a386Sopenharmony_ci    skvm::Builder b;
115cb93a386Sopenharmony_ci    skvm::Ptr arg = b.varying<int>();
116cb93a386Sopenharmony_ci    b.store32(arg,
117cb93a386Sopenharmony_ci              b.add(b.splat(1),
118cb93a386Sopenharmony_ci                    b.load32(arg)));
119cb93a386Sopenharmony_ci
120cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
121cb93a386Sopenharmony_ci        int buf[64];
122cb93a386Sopenharmony_ci        for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
123cb93a386Sopenharmony_ci            for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
124cb93a386Sopenharmony_ci                buf[i] = i;
125cb93a386Sopenharmony_ci            }
126cb93a386Sopenharmony_ci            program.eval(N, buf);
127cb93a386Sopenharmony_ci
128cb93a386Sopenharmony_ci            for (int i = 0; i < N; i++) {
129cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, buf[i] == i+1);
130cb93a386Sopenharmony_ci            }
131cb93a386Sopenharmony_ci            for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
132cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, buf[i] == i);
133cb93a386Sopenharmony_ci            }
134cb93a386Sopenharmony_ci        }
135cb93a386Sopenharmony_ci    });
136cb93a386Sopenharmony_ci}
137cb93a386Sopenharmony_ci
138cb93a386Sopenharmony_ciDEF_TEST(SkVM_gather32, r) {
139cb93a386Sopenharmony_ci    skvm::Builder b;
140cb93a386Sopenharmony_ci    {
141cb93a386Sopenharmony_ci        skvm::UPtr uniforms = b.uniform();
142cb93a386Sopenharmony_ci        skvm::Ptr buf = b.varying<int>();
143cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(buf);
144cb93a386Sopenharmony_ci        b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
145cb93a386Sopenharmony_ci    }
146cb93a386Sopenharmony_ci
147cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
148cb93a386Sopenharmony_ci        const int img[] = {12,34,56,78, 90,98,76,54};
149cb93a386Sopenharmony_ci
150cb93a386Sopenharmony_ci        int buf[20];
151cb93a386Sopenharmony_ci        for (int i = 0; i < 20; i++) {
152cb93a386Sopenharmony_ci            buf[i] = i;
153cb93a386Sopenharmony_ci        }
154cb93a386Sopenharmony_ci
155cb93a386Sopenharmony_ci        struct Uniforms {
156cb93a386Sopenharmony_ci            const int* img;
157cb93a386Sopenharmony_ci        } uniforms{img};
158cb93a386Sopenharmony_ci
159cb93a386Sopenharmony_ci        program.eval(20, &uniforms, buf);
160cb93a386Sopenharmony_ci        int i = 0;
161cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 12); i++;
162cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 34); i++;
163cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 56); i++;
164cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 78); i++;
165cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 90); i++;
166cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 98); i++;
167cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 76); i++;
168cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 54); i++;
169cb93a386Sopenharmony_ci
170cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 12); i++;
171cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 34); i++;
172cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 56); i++;
173cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 78); i++;
174cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 90); i++;
175cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 98); i++;
176cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 76); i++;
177cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 54); i++;
178cb93a386Sopenharmony_ci
179cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 12); i++;
180cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 34); i++;
181cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 56); i++;
182cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf[i] == 78); i++;
183cb93a386Sopenharmony_ci    });
184cb93a386Sopenharmony_ci}
185cb93a386Sopenharmony_ci
186cb93a386Sopenharmony_ciDEF_TEST(SkVM_gathers, r) {
187cb93a386Sopenharmony_ci    skvm::Builder b;
188cb93a386Sopenharmony_ci    {
189cb93a386Sopenharmony_ci        skvm::UPtr uniforms = b.uniform();
190cb93a386Sopenharmony_ci        skvm::Ptr buf32    = b.varying<int>(),
191cb93a386Sopenharmony_ci                  buf16    = b.varying<uint16_t>(),
192cb93a386Sopenharmony_ci                  buf8     = b.varying<uint8_t>();
193cb93a386Sopenharmony_ci
194cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(buf32);
195cb93a386Sopenharmony_ci
196cb93a386Sopenharmony_ci        b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
197cb93a386Sopenharmony_ci        b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
198cb93a386Sopenharmony_ci        b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
199cb93a386Sopenharmony_ci    }
200cb93a386Sopenharmony_ci
201cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
202cb93a386Sopenharmony_ci        const int img[] = {12,34,56,78, 90,98,76,54};
203cb93a386Sopenharmony_ci
204cb93a386Sopenharmony_ci        constexpr int N = 20;
205cb93a386Sopenharmony_ci        int      buf32[N];
206cb93a386Sopenharmony_ci        uint16_t buf16[N];
207cb93a386Sopenharmony_ci        uint8_t  buf8 [N];
208cb93a386Sopenharmony_ci
209cb93a386Sopenharmony_ci        for (int i = 0; i < 20; i++) {
210cb93a386Sopenharmony_ci            buf32[i] = i;
211cb93a386Sopenharmony_ci        }
212cb93a386Sopenharmony_ci
213cb93a386Sopenharmony_ci        struct Uniforms {
214cb93a386Sopenharmony_ci            const int* img;
215cb93a386Sopenharmony_ci        } uniforms{img};
216cb93a386Sopenharmony_ci
217cb93a386Sopenharmony_ci        program.eval(N, &uniforms, buf32, buf16, buf8);
218cb93a386Sopenharmony_ci        int i = 0;
219cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
220cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
221cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] ==  0); i++;
222cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
223cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
224cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] ==  0 && buf8[i] ==  0); i++;
225cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] ==  0); i++;
226cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] ==  0 && buf8[i] ==  0); i++;
227cb93a386Sopenharmony_ci
228cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
229cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
230cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] ==  0); i++;
231cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
232cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
233cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] ==  0 && buf8[i] ==  0); i++;
234cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] ==  0); i++;
235cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] ==  0 && buf8[i] ==  0); i++;
236cb93a386Sopenharmony_ci
237cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
238cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
239cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] ==  0); i++;
240cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
241cb93a386Sopenharmony_ci    });
242cb93a386Sopenharmony_ci}
243cb93a386Sopenharmony_ci
244cb93a386Sopenharmony_ciDEF_TEST(SkVM_gathers2, r) {
245cb93a386Sopenharmony_ci    skvm::Builder b;
246cb93a386Sopenharmony_ci    {
247cb93a386Sopenharmony_ci        skvm::UPtr uniforms = b.uniform();
248cb93a386Sopenharmony_ci        skvm::Ptr buf32    = b.varying<int>(),
249cb93a386Sopenharmony_ci                  buf16    = b.varying<uint16_t>(),
250cb93a386Sopenharmony_ci                  buf8     = b.varying<uint8_t>();
251cb93a386Sopenharmony_ci
252cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(buf32);
253cb93a386Sopenharmony_ci
254cb93a386Sopenharmony_ci        b.store32(buf32, b.gather32(uniforms,0, x));
255cb93a386Sopenharmony_ci        b.store16(buf16, b.gather16(uniforms,0, x));
256cb93a386Sopenharmony_ci        b.store8 (buf8 , b.gather8 (uniforms,0, x));
257cb93a386Sopenharmony_ci    }
258cb93a386Sopenharmony_ci
259cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
260cb93a386Sopenharmony_ci        uint8_t img[256];
261cb93a386Sopenharmony_ci        for (int i = 0; i < 256; i++) {
262cb93a386Sopenharmony_ci            img[i] = i;
263cb93a386Sopenharmony_ci        }
264cb93a386Sopenharmony_ci
265cb93a386Sopenharmony_ci        int      buf32[64];
266cb93a386Sopenharmony_ci        uint16_t buf16[64];
267cb93a386Sopenharmony_ci        uint8_t  buf8 [64];
268cb93a386Sopenharmony_ci
269cb93a386Sopenharmony_ci        for (int i = 0; i < 64; i++) {
270cb93a386Sopenharmony_ci            buf32[i] = (i*47)&63;
271cb93a386Sopenharmony_ci            buf16[i] = 0;
272cb93a386Sopenharmony_ci            buf8 [i] = 0;
273cb93a386Sopenharmony_ci        }
274cb93a386Sopenharmony_ci
275cb93a386Sopenharmony_ci        struct Uniforms {
276cb93a386Sopenharmony_ci            const uint8_t* img;
277cb93a386Sopenharmony_ci        } uniforms{img};
278cb93a386Sopenharmony_ci
279cb93a386Sopenharmony_ci        program.eval(64, &uniforms, buf32, buf16, buf8);
280cb93a386Sopenharmony_ci
281cb93a386Sopenharmony_ci        for (int i = 0; i < 64; i++) {
282cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf8[i] == ((i*47)&63));  // 0,47,30,13,60,...
283cb93a386Sopenharmony_ci        }
284cb93a386Sopenharmony_ci
285cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
286cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf16[63] == 0x2322);
287cb93a386Sopenharmony_ci
288cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
289cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, buf32[63] == 0x47464544);
290cb93a386Sopenharmony_ci    });
291cb93a386Sopenharmony_ci}
292cb93a386Sopenharmony_ci
293cb93a386Sopenharmony_ciDEF_TEST(SkVM_bitops, r) {
294cb93a386Sopenharmony_ci    skvm::Builder b;
295cb93a386Sopenharmony_ci    {
296cb93a386Sopenharmony_ci        skvm::Ptr ptr = b.varying<int>();
297cb93a386Sopenharmony_ci
298cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(ptr);
299cb93a386Sopenharmony_ci
300cb93a386Sopenharmony_ci        x = b.bit_and  (x, b.splat(0xf1));  // 0x40
301cb93a386Sopenharmony_ci        x = b.bit_or   (x, b.splat(0x80));  // 0xc0
302cb93a386Sopenharmony_ci        x = b.bit_xor  (x, b.splat(0xfe));  // 0x3e
303cb93a386Sopenharmony_ci        x = b.bit_clear(x, b.splat(0x30));  // 0x0e
304cb93a386Sopenharmony_ci
305cb93a386Sopenharmony_ci        x = b.shl(x, 28);  // 0xe000'0000
306cb93a386Sopenharmony_ci        x = b.sra(x, 28);  // 0xffff'fffe
307cb93a386Sopenharmony_ci        x = b.shr(x,  1);  // 0x7fff'ffff
308cb93a386Sopenharmony_ci
309cb93a386Sopenharmony_ci        b.store32(ptr, x);
310cb93a386Sopenharmony_ci    }
311cb93a386Sopenharmony_ci
312cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
313cb93a386Sopenharmony_ci        int x = 0x42;
314cb93a386Sopenharmony_ci        program.eval(1, &x);
315cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, x == 0x7fff'ffff);
316cb93a386Sopenharmony_ci    });
317cb93a386Sopenharmony_ci}
318cb93a386Sopenharmony_ci
319cb93a386Sopenharmony_ciDEF_TEST(SkVM_select_is_NaN, r) {
320cb93a386Sopenharmony_ci    skvm::Builder b;
321cb93a386Sopenharmony_ci    {
322cb93a386Sopenharmony_ci        skvm::Ptr src = b.varying<float>(),
323cb93a386Sopenharmony_ci                  dst = b.varying<float>();
324cb93a386Sopenharmony_ci
325cb93a386Sopenharmony_ci        skvm::F32 x = b.loadF(src);
326cb93a386Sopenharmony_ci        x = select(is_NaN(x), b.splat(0.0f)
327cb93a386Sopenharmony_ci                            , x);
328cb93a386Sopenharmony_ci        b.storeF(dst, x);
329cb93a386Sopenharmony_ci    }
330cb93a386Sopenharmony_ci
331cb93a386Sopenharmony_ci    std::vector<skvm::OptimizedInstruction> program = b.optimize();
332cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, program.size() == 4);
333cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
334cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);
335cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
336cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);
337cb93a386Sopenharmony_ci
338cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
339cb93a386Sopenharmony_ci        // ±NaN, ±0, ±1, ±inf
340cb93a386Sopenharmony_ci        uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
341cb93a386Sopenharmony_ci                          0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
342cb93a386Sopenharmony_ci        uint32_t dst[SK_ARRAY_COUNT(src)];
343cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(src), src, dst);
344cb93a386Sopenharmony_ci
345cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
346cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
347cb93a386Sopenharmony_ci        }
348cb93a386Sopenharmony_ci    });
349cb93a386Sopenharmony_ci}
350cb93a386Sopenharmony_ci
351cb93a386Sopenharmony_ciDEF_TEST(SkVM_f32, r) {
352cb93a386Sopenharmony_ci    skvm::Builder b;
353cb93a386Sopenharmony_ci    {
354cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<float>();
355cb93a386Sopenharmony_ci
356cb93a386Sopenharmony_ci        skvm::F32 x = b.loadF(arg),
357cb93a386Sopenharmony_ci                  y = b.add(x,x),   // y = 2x
358cb93a386Sopenharmony_ci                  z = b.sub(y,x),   // z = 2x-x = x
359cb93a386Sopenharmony_ci                  w = b.div(z,x);   // w = x/x = 1
360cb93a386Sopenharmony_ci        b.storeF(arg, w);
361cb93a386Sopenharmony_ci    }
362cb93a386Sopenharmony_ci
363cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
364cb93a386Sopenharmony_ci        float buf[] = { 1,2,3,4,5,6,7,8,9 };
365cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf);
366cb93a386Sopenharmony_ci        for (float v : buf) {
367cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, v == 1.0f);
368cb93a386Sopenharmony_ci        }
369cb93a386Sopenharmony_ci    });
370cb93a386Sopenharmony_ci}
371cb93a386Sopenharmony_ci
372cb93a386Sopenharmony_ciDEF_TEST(SkVM_cmp_i32, r) {
373cb93a386Sopenharmony_ci    skvm::Builder b;
374cb93a386Sopenharmony_ci    {
375cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(b.varying<int>());
376cb93a386Sopenharmony_ci
377cb93a386Sopenharmony_ci        auto to_bit = [&](int shift, skvm::I32 mask) {
378cb93a386Sopenharmony_ci            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
379cb93a386Sopenharmony_ci        };
380cb93a386Sopenharmony_ci
381cb93a386Sopenharmony_ci        skvm::I32 m = b.splat(0);
382cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
383cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
384cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
385cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
386cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
387cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));
388cb93a386Sopenharmony_ci
389cb93a386Sopenharmony_ci        b.store32(b.varying<int>(), m);
390cb93a386Sopenharmony_ci    }
391cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
392cb93a386Sopenharmony_ci        int in[] = { 0,1,2,3,4,5,6,7,8,9 };
393cb93a386Sopenharmony_ci        int out[SK_ARRAY_COUNT(in)];
394cb93a386Sopenharmony_ci
395cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(in), in, out);
396cb93a386Sopenharmony_ci
397cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[0] == 0b001111);
398cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[1] == 0b001100);
399cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[2] == 0b001010);
400cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[3] == 0b001010);
401cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[4] == 0b000010);
402cb93a386Sopenharmony_ci        for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
403cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, out[i] == 0b110010);
404cb93a386Sopenharmony_ci        }
405cb93a386Sopenharmony_ci    });
406cb93a386Sopenharmony_ci}
407cb93a386Sopenharmony_ci
408cb93a386Sopenharmony_ciDEF_TEST(SkVM_cmp_f32, r) {
409cb93a386Sopenharmony_ci    skvm::Builder b;
410cb93a386Sopenharmony_ci    {
411cb93a386Sopenharmony_ci        skvm::F32 x = b.loadF(b.varying<float>());
412cb93a386Sopenharmony_ci
413cb93a386Sopenharmony_ci        auto to_bit = [&](int shift, skvm::I32 mask) {
414cb93a386Sopenharmony_ci            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
415cb93a386Sopenharmony_ci        };
416cb93a386Sopenharmony_ci
417cb93a386Sopenharmony_ci        skvm::I32 m = b.splat(0);
418cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
419cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
420cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
421cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
422cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
423cb93a386Sopenharmony_ci        m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));
424cb93a386Sopenharmony_ci
425cb93a386Sopenharmony_ci        b.store32(b.varying<int>(), m);
426cb93a386Sopenharmony_ci    }
427cb93a386Sopenharmony_ci
428cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
429cb93a386Sopenharmony_ci        float in[] = { 0,1,2,3,4,5,6,7,8,9 };
430cb93a386Sopenharmony_ci        int out[SK_ARRAY_COUNT(in)];
431cb93a386Sopenharmony_ci
432cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(in), in, out);
433cb93a386Sopenharmony_ci
434cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[0] == 0b001111);
435cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[1] == 0b001100);
436cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[2] == 0b001010);
437cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[3] == 0b001010);
438cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, out[4] == 0b000010);
439cb93a386Sopenharmony_ci        for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
440cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, out[i] == 0b110010);
441cb93a386Sopenharmony_ci        }
442cb93a386Sopenharmony_ci    });
443cb93a386Sopenharmony_ci}
444cb93a386Sopenharmony_ci
445cb93a386Sopenharmony_ciDEF_TEST(SkVM_index, r) {
446cb93a386Sopenharmony_ci    skvm::Builder b;
447cb93a386Sopenharmony_ci    b.store32(b.varying<int>(), b.index());
448cb93a386Sopenharmony_ci
449cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
450cb93a386Sopenharmony_ci        int buf[23];
451cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf);
452cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
453cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
454cb93a386Sopenharmony_ci        }
455cb93a386Sopenharmony_ci    });
456cb93a386Sopenharmony_ci}
457cb93a386Sopenharmony_ci
458cb93a386Sopenharmony_ciDEF_TEST(SkVM_mad, r) {
459cb93a386Sopenharmony_ci    // This program is designed to exercise the tricky corners of instruction
460cb93a386Sopenharmony_ci    // and register selection for Op::mad_f32.
461cb93a386Sopenharmony_ci
462cb93a386Sopenharmony_ci    skvm::Builder b;
463cb93a386Sopenharmony_ci    {
464cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<int>();
465cb93a386Sopenharmony_ci
466cb93a386Sopenharmony_ci        skvm::F32 x = b.to_F32(b.load32(arg)),
467cb93a386Sopenharmony_ci                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
468cb93a386Sopenharmony_ci                  z = b.mad(y,y,x),   // y is needed in the future, but r[z] = r[x] is ok.
469cb93a386Sopenharmony_ci                  w = b.mad(z,z,y),   // w can alias z but not y.
470cb93a386Sopenharmony_ci                  v = b.mad(w,y,w);   // Got to stop somewhere.
471cb93a386Sopenharmony_ci        b.store32(arg, b.trunc(v));
472cb93a386Sopenharmony_ci    }
473cb93a386Sopenharmony_ci
474cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
475cb93a386Sopenharmony_ci        int x = 2;
476cb93a386Sopenharmony_ci        program.eval(1, &x);
477cb93a386Sopenharmony_ci        // x = 2
478cb93a386Sopenharmony_ci        // y = 2*2 + 2 = 6
479cb93a386Sopenharmony_ci        // z = 6*6 + 2 = 38
480cb93a386Sopenharmony_ci        // w = 38*38 + 6 = 1450
481cb93a386Sopenharmony_ci        // v = 1450*6 + 1450 = 10150
482cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, x == 10150);
483cb93a386Sopenharmony_ci    });
484cb93a386Sopenharmony_ci}
485cb93a386Sopenharmony_ci
486cb93a386Sopenharmony_ciDEF_TEST(SkVM_fms, r) {
487cb93a386Sopenharmony_ci    // Create a pattern that can be peepholed into an Op::fms_f32.
488cb93a386Sopenharmony_ci    skvm::Builder b;
489cb93a386Sopenharmony_ci    {
490cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<int>();
491cb93a386Sopenharmony_ci
492cb93a386Sopenharmony_ci        skvm::F32 x = b.to_F32(b.load32(arg)),
493cb93a386Sopenharmony_ci                  v = b.sub(b.mul(x, b.splat(2.0f)),
494cb93a386Sopenharmony_ci                            b.splat(1.0f));
495cb93a386Sopenharmony_ci        b.store32(arg, b.trunc(v));
496cb93a386Sopenharmony_ci    }
497cb93a386Sopenharmony_ci
498cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
499cb93a386Sopenharmony_ci        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
500cb93a386Sopenharmony_ci        program.eval((int)SK_ARRAY_COUNT(buf), &buf);
501cb93a386Sopenharmony_ci
502cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
503cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] = 2*i-1);
504cb93a386Sopenharmony_ci        }
505cb93a386Sopenharmony_ci    });
506cb93a386Sopenharmony_ci}
507cb93a386Sopenharmony_ci
508cb93a386Sopenharmony_ciDEF_TEST(SkVM_fnma, r) {
509cb93a386Sopenharmony_ci    // Create a pattern that can be peepholed into an Op::fnma_f32.
510cb93a386Sopenharmony_ci    skvm::Builder b;
511cb93a386Sopenharmony_ci    {
512cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<int>();
513cb93a386Sopenharmony_ci
514cb93a386Sopenharmony_ci        skvm::F32 x = b.to_F32(b.load32(arg)),
515cb93a386Sopenharmony_ci                  v = b.sub(b.splat(1.0f),
516cb93a386Sopenharmony_ci                            b.mul(x, b.splat(2.0f)));
517cb93a386Sopenharmony_ci        b.store32(arg, b.trunc(v));
518cb93a386Sopenharmony_ci    }
519cb93a386Sopenharmony_ci
520cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
521cb93a386Sopenharmony_ci        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
522cb93a386Sopenharmony_ci        program.eval((int)SK_ARRAY_COUNT(buf), &buf);
523cb93a386Sopenharmony_ci
524cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
525cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] = 1-2*i);
526cb93a386Sopenharmony_ci        }
527cb93a386Sopenharmony_ci    });
528cb93a386Sopenharmony_ci}
529cb93a386Sopenharmony_ci
530cb93a386Sopenharmony_ciDEF_TEST(SkVM_madder, r) {
531cb93a386Sopenharmony_ci    skvm::Builder b;
532cb93a386Sopenharmony_ci    {
533cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<float>();
534cb93a386Sopenharmony_ci
535cb93a386Sopenharmony_ci        skvm::F32 x = b.loadF(arg),
536cb93a386Sopenharmony_ci                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
537cb93a386Sopenharmony_ci                  z = b.mad(y,x,y),   // r[x] can be reused after this instruction, but not r[y].
538cb93a386Sopenharmony_ci                  w = b.mad(y,y,z);
539cb93a386Sopenharmony_ci        b.storeF(arg, w);
540cb93a386Sopenharmony_ci    }
541cb93a386Sopenharmony_ci
542cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
543cb93a386Sopenharmony_ci        float x = 2.0f;
544cb93a386Sopenharmony_ci        // y = 2*2 + 2 = 6
545cb93a386Sopenharmony_ci        // z = 6*2 + 6 = 18
546cb93a386Sopenharmony_ci        // w = 6*6 + 18 = 54
547cb93a386Sopenharmony_ci        program.eval(1, &x);
548cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, x == 54.0f);
549cb93a386Sopenharmony_ci    });
550cb93a386Sopenharmony_ci}
551cb93a386Sopenharmony_ci
552cb93a386Sopenharmony_ciDEF_TEST(SkVM_floor, r) {
553cb93a386Sopenharmony_ci    skvm::Builder b;
554cb93a386Sopenharmony_ci    {
555cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<float>();
556cb93a386Sopenharmony_ci        b.storeF(arg, b.floor(b.loadF(arg)));
557cb93a386Sopenharmony_ci    }
558cb93a386Sopenharmony_ci
559cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
560cb93a386Sopenharmony_ci        float buf[]  = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
561cb93a386Sopenharmony_ci        float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
562cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf);
563cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
564cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] == want[i]);
565cb93a386Sopenharmony_ci        }
566cb93a386Sopenharmony_ci    });
567cb93a386Sopenharmony_ci}
568cb93a386Sopenharmony_ci
569cb93a386Sopenharmony_ciDEF_TEST(SkVM_round, r) {
570cb93a386Sopenharmony_ci    skvm::Builder b;
571cb93a386Sopenharmony_ci    {
572cb93a386Sopenharmony_ci        skvm::Ptr src = b.varying<float>();
573cb93a386Sopenharmony_ci        skvm::Ptr dst = b.varying<int>();
574cb93a386Sopenharmony_ci        b.store32(dst, b.round(b.loadF(src)));
575cb93a386Sopenharmony_ci    }
576cb93a386Sopenharmony_ci
577cb93a386Sopenharmony_ci    // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
578cb93a386Sopenharmony_ci    // We haven't explicitly guaranteed that here... it just probably is.
579cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
580cb93a386Sopenharmony_ci        float buf[]  = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
581cb93a386Sopenharmony_ci        int want[] =   { -2   ,  0   , 0   , 0   , 0   , 1   , 1   , 1   , 2   , 2    };
582cb93a386Sopenharmony_ci        int dst[SK_ARRAY_COUNT(buf)];
583cb93a386Sopenharmony_ci
584cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf, dst);
585cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
586cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, dst[i] == want[i]);
587cb93a386Sopenharmony_ci        }
588cb93a386Sopenharmony_ci    });
589cb93a386Sopenharmony_ci}
590cb93a386Sopenharmony_ci
591cb93a386Sopenharmony_ciDEF_TEST(SkVM_min, r) {
592cb93a386Sopenharmony_ci    skvm::Builder b;
593cb93a386Sopenharmony_ci    {
594cb93a386Sopenharmony_ci        skvm::Ptr src1 = b.varying<float>();
595cb93a386Sopenharmony_ci        skvm::Ptr src2 = b.varying<float>();
596cb93a386Sopenharmony_ci        skvm::Ptr dst = b.varying<float>();
597cb93a386Sopenharmony_ci
598cb93a386Sopenharmony_ci        b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
599cb93a386Sopenharmony_ci    }
600cb93a386Sopenharmony_ci
601cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
602cb93a386Sopenharmony_ci        float s1[]  =  { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
603cb93a386Sopenharmony_ci        float s2[]  =  { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f};
604cb93a386Sopenharmony_ci        float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
605cb93a386Sopenharmony_ci        float d[SK_ARRAY_COUNT(s1)];
606cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
607cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
608cb93a386Sopenharmony_ci          REPORTER_ASSERT(r, d[i] == want[i]);
609cb93a386Sopenharmony_ci        }
610cb93a386Sopenharmony_ci    });
611cb93a386Sopenharmony_ci}
612cb93a386Sopenharmony_ci
613cb93a386Sopenharmony_ciDEF_TEST(SkVM_max, r) {
614cb93a386Sopenharmony_ci    skvm::Builder b;
615cb93a386Sopenharmony_ci    {
616cb93a386Sopenharmony_ci        skvm::Ptr src1 = b.varying<float>();
617cb93a386Sopenharmony_ci        skvm::Ptr src2 = b.varying<float>();
618cb93a386Sopenharmony_ci        skvm::Ptr dst = b.varying<float>();
619cb93a386Sopenharmony_ci
620cb93a386Sopenharmony_ci        b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
621cb93a386Sopenharmony_ci    }
622cb93a386Sopenharmony_ci
623cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
624cb93a386Sopenharmony_ci        float s1[]  =  { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
625cb93a386Sopenharmony_ci        float s2[]  =  { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f};
626cb93a386Sopenharmony_ci        float want[] = { 0.0f, 2.0f, 4.0f,  1.0f, -1.0f};
627cb93a386Sopenharmony_ci        float d[SK_ARRAY_COUNT(s1)];
628cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(d), s1, s2, d);
629cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(d); i++) {
630cb93a386Sopenharmony_ci          REPORTER_ASSERT(r, d[i] == want[i]);
631cb93a386Sopenharmony_ci        }
632cb93a386Sopenharmony_ci    });
633cb93a386Sopenharmony_ci}
634cb93a386Sopenharmony_ci
635cb93a386Sopenharmony_ciDEF_TEST(SkVM_hoist, r) {
636cb93a386Sopenharmony_ci    // This program uses enough constants that it will fail to JIT if we hoist them.
637cb93a386Sopenharmony_ci    // The JIT will try again without hoisting, and that'll just need 2 registers.
638cb93a386Sopenharmony_ci    skvm::Builder b;
639cb93a386Sopenharmony_ci    {
640cb93a386Sopenharmony_ci        skvm::Ptr arg = b.varying<int>();
641cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(arg);
642cb93a386Sopenharmony_ci        for (int i = 0; i < 32; i++) {
643cb93a386Sopenharmony_ci            x = b.add(x, b.splat(i));
644cb93a386Sopenharmony_ci        }
645cb93a386Sopenharmony_ci        b.store32(arg, x);
646cb93a386Sopenharmony_ci    }
647cb93a386Sopenharmony_ci
648cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
649cb93a386Sopenharmony_ci        int x = 4;
650cb93a386Sopenharmony_ci        program.eval(1, &x);
651cb93a386Sopenharmony_ci        // x += 0 + 1 + 2 + 3 + ... + 30 + 31
652cb93a386Sopenharmony_ci        // x += 496
653cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, x == 500);
654cb93a386Sopenharmony_ci    });
655cb93a386Sopenharmony_ci}
656cb93a386Sopenharmony_ci
657cb93a386Sopenharmony_ciDEF_TEST(SkVM_select, r) {
658cb93a386Sopenharmony_ci    skvm::Builder b;
659cb93a386Sopenharmony_ci    {
660cb93a386Sopenharmony_ci        skvm::Ptr buf = b.varying<int>();
661cb93a386Sopenharmony_ci
662cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(buf);
663cb93a386Sopenharmony_ci
664cb93a386Sopenharmony_ci        x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );
665cb93a386Sopenharmony_ci
666cb93a386Sopenharmony_ci        b.store32(buf, x);
667cb93a386Sopenharmony_ci    }
668cb93a386Sopenharmony_ci
669cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
670cb93a386Sopenharmony_ci        int buf[] = { 0,1,2,3,4,5,6,7,8 };
671cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf);
672cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
673cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
674cb93a386Sopenharmony_ci        }
675cb93a386Sopenharmony_ci    });
676cb93a386Sopenharmony_ci}
677cb93a386Sopenharmony_ci
678cb93a386Sopenharmony_ciDEF_TEST(SkVM_swap, r) {
679cb93a386Sopenharmony_ci    skvm::Builder b;
680cb93a386Sopenharmony_ci    {
681cb93a386Sopenharmony_ci        // This program is the equivalent of
682cb93a386Sopenharmony_ci        //     x = *X
683cb93a386Sopenharmony_ci        //     y = *Y
684cb93a386Sopenharmony_ci        //     *X = y
685cb93a386Sopenharmony_ci        //     *Y = x
686cb93a386Sopenharmony_ci        // One rescheduling of the program based only on data flow of Op arguments is
687cb93a386Sopenharmony_ci        //     x = *X
688cb93a386Sopenharmony_ci        //     *Y = x
689cb93a386Sopenharmony_ci        //     y = *Y
690cb93a386Sopenharmony_ci        //     *X = y
691cb93a386Sopenharmony_ci        // but this reordering does not produce the same results and is invalid.
692cb93a386Sopenharmony_ci        skvm::Ptr X = b.varying<int>(),
693cb93a386Sopenharmony_ci                  Y = b.varying<int>();
694cb93a386Sopenharmony_ci
695cb93a386Sopenharmony_ci        skvm::I32 x = b.load32(X),
696cb93a386Sopenharmony_ci                  y = b.load32(Y);
697cb93a386Sopenharmony_ci
698cb93a386Sopenharmony_ci        b.store32(X, y);
699cb93a386Sopenharmony_ci        b.store32(Y, x);
700cb93a386Sopenharmony_ci    }
701cb93a386Sopenharmony_ci
702cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
703cb93a386Sopenharmony_ci        int b1[] = { 0,1,2,3 };
704cb93a386Sopenharmony_ci        int b2[] = { 4,5,6,7 };
705cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(b1), b1, b2);
706cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(b1); i++) {
707cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, b1[i] == 4 + i);
708cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, b2[i] == i);
709cb93a386Sopenharmony_ci        }
710cb93a386Sopenharmony_ci    });
711cb93a386Sopenharmony_ci}
712cb93a386Sopenharmony_ci
713cb93a386Sopenharmony_ciDEF_TEST(SkVM_NewOps, r) {
714cb93a386Sopenharmony_ci    // Exercise a somewhat arbitrary set of new ops.
715cb93a386Sopenharmony_ci    skvm::Builder b;
716cb93a386Sopenharmony_ci    {
717cb93a386Sopenharmony_ci        skvm::Ptr buf = b.varying<int16_t>();
718cb93a386Sopenharmony_ci        skvm::UPtr uniforms = b.uniform();
719cb93a386Sopenharmony_ci
720cb93a386Sopenharmony_ci        skvm::I32 x = b.load16(buf);
721cb93a386Sopenharmony_ci
722cb93a386Sopenharmony_ci        const size_t kPtr = sizeof(const int*);
723cb93a386Sopenharmony_ci
724cb93a386Sopenharmony_ci        x = b.add(x, b.uniform32(uniforms, kPtr+0));
725cb93a386Sopenharmony_ci        x = b.mul(x, b.uniform32(uniforms, kPtr+4));
726cb93a386Sopenharmony_ci        x = b.sub(x, b.uniform32(uniforms, kPtr+8));
727cb93a386Sopenharmony_ci
728cb93a386Sopenharmony_ci        skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
729cb93a386Sopenharmony_ci        x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
730cb93a386Sopenharmony_ci        x = b.select(b.gt(x, limit     ), limit     , x);
731cb93a386Sopenharmony_ci
732cb93a386Sopenharmony_ci        x = b.gather8(uniforms,0, x);
733cb93a386Sopenharmony_ci
734cb93a386Sopenharmony_ci        b.store16(buf, x);
735cb93a386Sopenharmony_ci    }
736cb93a386Sopenharmony_ci
737cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
738cb93a386Sopenharmony_ci        const int N = 31;
739cb93a386Sopenharmony_ci        int16_t buf[N];
740cb93a386Sopenharmony_ci        for (int i = 0; i < N; i++) {
741cb93a386Sopenharmony_ci            buf[i] = i;
742cb93a386Sopenharmony_ci        }
743cb93a386Sopenharmony_ci
744cb93a386Sopenharmony_ci        const int M = 16;
745cb93a386Sopenharmony_ci        uint8_t img[M];
746cb93a386Sopenharmony_ci        for (int i = 0; i < M; i++) {
747cb93a386Sopenharmony_ci            img[i] = i*i;
748cb93a386Sopenharmony_ci        }
749cb93a386Sopenharmony_ci
750cb93a386Sopenharmony_ci        struct {
751cb93a386Sopenharmony_ci            const uint8_t* img;
752cb93a386Sopenharmony_ci            int      add   = 5;
753cb93a386Sopenharmony_ci            int      mul   = 3;
754cb93a386Sopenharmony_ci            int      sub   = 18;
755cb93a386Sopenharmony_ci            int      limit = M-1;
756cb93a386Sopenharmony_ci        } uniforms{img};
757cb93a386Sopenharmony_ci
758cb93a386Sopenharmony_ci        program.eval(N, buf, &uniforms);
759cb93a386Sopenharmony_ci
760cb93a386Sopenharmony_ci        for (int i = 0; i < N; i++) {
761cb93a386Sopenharmony_ci            // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
762cb93a386Sopenharmony_ci            int x = 3*(i-1);
763cb93a386Sopenharmony_ci
764cb93a386Sopenharmony_ci            // Then that's pinned to the limits of img.
765cb93a386Sopenharmony_ci            if (i < 2) { x =  0; }  // Notice i == 1 hits x == 0 exactly...
766cb93a386Sopenharmony_ci            if (i > 5) { x = 15; }  // ...and i == 6 hits x == 15 exactly
767cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] == img[x]);
768cb93a386Sopenharmony_ci        }
769cb93a386Sopenharmony_ci    });
770cb93a386Sopenharmony_ci}
771cb93a386Sopenharmony_ci
772cb93a386Sopenharmony_ciDEF_TEST(SKVM_array32, r) {
773cb93a386Sopenharmony_ci
774cb93a386Sopenharmony_ci
775cb93a386Sopenharmony_ci
776cb93a386Sopenharmony_ci    skvm::Builder b;
777cb93a386Sopenharmony_ci    skvm::Uniforms uniforms(b.uniform(), 0);
778cb93a386Sopenharmony_ci    // Take up the first slot, so other uniforms are not at 0 offset.
779cb93a386Sopenharmony_ci    uniforms.push(0);
780cb93a386Sopenharmony_ci    int i[] = {3, 7};
781cb93a386Sopenharmony_ci    skvm::Uniform array = uniforms.pushArray(i);
782cb93a386Sopenharmony_ci    float f[] = {5, 9};
783cb93a386Sopenharmony_ci    skvm::Uniform arrayF = uniforms.pushArrayF(f);
784cb93a386Sopenharmony_ci    {
785cb93a386Sopenharmony_ci        skvm::Ptr buf0     = b.varying<int32_t>(),
786cb93a386Sopenharmony_ci                  buf1     = b.varying<int32_t>(),
787cb93a386Sopenharmony_ci                  buf2     = b.varying<int32_t>();
788cb93a386Sopenharmony_ci
789cb93a386Sopenharmony_ci        skvm::I32 j = b.array32(array, 0);
790cb93a386Sopenharmony_ci        b.store32(buf0, j);
791cb93a386Sopenharmony_ci        skvm::I32 k = b.array32(array, 1);
792cb93a386Sopenharmony_ci        b.store32(buf1, k);
793cb93a386Sopenharmony_ci
794cb93a386Sopenharmony_ci        skvm::F32 x = b.arrayF(arrayF, 0);
795cb93a386Sopenharmony_ci        skvm::F32 y = b.arrayF(arrayF, 1);
796cb93a386Sopenharmony_ci        b.store32(buf2, b.trunc(b.add(x, y)));
797cb93a386Sopenharmony_ci    }
798cb93a386Sopenharmony_ci
799cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
800cb93a386Sopenharmony_ci        const int K = 10;
801cb93a386Sopenharmony_ci        int32_t buf0[K],
802cb93a386Sopenharmony_ci                buf1[K],
803cb93a386Sopenharmony_ci                buf2[K];
804cb93a386Sopenharmony_ci
805cb93a386Sopenharmony_ci        // reset the i[0] for the two tests.
806cb93a386Sopenharmony_ci        i[0] = 3;
807cb93a386Sopenharmony_ci        f[1] = 9;
808cb93a386Sopenharmony_ci        program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
809cb93a386Sopenharmony_ci        for (auto v : buf0) {
810cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, v == 3);
811cb93a386Sopenharmony_ci        }
812cb93a386Sopenharmony_ci        for (auto v : buf1) {
813cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, v == 7);
814cb93a386Sopenharmony_ci        }
815cb93a386Sopenharmony_ci        for (auto v : buf2) {
816cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, v == 14);
817cb93a386Sopenharmony_ci        }
818cb93a386Sopenharmony_ci        i[0] = 4;
819cb93a386Sopenharmony_ci        f[1] = 10;
820cb93a386Sopenharmony_ci        program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
821cb93a386Sopenharmony_ci        for (auto v : buf0) {
822cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, v == 4);
823cb93a386Sopenharmony_ci        }
824cb93a386Sopenharmony_ci        for (auto v : buf1) {
825cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, v == 7);
826cb93a386Sopenharmony_ci        }
827cb93a386Sopenharmony_ci        for (auto v : buf2) {
828cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, v == 15);
829cb93a386Sopenharmony_ci        }
830cb93a386Sopenharmony_ci    });
831cb93a386Sopenharmony_ci}
832cb93a386Sopenharmony_ci
833cb93a386Sopenharmony_ciDEF_TEST(SkVM_sqrt, r) {
834cb93a386Sopenharmony_ci    skvm::Builder b;
835cb93a386Sopenharmony_ci    auto buf = b.varying<int>();
836cb93a386Sopenharmony_ci    b.storeF(buf, b.sqrt(b.loadF(buf)));
837cb93a386Sopenharmony_ci
838cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
839cb93a386Sopenharmony_ci        constexpr int K = 17;
840cb93a386Sopenharmony_ci        float buf[K];
841cb93a386Sopenharmony_ci        for (int i = 0; i < K; i++) {
842cb93a386Sopenharmony_ci            buf[i] = (float)(i*i);
843cb93a386Sopenharmony_ci        }
844cb93a386Sopenharmony_ci
845cb93a386Sopenharmony_ci        // x^2 -> x
846cb93a386Sopenharmony_ci        program.eval(K, buf);
847cb93a386Sopenharmony_ci
848cb93a386Sopenharmony_ci        for (int i = 0; i < K; i++) {
849cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] == (float)i);
850cb93a386Sopenharmony_ci        }
851cb93a386Sopenharmony_ci    });
852cb93a386Sopenharmony_ci}
853cb93a386Sopenharmony_ci
854cb93a386Sopenharmony_ciDEF_TEST(SkVM_MSAN, r) {
855cb93a386Sopenharmony_ci    // This little memset32() program should be able to JIT, but if we run that
856cb93a386Sopenharmony_ci    // JIT code in an MSAN build, it won't see the writes initialize buf.  So
857cb93a386Sopenharmony_ci    // this tests that we're using the interpreter instead.
858cb93a386Sopenharmony_ci    skvm::Builder b;
859cb93a386Sopenharmony_ci    b.store32(b.varying<int>(), b.splat(42));
860cb93a386Sopenharmony_ci
861cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
862cb93a386Sopenharmony_ci        constexpr int K = 17;
863cb93a386Sopenharmony_ci        int buf[K];                 // Intentionally uninitialized.
864cb93a386Sopenharmony_ci        program.eval(K, buf);
865cb93a386Sopenharmony_ci        sk_msan_assert_initialized(buf, buf+K);
866cb93a386Sopenharmony_ci        for (int x : buf) {
867cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, x == 42);
868cb93a386Sopenharmony_ci        }
869cb93a386Sopenharmony_ci    });
870cb93a386Sopenharmony_ci}
871cb93a386Sopenharmony_ci
872cb93a386Sopenharmony_ciDEF_TEST(SkVM_assert, r) {
873cb93a386Sopenharmony_ci    skvm::Builder b;
874cb93a386Sopenharmony_ci    b.assert_true(b.lt(b.load32(b.varying<int>()),
875cb93a386Sopenharmony_ci                       b.splat(42)));
876cb93a386Sopenharmony_ci
877cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
878cb93a386Sopenharmony_ci        int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
879cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf);
880cb93a386Sopenharmony_ci    });
881cb93a386Sopenharmony_ci}
882cb93a386Sopenharmony_ci
883cb93a386Sopenharmony_ciDEF_TEST(SkVM_trace_line, r) {
884cb93a386Sopenharmony_ci    skvm::Builder b;
885cb93a386Sopenharmony_ci    b.trace_line(b.splat(0xFFFFFFFF), 123);
886cb93a386Sopenharmony_ci
887cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
888cb93a386Sopenharmony_ci        // The trace_line instruction has no behavior yet.
889cb93a386Sopenharmony_ci        program.eval(1);
890cb93a386Sopenharmony_ci    });
891cb93a386Sopenharmony_ci}
892cb93a386Sopenharmony_ci
893cb93a386Sopenharmony_ciDEF_TEST(SkVM_premul, reporter) {
894cb93a386Sopenharmony_ci    // Test that premul is short-circuited when alpha is known opaque.
895cb93a386Sopenharmony_ci    {
896cb93a386Sopenharmony_ci        skvm::Builder p;
897cb93a386Sopenharmony_ci        auto rptr = p.varying<int>(),
898cb93a386Sopenharmony_ci             aptr = p.varying<int>();
899cb93a386Sopenharmony_ci
900cb93a386Sopenharmony_ci        skvm::F32 r = p.loadF(rptr),
901cb93a386Sopenharmony_ci                  g = p.splat(0.0f),
902cb93a386Sopenharmony_ci                  b = p.splat(0.0f),
903cb93a386Sopenharmony_ci                  a = p.loadF(aptr);
904cb93a386Sopenharmony_ci
905cb93a386Sopenharmony_ci        p.premul(&r, &g, &b, a);
906cb93a386Sopenharmony_ci        p.storeF(rptr, r);
907cb93a386Sopenharmony_ci
908cb93a386Sopenharmony_ci        // load red, load alpha, red *= alpha, store red
909cb93a386Sopenharmony_ci        REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
910cb93a386Sopenharmony_ci    }
911cb93a386Sopenharmony_ci
912cb93a386Sopenharmony_ci    {
913cb93a386Sopenharmony_ci        skvm::Builder p;
914cb93a386Sopenharmony_ci        auto rptr = p.varying<int>();
915cb93a386Sopenharmony_ci
916cb93a386Sopenharmony_ci        skvm::F32 r = p.loadF(rptr),
917cb93a386Sopenharmony_ci                  g = p.splat(0.0f),
918cb93a386Sopenharmony_ci                  b = p.splat(0.0f),
919cb93a386Sopenharmony_ci                  a = p.splat(1.0f);
920cb93a386Sopenharmony_ci
921cb93a386Sopenharmony_ci        p.premul(&r, &g, &b, a);
922cb93a386Sopenharmony_ci        p.storeF(rptr, r);
923cb93a386Sopenharmony_ci
924cb93a386Sopenharmony_ci        // load red, store red
925cb93a386Sopenharmony_ci        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
926cb93a386Sopenharmony_ci    }
927cb93a386Sopenharmony_ci
928cb93a386Sopenharmony_ci    // Same deal for unpremul.
929cb93a386Sopenharmony_ci    {
930cb93a386Sopenharmony_ci        skvm::Builder p;
931cb93a386Sopenharmony_ci        auto rptr = p.varying<int>(),
932cb93a386Sopenharmony_ci             aptr = p.varying<int>();
933cb93a386Sopenharmony_ci
934cb93a386Sopenharmony_ci        skvm::F32 r = p.loadF(rptr),
935cb93a386Sopenharmony_ci                  g = p.splat(0.0f),
936cb93a386Sopenharmony_ci                  b = p.splat(0.0f),
937cb93a386Sopenharmony_ci                  a = p.loadF(aptr);
938cb93a386Sopenharmony_ci
939cb93a386Sopenharmony_ci        p.unpremul(&r, &g, &b, a);
940cb93a386Sopenharmony_ci        p.storeF(rptr, r);
941cb93a386Sopenharmony_ci
942cb93a386Sopenharmony_ci        // load red, load alpha, a bunch of unpremul instructions, store red
943cb93a386Sopenharmony_ci        REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
944cb93a386Sopenharmony_ci    }
945cb93a386Sopenharmony_ci
946cb93a386Sopenharmony_ci    {
947cb93a386Sopenharmony_ci        skvm::Builder p;
948cb93a386Sopenharmony_ci        auto rptr = p.varying<int>();
949cb93a386Sopenharmony_ci
950cb93a386Sopenharmony_ci        skvm::F32 r = p.loadF(rptr),
951cb93a386Sopenharmony_ci                  g = p.splat(0.0f),
952cb93a386Sopenharmony_ci                  b = p.splat(0.0f),
953cb93a386Sopenharmony_ci                  a = p.splat(1.0f);
954cb93a386Sopenharmony_ci
955cb93a386Sopenharmony_ci        p.unpremul(&r, &g, &b, a);
956cb93a386Sopenharmony_ci        p.storeF(rptr, r);
957cb93a386Sopenharmony_ci
958cb93a386Sopenharmony_ci        // load red, store red
959cb93a386Sopenharmony_ci        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
960cb93a386Sopenharmony_ci    }
961cb93a386Sopenharmony_ci}
962cb93a386Sopenharmony_ci
963cb93a386Sopenharmony_citemplate <typename Fn>
964cb93a386Sopenharmony_cistatic void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
965cb93a386Sopenharmony_ci    uint8_t buf[4096];
966cb93a386Sopenharmony_ci    skvm::Assembler a{buf};
967cb93a386Sopenharmony_ci    fn(a);
968cb93a386Sopenharmony_ci
969cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, a.size() == expected.size());
970cb93a386Sopenharmony_ci
971cb93a386Sopenharmony_ci    auto got = (const uint8_t*)buf,
972cb93a386Sopenharmony_ci         want = expected.begin();
973cb93a386Sopenharmony_ci    for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
974cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, got[i] == want[i],
975cb93a386Sopenharmony_ci                        "byte %d was %02x, want %02x", i, got[i], want[i]);
976cb93a386Sopenharmony_ci    }
977cb93a386Sopenharmony_ci}
978cb93a386Sopenharmony_ci
979cb93a386Sopenharmony_ciDEF_TEST(SkVM_Assembler, r) {
980cb93a386Sopenharmony_ci    // Easiest way to generate test cases is
981cb93a386Sopenharmony_ci    //
982cb93a386Sopenharmony_ci    //   echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
983cb93a386Sopenharmony_ci    //
984cb93a386Sopenharmony_ci    // The -x86-asm-syntax=intel bit is optional, controlling the
985cb93a386Sopenharmony_ci    // input syntax only; the output will always be AT&T  op x,y,dst style.
986cb93a386Sopenharmony_ci    // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
987cb93a386Sopenharmony_ci    // that a bit easier to use here, despite maybe favoring AT&T overall.
988cb93a386Sopenharmony_ci
989cb93a386Sopenharmony_ci    using A = skvm::Assembler;
990cb93a386Sopenharmony_ci    // Our exit strategy from AVX code.
991cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
992cb93a386Sopenharmony_ci        a.int3();
993cb93a386Sopenharmony_ci        a.vzeroupper();
994cb93a386Sopenharmony_ci        a.ret();
995cb93a386Sopenharmony_ci    },{
996cb93a386Sopenharmony_ci        0xcc,
997cb93a386Sopenharmony_ci        0xc5, 0xf8, 0x77,
998cb93a386Sopenharmony_ci        0xc3,
999cb93a386Sopenharmony_ci    });
1000cb93a386Sopenharmony_ci
1001cb93a386Sopenharmony_ci    // Align should pad with zero
1002cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1003cb93a386Sopenharmony_ci        a.ret();
1004cb93a386Sopenharmony_ci        a.align(4);
1005cb93a386Sopenharmony_ci    },{
1006cb93a386Sopenharmony_ci        0xc3,
1007cb93a386Sopenharmony_ci        0x00, 0x00, 0x00,
1008cb93a386Sopenharmony_ci    });
1009cb93a386Sopenharmony_ci
1010cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1011cb93a386Sopenharmony_ci        a.add(A::rax, 8);       // Always good to test rax.
1012cb93a386Sopenharmony_ci        a.sub(A::rax, 32);
1013cb93a386Sopenharmony_ci
1014cb93a386Sopenharmony_ci        a.add(A::rdi, 12);      // Last 0x48 REX
1015cb93a386Sopenharmony_ci        a.sub(A::rdi, 8);
1016cb93a386Sopenharmony_ci
1017cb93a386Sopenharmony_ci        a.add(A::r8 , 7);       // First 0x49 REX
1018cb93a386Sopenharmony_ci        a.sub(A::r8 , 4);
1019cb93a386Sopenharmony_ci
1020cb93a386Sopenharmony_ci        a.add(A::rsi, 128);     // Requires 4 byte immediate.
1021cb93a386Sopenharmony_ci        a.sub(A::r8 , 1000000);
1022cb93a386Sopenharmony_ci
1023cb93a386Sopenharmony_ci        a.add(A::Mem{A::rsi}, 7);                       // addq $7, (%rsi)
1024cb93a386Sopenharmony_ci        a.add(A::Mem{A::rsi, 12}, 7);                   // addq $7, 12(%rsi)
1025cb93a386Sopenharmony_ci        a.add(A::Mem{A::rsp, 12}, 7);                   // addq $7, 12(%rsp)
1026cb93a386Sopenharmony_ci        a.add(A::Mem{A::r12, 12}, 7);                   // addq $7, 12(%r12)
1027cb93a386Sopenharmony_ci        a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7);  // addq $7, 12(%rsp,%rax,4)
1028cb93a386Sopenharmony_ci        a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7);  // addq $7, 12(%r12,%rax,4)
1029cb93a386Sopenharmony_ci        a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7);  // addq $7, 12(%rax,%r12,4)
1030cb93a386Sopenharmony_ci        a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7);  // addq $7, 12(%r11,%r8,2)
1031cb93a386Sopenharmony_ci        a.add(A::Mem{A::r11, 12, A::rax}         , 7);  // addq $7, 12(%r11,%rax)
1032cb93a386Sopenharmony_ci        a.add(A::Mem{A::rax, 12, A::r11}         , 7);  // addq $7, 12(%rax,%r11)
1033cb93a386Sopenharmony_ci
1034cb93a386Sopenharmony_ci        a.sub(A::Mem{A::rax, 12, A::r11}         , 7);  // subq $7, 12(%rax,%r11)
1035cb93a386Sopenharmony_ci
1036cb93a386Sopenharmony_ci        a.add(       A::rax     , A::rcx);              // addq %rcx, %rax
1037cb93a386Sopenharmony_ci        a.add(A::Mem{A::rax}    , A::rcx);              // addq %rcx, (%rax)
1038cb93a386Sopenharmony_ci        a.add(A::Mem{A::rax, 12}, A::rcx);              // addq %rcx, 12(%rax)
1039cb93a386Sopenharmony_ci        a.add(A::rcx, A::Mem{A::rax, 12});              // addq 12(%rax), %rcx
1040cb93a386Sopenharmony_ci
1041cb93a386Sopenharmony_ci        a.sub(A::rcx, A::Mem{A::rax, 12});              // subq 12(%rax), %rcx
1042cb93a386Sopenharmony_ci    },{
1043cb93a386Sopenharmony_ci        0x48, 0x83, 0b11'000'000, 0x08,
1044cb93a386Sopenharmony_ci        0x48, 0x83, 0b11'101'000, 0x20,
1045cb93a386Sopenharmony_ci
1046cb93a386Sopenharmony_ci        0x48, 0x83, 0b11'000'111, 0x0c,
1047cb93a386Sopenharmony_ci        0x48, 0x83, 0b11'101'111, 0x08,
1048cb93a386Sopenharmony_ci
1049cb93a386Sopenharmony_ci        0x49, 0x83, 0b11'000'000, 0x07,
1050cb93a386Sopenharmony_ci        0x49, 0x83, 0b11'101'000, 0x04,
1051cb93a386Sopenharmony_ci
1052cb93a386Sopenharmony_ci        0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
1053cb93a386Sopenharmony_ci        0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
1054cb93a386Sopenharmony_ci
1055cb93a386Sopenharmony_ci        0x48,0x83,0x06,0x07,
1056cb93a386Sopenharmony_ci        0x48,0x83,0x46,0x0c,0x07,
1057cb93a386Sopenharmony_ci        0x48,0x83,0x44,0x24,0x0c,0x07,
1058cb93a386Sopenharmony_ci        0x49,0x83,0x44,0x24,0x0c,0x07,
1059cb93a386Sopenharmony_ci        0x48,0x83,0x44,0x84,0x0c,0x07,
1060cb93a386Sopenharmony_ci        0x49,0x83,0x44,0x84,0x0c,0x07,
1061cb93a386Sopenharmony_ci        0x4a,0x83,0x44,0xa0,0x0c,0x07,
1062cb93a386Sopenharmony_ci        0x4b,0x83,0x44,0x43,0x0c,0x07,
1063cb93a386Sopenharmony_ci        0x49,0x83,0x44,0x03,0x0c,0x07,
1064cb93a386Sopenharmony_ci        0x4a,0x83,0x44,0x18,0x0c,0x07,
1065cb93a386Sopenharmony_ci
1066cb93a386Sopenharmony_ci        0x4a,0x83,0x6c,0x18,0x0c,0x07,
1067cb93a386Sopenharmony_ci
1068cb93a386Sopenharmony_ci        0x48,0x01,0xc8,
1069cb93a386Sopenharmony_ci        0x48,0x01,0x08,
1070cb93a386Sopenharmony_ci        0x48,0x01,0x48,0x0c,
1071cb93a386Sopenharmony_ci        0x48,0x03,0x48,0x0c,
1072cb93a386Sopenharmony_ci        0x48,0x2b,0x48,0x0c,
1073cb93a386Sopenharmony_ci    });
1074cb93a386Sopenharmony_ci
1075cb93a386Sopenharmony_ci
1076cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1077cb93a386Sopenharmony_ci        a.vpaddd (A::ymm0, A::ymm1, A::ymm2);  // Low registers and 0x0f map     -> 2-byte VEX.
1078cb93a386Sopenharmony_ci        a.vpaddd (A::ymm8, A::ymm1, A::ymm2);  // A high dst register is ok      -> 2-byte VEX.
1079cb93a386Sopenharmony_ci        a.vpaddd (A::ymm0, A::ymm8, A::ymm2);  // A high first argument register -> 2-byte VEX.
1080cb93a386Sopenharmony_ci        a.vpaddd (A::ymm0, A::ymm1, A::ymm8);  // A high second argument         -> 3-byte VEX.
1081cb93a386Sopenharmony_ci        a.vpmulld(A::ymm0, A::ymm1, A::ymm2);  // Using non-0x0f map instruction -> 3-byte VEX.
1082cb93a386Sopenharmony_ci        a.vpsubd (A::ymm0, A::ymm1, A::ymm2);  // Test vpsubd to ensure argument order is right.
1083cb93a386Sopenharmony_ci    },{
1084cb93a386Sopenharmony_ci        /*    VEX     */ /*op*/ /*modRM*/
1085cb93a386Sopenharmony_ci        0xc5,       0xf5, 0xfe, 0xc2,
1086cb93a386Sopenharmony_ci        0xc5,       0x75, 0xfe, 0xc2,
1087cb93a386Sopenharmony_ci        0xc5,       0xbd, 0xfe, 0xc2,
1088cb93a386Sopenharmony_ci        0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1089cb93a386Sopenharmony_ci        0xc4, 0xe2, 0x75, 0x40, 0xc2,
1090cb93a386Sopenharmony_ci        0xc5,       0xf5, 0xfa, 0xc2,
1091cb93a386Sopenharmony_ci    });
1092cb93a386Sopenharmony_ci
1093cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1094cb93a386Sopenharmony_ci        a.vpaddw   (A::ymm4, A::ymm3, A::ymm2);
1095cb93a386Sopenharmony_ci        a.vpavgw   (A::ymm4, A::ymm3, A::ymm2);
1096cb93a386Sopenharmony_ci        a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1097cb93a386Sopenharmony_ci        a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1098cb93a386Sopenharmony_ci
1099cb93a386Sopenharmony_ci        a.vpminsw  (A::ymm4, A::ymm3, A::ymm2);
1100cb93a386Sopenharmony_ci        a.vpmaxsw  (A::ymm4, A::ymm3, A::ymm2);
1101cb93a386Sopenharmony_ci        a.vpminuw  (A::ymm4, A::ymm3, A::ymm2);
1102cb93a386Sopenharmony_ci        a.vpmaxuw  (A::ymm4, A::ymm3, A::ymm2);
1103cb93a386Sopenharmony_ci
1104cb93a386Sopenharmony_ci        a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1105cb93a386Sopenharmony_ci        a.vpabsw   (A::ymm4, A::ymm3);
1106cb93a386Sopenharmony_ci        a.vpsllw   (A::ymm4, A::ymm3, 12);
1107cb93a386Sopenharmony_ci        a.vpsraw   (A::ymm4, A::ymm3, 12);
1108cb93a386Sopenharmony_ci    },{
1109cb93a386Sopenharmony_ci        0xc5,     0xe5, 0xfd, 0xe2,
1110cb93a386Sopenharmony_ci        0xc5,     0xe5, 0xe3, 0xe2,
1111cb93a386Sopenharmony_ci        0xc5,     0xe5, 0x75, 0xe2,
1112cb93a386Sopenharmony_ci        0xc5,     0xe5, 0x65, 0xe2,
1113cb93a386Sopenharmony_ci
1114cb93a386Sopenharmony_ci        0xc5,     0xe5, 0xea, 0xe2,
1115cb93a386Sopenharmony_ci        0xc5,     0xe5, 0xee, 0xe2,
1116cb93a386Sopenharmony_ci        0xc4,0xe2,0x65, 0x3a, 0xe2,
1117cb93a386Sopenharmony_ci        0xc4,0xe2,0x65, 0x3e, 0xe2,
1118cb93a386Sopenharmony_ci
1119cb93a386Sopenharmony_ci        0xc4,0xe2,0x65, 0x0b, 0xe2,
1120cb93a386Sopenharmony_ci        0xc4,0xe2,0x7d, 0x1d, 0xe3,
1121cb93a386Sopenharmony_ci        0xc5,0xdd,0x71, 0xf3, 0x0c,
1122cb93a386Sopenharmony_ci        0xc5,0xdd,0x71, 0xe3, 0x0c,
1123cb93a386Sopenharmony_ci    });
1124cb93a386Sopenharmony_ci
1125cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1126cb93a386Sopenharmony_ci        A::Label l;
1127cb93a386Sopenharmony_ci        a.vcmpeqps (A::ymm0, A::ymm1, &l);      // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
1128cb93a386Sopenharmony_ci        a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1129cb93a386Sopenharmony_ci        a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1130cb93a386Sopenharmony_ci        a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1131cb93a386Sopenharmony_ci        a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1132cb93a386Sopenharmony_ci        a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1133cb93a386Sopenharmony_ci        a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
1134cb93a386Sopenharmony_ci        a.label(&l);   // 28 bytes after the vcmpeqps that uses it.
1135cb93a386Sopenharmony_ci    },{
1136cb93a386Sopenharmony_ci        0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
1137cb93a386Sopenharmony_ci        0xc5,0xf5,0x76,0xc2,
1138cb93a386Sopenharmony_ci        0xc5,0xf5,0x66,0xc2,
1139cb93a386Sopenharmony_ci        0xc5,0xf4,0xc2,0xc2,0x00,
1140cb93a386Sopenharmony_ci        0xc5,0xf4,0xc2,0xc2,0x01,
1141cb93a386Sopenharmony_ci        0xc5,0xf4,0xc2,0xc2,0x02,
1142cb93a386Sopenharmony_ci        0xc5,0xf4,0xc2,0xc2,0x04,
1143cb93a386Sopenharmony_ci    });
1144cb93a386Sopenharmony_ci
1145cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1146cb93a386Sopenharmony_ci        a.vminps(A::ymm0, A::ymm1, A::ymm2);
1147cb93a386Sopenharmony_ci        a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1148cb93a386Sopenharmony_ci    },{
1149cb93a386Sopenharmony_ci        0xc5,0xf4,0x5d,0xc2,
1150cb93a386Sopenharmony_ci        0xc5,0xf4,0x5f,0xc2,
1151cb93a386Sopenharmony_ci    });
1152cb93a386Sopenharmony_ci
1153cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1154cb93a386Sopenharmony_ci        a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1155cb93a386Sopenharmony_ci    },{
1156cb93a386Sopenharmony_ci        0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1157cb93a386Sopenharmony_ci    });
1158cb93a386Sopenharmony_ci
1159cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1160cb93a386Sopenharmony_ci        a.vpsrld(A::ymm15, A::ymm2, 8);
1161cb93a386Sopenharmony_ci        a.vpsrld(A::ymm0 , A::ymm8, 5);
1162cb93a386Sopenharmony_ci    },{
1163cb93a386Sopenharmony_ci        0xc5,     0x85, 0x72,0xd2, 0x08,
1164cb93a386Sopenharmony_ci        0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1165cb93a386Sopenharmony_ci    });
1166cb93a386Sopenharmony_ci
1167cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1168cb93a386Sopenharmony_ci        A::Label l;
1169cb93a386Sopenharmony_ci        a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
1170cb93a386Sopenharmony_ci        a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
1171cb93a386Sopenharmony_ci        a.vpermq(A::ymm1, A::ymm2, 5);
1172cb93a386Sopenharmony_ci        a.label(&l);  // 6 bytes after vperm2f128
1173cb93a386Sopenharmony_ci    },{
1174cb93a386Sopenharmony_ci        0xc4,0xe2,0x6d,0x16,0x4f,0x20,
1175cb93a386Sopenharmony_ci        0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
1176cb93a386Sopenharmony_ci        0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1177cb93a386Sopenharmony_ci    });
1178cb93a386Sopenharmony_ci
1179cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1180cb93a386Sopenharmony_ci        a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1181cb93a386Sopenharmony_ci        a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1182cb93a386Sopenharmony_ci    },{
1183cb93a386Sopenharmony_ci        0xc5,0xed,0x62,0x0f,
1184cb93a386Sopenharmony_ci        0xc5,0xed,0x6a,0xcb,
1185cb93a386Sopenharmony_ci    });
1186cb93a386Sopenharmony_ci
1187cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1188cb93a386Sopenharmony_ci        a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1189cb93a386Sopenharmony_ci        a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1190cb93a386Sopenharmony_ci        a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1191cb93a386Sopenharmony_ci        a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1192cb93a386Sopenharmony_ci    },{
1193cb93a386Sopenharmony_ci        0xc4,0xe3,0x7d,0x08,0xca,0x00,
1194cb93a386Sopenharmony_ci        0xc4,0xe3,0x7d,0x08,0xca,0x01,
1195cb93a386Sopenharmony_ci        0xc4,0xe3,0x7d,0x08,0xca,0x02,
1196cb93a386Sopenharmony_ci        0xc4,0xe3,0x7d,0x08,0xca,0x03,
1197cb93a386Sopenharmony_ci    });
1198cb93a386Sopenharmony_ci
1199cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1200cb93a386Sopenharmony_ci        A::Label l;
1201cb93a386Sopenharmony_ci        a.label(&l);
1202cb93a386Sopenharmony_ci        a.byte(1);
1203cb93a386Sopenharmony_ci        a.byte(2);
1204cb93a386Sopenharmony_ci        a.byte(3);
1205cb93a386Sopenharmony_ci        a.byte(4);
1206cb93a386Sopenharmony_ci
1207cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm0 , &l);
1208cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm1 , &l);
1209cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm8 , &l);
1210cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm15, &l);
1211cb93a386Sopenharmony_ci
1212cb93a386Sopenharmony_ci        a.vpshufb(A::ymm4, A::ymm3, &l);
1213cb93a386Sopenharmony_ci        a.vpaddd (A::ymm4, A::ymm3, &l);
1214cb93a386Sopenharmony_ci        a.vpsubd (A::ymm4, A::ymm3, &l);
1215cb93a386Sopenharmony_ci
1216cb93a386Sopenharmony_ci        a.vptest(A::ymm4, &l);
1217cb93a386Sopenharmony_ci
1218cb93a386Sopenharmony_ci        a.vmulps (A::ymm4, A::ymm3, &l);
1219cb93a386Sopenharmony_ci    },{
1220cb93a386Sopenharmony_ci        0x01, 0x02, 0x03, 0x4,
1221cb93a386Sopenharmony_ci
1222cb93a386Sopenharmony_ci        /*     VEX    */  /*op*/ /*   ModRM    */  /*     offset     */
1223cb93a386Sopenharmony_ci        0xc4, 0xe2, 0x7d,  0x18,   0b00'000'101,   0xf3,0xff,0xff,0xff,   // 0xfffffff3 == -13
1224cb93a386Sopenharmony_ci        0xc4, 0xe2, 0x7d,  0x18,   0b00'001'101,   0xea,0xff,0xff,0xff,   // 0xffffffea == -22
1225cb93a386Sopenharmony_ci        0xc4, 0x62, 0x7d,  0x18,   0b00'000'101,   0xe1,0xff,0xff,0xff,   // 0xffffffe1 == -31
1226cb93a386Sopenharmony_ci        0xc4, 0x62, 0x7d,  0x18,   0b00'111'101,   0xd8,0xff,0xff,0xff,   // 0xffffffd8 == -40
1227cb93a386Sopenharmony_ci
1228cb93a386Sopenharmony_ci        0xc4, 0xe2, 0x65,  0x00,   0b00'100'101,   0xcf,0xff,0xff,0xff,   // 0xffffffcf == -49
1229cb93a386Sopenharmony_ci
1230cb93a386Sopenharmony_ci        0xc5, 0xe5,        0xfe,   0b00'100'101,   0xc7,0xff,0xff,0xff,   // 0xffffffc7 == -57
1231cb93a386Sopenharmony_ci        0xc5, 0xe5,        0xfa,   0b00'100'101,   0xbf,0xff,0xff,0xff,   // 0xffffffbf == -65
1232cb93a386Sopenharmony_ci
1233cb93a386Sopenharmony_ci        0xc4, 0xe2, 0x7d,  0x17,   0b00'100'101,   0xb6,0xff,0xff,0xff,   // 0xffffffb6 == -74
1234cb93a386Sopenharmony_ci
1235cb93a386Sopenharmony_ci        0xc5, 0xe4,        0x59,   0b00'100'101,   0xae,0xff,0xff,0xff,   // 0xffffffae == -82
1236cb93a386Sopenharmony_ci    });
1237cb93a386Sopenharmony_ci
1238cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1239cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm0,  A::Mem{A::rdi,   0});
1240cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm13, A::Mem{A::r14,   7});
1241cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm8,  A::Mem{A::rdx, -12});
1242cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm8,  A::Mem{A::rdx, 400});
1243cb93a386Sopenharmony_ci
1244cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm8,  A::xmm0);
1245cb93a386Sopenharmony_ci        a.vbroadcastss(A::ymm0,  A::xmm13);
1246cb93a386Sopenharmony_ci    },{
1247cb93a386Sopenharmony_ci        /*   VEX    */ /*op*/     /*ModRM*/   /*offset*/
1248cb93a386Sopenharmony_ci        0xc4,0xe2,0x7d, 0x18,   0b00'000'111,
1249cb93a386Sopenharmony_ci        0xc4,0x42,0x7d, 0x18,   0b01'101'110,  0x07,
1250cb93a386Sopenharmony_ci        0xc4,0x62,0x7d, 0x18,   0b01'000'010,  0xf4,
1251cb93a386Sopenharmony_ci        0xc4,0x62,0x7d, 0x18,   0b10'000'010,  0x90,0x01,0x00,0x00,
1252cb93a386Sopenharmony_ci
1253cb93a386Sopenharmony_ci        0xc4,0x62,0x7d, 0x18,   0b11'000'000,
1254cb93a386Sopenharmony_ci        0xc4,0xc2,0x7d, 0x18,   0b11'000'101,
1255cb93a386Sopenharmony_ci    });
1256cb93a386Sopenharmony_ci
1257cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1258cb93a386Sopenharmony_ci        A::Label l;
1259cb93a386Sopenharmony_ci        a.label(&l);
1260cb93a386Sopenharmony_ci        a.jne(&l);
1261cb93a386Sopenharmony_ci        a.jne(&l);
1262cb93a386Sopenharmony_ci        a.je (&l);
1263cb93a386Sopenharmony_ci        a.jmp(&l);
1264cb93a386Sopenharmony_ci        a.jl (&l);
1265cb93a386Sopenharmony_ci        a.jc (&l);
1266cb93a386Sopenharmony_ci
1267cb93a386Sopenharmony_ci        a.cmp(A::rdx, 1);
1268cb93a386Sopenharmony_ci        a.cmp(A::rax, 12);
1269cb93a386Sopenharmony_ci        a.cmp(A::r14, 2000000000);
1270cb93a386Sopenharmony_ci    },{
1271cb93a386Sopenharmony_ci        0x0f,0x85, 0xfa,0xff,0xff,0xff,   // near jne -6 bytes
1272cb93a386Sopenharmony_ci        0x0f,0x85, 0xf4,0xff,0xff,0xff,   // near jne -12 bytes
1273cb93a386Sopenharmony_ci        0x0f,0x84, 0xee,0xff,0xff,0xff,   // near je  -18 bytes
1274cb93a386Sopenharmony_ci        0xe9,      0xe9,0xff,0xff,0xff,   // near jmp -23 bytes
1275cb93a386Sopenharmony_ci        0x0f,0x8c, 0xe3,0xff,0xff,0xff,   // near jl  -29 bytes
1276cb93a386Sopenharmony_ci        0x0f,0x82, 0xdd,0xff,0xff,0xff,   // near jc  -35 bytes
1277cb93a386Sopenharmony_ci
1278cb93a386Sopenharmony_ci        0x48,0x83,0xfa,0x01,
1279cb93a386Sopenharmony_ci        0x48,0x83,0xf8,0x0c,
1280cb93a386Sopenharmony_ci        0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
1281cb93a386Sopenharmony_ci    });
1282cb93a386Sopenharmony_ci
1283cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1284cb93a386Sopenharmony_ci        a.vmovups(A::ymm5, A::Mem{A::rsi});
1285cb93a386Sopenharmony_ci        a.vmovups(A::Mem{A::rsi}, A::ymm5);
1286cb93a386Sopenharmony_ci
1287cb93a386Sopenharmony_ci        a.vmovups(A::xmm5, A::Mem{A::rsi});
1288cb93a386Sopenharmony_ci        a.vmovups(A::Mem{A::rsi}, A::xmm5);
1289cb93a386Sopenharmony_ci
1290cb93a386Sopenharmony_ci        a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1291cb93a386Sopenharmony_ci        a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
1292cb93a386Sopenharmony_ci
1293cb93a386Sopenharmony_ci        a.vmovq(A::Mem{A::rdx}, A::xmm15);
1294cb93a386Sopenharmony_ci    },{
1295cb93a386Sopenharmony_ci        /*    VEX    */  /*Op*/  /*  ModRM  */
1296cb93a386Sopenharmony_ci        0xc5,     0xfc,   0x10,  0b00'101'110,
1297cb93a386Sopenharmony_ci        0xc5,     0xfc,   0x11,  0b00'101'110,
1298cb93a386Sopenharmony_ci
1299cb93a386Sopenharmony_ci        0xc5,     0xf8,   0x10,  0b00'101'110,
1300cb93a386Sopenharmony_ci        0xc5,     0xf8,   0x11,  0b00'101'110,
1301cb93a386Sopenharmony_ci
1302cb93a386Sopenharmony_ci        0xc4,0xe2,0x7d,   0x33,  0b00'100'110,
1303cb93a386Sopenharmony_ci        0xc4,0xe2,0x7d,   0x31,  0b00'100'110,
1304cb93a386Sopenharmony_ci
1305cb93a386Sopenharmony_ci        0xc5,     0x79,   0xd6,  0b00'111'010,
1306cb93a386Sopenharmony_ci    });
1307cb93a386Sopenharmony_ci
1308cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1309cb93a386Sopenharmony_ci        a.vmovups(A::ymm5, A::Mem{A::rsp,  0});
1310cb93a386Sopenharmony_ci        a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1311cb93a386Sopenharmony_ci        a.vmovups(A::ymm5, A::Mem{A::rsp,128});
1312cb93a386Sopenharmony_ci
1313cb93a386Sopenharmony_ci        a.vmovups(A::Mem{A::rsp,  0}, A::ymm5);
1314cb93a386Sopenharmony_ci        a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1315cb93a386Sopenharmony_ci        a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
1316cb93a386Sopenharmony_ci    },{
1317cb93a386Sopenharmony_ci        0xc5,0xfc,0x10,0x2c,0x24,
1318cb93a386Sopenharmony_ci        0xc5,0xfc,0x10,0x6c,0x24,0x40,
1319cb93a386Sopenharmony_ci        0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1320cb93a386Sopenharmony_ci
1321cb93a386Sopenharmony_ci        0xc5,0xfc,0x11,0x2c,0x24,
1322cb93a386Sopenharmony_ci        0xc5,0xfc,0x11,0x6c,0x24,0x40,
1323cb93a386Sopenharmony_ci        0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1324cb93a386Sopenharmony_ci    });
1325cb93a386Sopenharmony_ci
1326cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1327cb93a386Sopenharmony_ci        a.movzbq(A::rax, A::Mem{A::rsi});   // Low registers for src and dst.
1328cb93a386Sopenharmony_ci        a.movzbq(A::rax, A::Mem{A::r8,});   // High src register.
1329cb93a386Sopenharmony_ci        a.movzbq(A::r8 , A::Mem{A::rsi});   // High dst register.
1330cb93a386Sopenharmony_ci        a.movzbq(A::r8,  A::Mem{A::rsi, 12});
1331cb93a386Sopenharmony_ci        a.movzbq(A::r8,  A::Mem{A::rsi, 400});
1332cb93a386Sopenharmony_ci
1333cb93a386Sopenharmony_ci        a.movzwq(A::rax, A::Mem{A::rsi});   // Low registers for src and dst.
1334cb93a386Sopenharmony_ci        a.movzwq(A::rax, A::Mem{A::r8,});   // High src register.
1335cb93a386Sopenharmony_ci        a.movzwq(A::r8 , A::Mem{A::rsi});   // High dst register.
1336cb93a386Sopenharmony_ci        a.movzwq(A::r8,  A::Mem{A::rsi, 12});
1337cb93a386Sopenharmony_ci        a.movzwq(A::r8,  A::Mem{A::rsi, 400});
1338cb93a386Sopenharmony_ci
1339cb93a386Sopenharmony_ci        a.vmovd(A::Mem{A::rax}, A::xmm0);
1340cb93a386Sopenharmony_ci        a.vmovd(A::Mem{A::rax}, A::xmm8);
1341cb93a386Sopenharmony_ci        a.vmovd(A::Mem{A::r8 }, A::xmm0);
1342cb93a386Sopenharmony_ci
1343cb93a386Sopenharmony_ci        a.vmovd(A::xmm0, A::Mem{A::rax});
1344cb93a386Sopenharmony_ci        a.vmovd(A::xmm8, A::Mem{A::rax});
1345cb93a386Sopenharmony_ci        a.vmovd(A::xmm0, A::Mem{A::r8 });
1346cb93a386Sopenharmony_ci
1347cb93a386Sopenharmony_ci        a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1348cb93a386Sopenharmony_ci        a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8,  A::TWO });
1349cb93a386Sopenharmony_ci        a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1350cb93a386Sopenharmony_ci
1351cb93a386Sopenharmony_ci        a.vmovd(A::rax, A::xmm0);
1352cb93a386Sopenharmony_ci        a.vmovd(A::rax, A::xmm8);
1353cb93a386Sopenharmony_ci        a.vmovd(A::r8 ,  A::xmm0);
1354cb93a386Sopenharmony_ci
1355cb93a386Sopenharmony_ci        a.vmovd(A::xmm0, A::rax);
1356cb93a386Sopenharmony_ci        a.vmovd(A::xmm8, A::rax);
1357cb93a386Sopenharmony_ci        a.vmovd(A::xmm0, A::r8 );
1358cb93a386Sopenharmony_ci
1359cb93a386Sopenharmony_ci        a.movb(A::Mem{A::rdx}, A::rax);
1360cb93a386Sopenharmony_ci        a.movb(A::Mem{A::rdx}, A::r8 );
1361cb93a386Sopenharmony_ci        a.movb(A::Mem{A::r8 }, A::rax);
1362cb93a386Sopenharmony_ci
1363cb93a386Sopenharmony_ci        a.movb(A::rdx, A::Mem{A::rax});
1364cb93a386Sopenharmony_ci        a.movb(A::rdx, A::Mem{A::r8 });
1365cb93a386Sopenharmony_ci        a.movb(A::r8 , A::Mem{A::rax});
1366cb93a386Sopenharmony_ci
1367cb93a386Sopenharmony_ci        a.movb(A::rdx, 12);
1368cb93a386Sopenharmony_ci        a.movb(A::rax,  4);
1369cb93a386Sopenharmony_ci        a.movb(A::r8 , -1);
1370cb93a386Sopenharmony_ci
1371cb93a386Sopenharmony_ci        a.movb(A::Mem{A::rdx}, 12);
1372cb93a386Sopenharmony_ci        a.movb(A::Mem{A::rax},  4);
1373cb93a386Sopenharmony_ci        a.movb(A::Mem{A::r8 }, -1);
1374cb93a386Sopenharmony_ci    },{
1375cb93a386Sopenharmony_ci        0x48,0x0f,0xb6,0x06,     // movzbq (%rsi), %rax
1376cb93a386Sopenharmony_ci        0x49,0x0f,0xb6,0x00,
1377cb93a386Sopenharmony_ci        0x4c,0x0f,0xb6,0x06,
1378cb93a386Sopenharmony_ci        0x4c,0x0f,0xb6,0x46, 12,
1379cb93a386Sopenharmony_ci        0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1380cb93a386Sopenharmony_ci
1381cb93a386Sopenharmony_ci        0x48,0x0f,0xb7,0x06,    // movzwq (%rsi), %rax
1382cb93a386Sopenharmony_ci        0x49,0x0f,0xb7,0x00,
1383cb93a386Sopenharmony_ci        0x4c,0x0f,0xb7,0x06,
1384cb93a386Sopenharmony_ci        0x4c,0x0f,0xb7,0x46, 12,
1385cb93a386Sopenharmony_ci        0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
1386cb93a386Sopenharmony_ci
1387cb93a386Sopenharmony_ci        0xc5,0xf9,0x7e,0x00,
1388cb93a386Sopenharmony_ci        0xc5,0x79,0x7e,0x00,
1389cb93a386Sopenharmony_ci        0xc4,0xc1,0x79,0x7e,0x00,
1390cb93a386Sopenharmony_ci
1391cb93a386Sopenharmony_ci        0xc5,0xf9,0x6e,0x00,
1392cb93a386Sopenharmony_ci        0xc5,0x79,0x6e,0x00,
1393cb93a386Sopenharmony_ci        0xc4,0xc1,0x79,0x6e,0x00,
1394cb93a386Sopenharmony_ci
1395cb93a386Sopenharmony_ci        0xc5,0xf9,0x6e,0x04,0x88,
1396cb93a386Sopenharmony_ci        0xc4,0x21,0x79,0x6e,0x3c,0x40,
1397cb93a386Sopenharmony_ci        0xc4,0xc1,0x79,0x6e,0x04,0x08,
1398cb93a386Sopenharmony_ci
1399cb93a386Sopenharmony_ci        0xc5,0xf9,0x7e,0xc0,
1400cb93a386Sopenharmony_ci        0xc5,0x79,0x7e,0xc0,
1401cb93a386Sopenharmony_ci        0xc4,0xc1,0x79,0x7e,0xc0,
1402cb93a386Sopenharmony_ci
1403cb93a386Sopenharmony_ci        0xc5,0xf9,0x6e,0xc0,
1404cb93a386Sopenharmony_ci        0xc5,0x79,0x6e,0xc0,
1405cb93a386Sopenharmony_ci        0xc4,0xc1,0x79,0x6e,0xc0,
1406cb93a386Sopenharmony_ci
1407cb93a386Sopenharmony_ci        0x48 ,0x88, 0x02,
1408cb93a386Sopenharmony_ci        0x4c, 0x88, 0x02,
1409cb93a386Sopenharmony_ci        0x49, 0x88, 0x00,
1410cb93a386Sopenharmony_ci
1411cb93a386Sopenharmony_ci        0x48 ,0x8a, 0x10,
1412cb93a386Sopenharmony_ci        0x49, 0x8a, 0x10,
1413cb93a386Sopenharmony_ci        0x4c, 0x8a, 0x00,
1414cb93a386Sopenharmony_ci
1415cb93a386Sopenharmony_ci        0x48, 0xc6, 0xc2, 0x0c,
1416cb93a386Sopenharmony_ci        0x48, 0xc6, 0xc0, 0x04,
1417cb93a386Sopenharmony_ci        0x49, 0xc6, 0xc0, 0xff,
1418cb93a386Sopenharmony_ci
1419cb93a386Sopenharmony_ci        0x48, 0xc6, 0x02, 0x0c,
1420cb93a386Sopenharmony_ci        0x48, 0xc6, 0x00, 0x04,
1421cb93a386Sopenharmony_ci        0x49, 0xc6, 0x00, 0xff,
1422cb93a386Sopenharmony_ci    });
1423cb93a386Sopenharmony_ci
1424cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1425cb93a386Sopenharmony_ci        a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1);   // vpinsrd $1, (%rsi), %xmm8, %xmm1
1426cb93a386Sopenharmony_ci        a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3);   // vpinsrd $3, (%r8), %xmm1, %xmm8
1427cb93a386Sopenharmony_ci
1428cb93a386Sopenharmony_ci        a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4);   // vpinsrw $4, (%rsi), %xmm8, %xmm1
1429cb93a386Sopenharmony_ci        a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12);  // vpinsrw $12, (%r8), %xmm1, %xmm8
1430cb93a386Sopenharmony_ci
1431cb93a386Sopenharmony_ci        a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4);   // vpinsrb $4, (%rsi), %xmm8, %xmm1
1432cb93a386Sopenharmony_ci        a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12);  // vpinsrb $12, (%r8), %xmm1, %xmm8
1433cb93a386Sopenharmony_ci
1434cb93a386Sopenharmony_ci        a.vextracti128(A::xmm1, A::ymm8, 1);  // vextracti128 $1, %ymm8, %xmm1
1435cb93a386Sopenharmony_ci        a.vextracti128(A::xmm8, A::ymm1, 0);  // vextracti128 $0, %ymm1, %xmm8
1436cb93a386Sopenharmony_ci
1437cb93a386Sopenharmony_ci        a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3);  // vpextrd  $3, %xmm8, (%rsi)
1438cb93a386Sopenharmony_ci        a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2);  // vpextrd  $2, %xmm1, (%r8)
1439cb93a386Sopenharmony_ci
1440cb93a386Sopenharmony_ci        a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1441cb93a386Sopenharmony_ci        a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
1442cb93a386Sopenharmony_ci
1443cb93a386Sopenharmony_ci        a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1444cb93a386Sopenharmony_ci        a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
1445cb93a386Sopenharmony_ci    },{
1446cb93a386Sopenharmony_ci        0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1447cb93a386Sopenharmony_ci        0xc4,0x43,0x71, 0x22, 0x00, 3,
1448cb93a386Sopenharmony_ci
1449cb93a386Sopenharmony_ci        0xc5,0xb9,      0xc4, 0x0e,  4,
1450cb93a386Sopenharmony_ci        0xc4,0x41,0x71, 0xc4, 0x00, 12,
1451cb93a386Sopenharmony_ci
1452cb93a386Sopenharmony_ci        0xc4,0xe3,0x39, 0x20, 0x0e,  4,
1453cb93a386Sopenharmony_ci        0xc4,0x43,0x71, 0x20, 0x00, 12,
1454cb93a386Sopenharmony_ci
1455cb93a386Sopenharmony_ci        0xc4,0x63,0x7d,0x39,0xc1, 1,
1456cb93a386Sopenharmony_ci        0xc4,0xc3,0x7d,0x39,0xc8, 0,
1457cb93a386Sopenharmony_ci
1458cb93a386Sopenharmony_ci        0xc4,0x63,0x79,0x16,0x06, 3,
1459cb93a386Sopenharmony_ci        0xc4,0xc3,0x79,0x16,0x08, 2,
1460cb93a386Sopenharmony_ci
1461cb93a386Sopenharmony_ci        0xc4,0x63,0x79, 0x15, 0x06,  7,
1462cb93a386Sopenharmony_ci        0xc4,0xc3,0x79, 0x15, 0x08, 15,
1463cb93a386Sopenharmony_ci
1464cb93a386Sopenharmony_ci        0xc4,0x63,0x79, 0x14, 0x06,  7,
1465cb93a386Sopenharmony_ci        0xc4,0xc3,0x79, 0x14, 0x08, 15,
1466cb93a386Sopenharmony_ci    });
1467cb93a386Sopenharmony_ci
1468cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1469cb93a386Sopenharmony_ci        a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1470cb93a386Sopenharmony_ci    },{
1471cb93a386Sopenharmony_ci        0xc5, 0x9d, 0xdf, 0xda,
1472cb93a386Sopenharmony_ci    });
1473cb93a386Sopenharmony_ci
1474cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1475cb93a386Sopenharmony_ci        A::Label l;
1476cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::ymm2);                                // vmovdqa %ymm2         , %ymm3
1477cb93a386Sopenharmony_ci
1478cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi});                         // vmovdqa  (%rsi)       , %ymm3
1479cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsp});                         // vmovdqa  (%rsp)       , %ymm3
1480cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::r11});                         // vmovdqa  (%r11)       , %ymm3
1481cb93a386Sopenharmony_ci
1482cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi,  4});                     // vmovdqa 4(%rsi)       , %ymm3
1483cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsp,  4});                     // vmovdqa 4(%rsp)       , %ymm3
1484cb93a386Sopenharmony_ci
1485cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi,  4, A::rax, A::EIGHT});   // vmovdqa 4(%rsi,%rax,8), %ymm3
1486cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::r11,  4, A::rax, A::TWO  });   // vmovdqa 4(%r11,%rax,2), %ymm3
1487cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi,  4, A::r11, A::FOUR });   // vmovdqa 4(%rsi,%r11,4), %ymm3
1488cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi,  4, A::r11, A::ONE  });   // vmovdqa 4(%rsi,%r11,1), %ymm3
1489cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi,  4, A::r11});             // vmovdqa 4(%rsi,%r11)  , %ymm3
1490cb93a386Sopenharmony_ci
1491cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi,  64, A::r11});            // vmovdqa  64(%rsi,%r11), %ymm3
1492cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11});            // vmovdqa 128(%rsi,%r11), %ymm3
1493cb93a386Sopenharmony_ci        a.vmovdqa(A::ymm3, &l);                                     // vmovdqa  16(%rip)     , %ymm3
1494cb93a386Sopenharmony_ci
1495cb93a386Sopenharmony_ci        a.vcvttps2dq(A::ymm3, A::ymm2);
1496cb93a386Sopenharmony_ci        a.vcvtdq2ps (A::ymm3, A::ymm2);
1497cb93a386Sopenharmony_ci        a.vcvtps2dq (A::ymm3, A::ymm2);
1498cb93a386Sopenharmony_ci        a.vsqrtps   (A::ymm3, A::ymm2);
1499cb93a386Sopenharmony_ci        a.label(&l);
1500cb93a386Sopenharmony_ci    },{
1501cb93a386Sopenharmony_ci        0xc5,0xfd,0x6f,0xda,
1502cb93a386Sopenharmony_ci
1503cb93a386Sopenharmony_ci        0xc5,0xfd,0x6f,0x1e,
1504cb93a386Sopenharmony_ci        0xc5,0xfd,0x6f,0x1c,0x24,
1505cb93a386Sopenharmony_ci        0xc4,0xc1,0x7d,0x6f,0x1b,
1506cb93a386Sopenharmony_ci
1507cb93a386Sopenharmony_ci        0xc5,0xfd,0x6f,0x5e,0x04,
1508cb93a386Sopenharmony_ci        0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1509cb93a386Sopenharmony_ci
1510cb93a386Sopenharmony_ci        0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1511cb93a386Sopenharmony_ci        0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1512cb93a386Sopenharmony_ci        0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1513cb93a386Sopenharmony_ci        0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1514cb93a386Sopenharmony_ci        0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1515cb93a386Sopenharmony_ci
1516cb93a386Sopenharmony_ci        0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1517cb93a386Sopenharmony_ci        0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1518cb93a386Sopenharmony_ci
1519cb93a386Sopenharmony_ci        0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1520cb93a386Sopenharmony_ci
1521cb93a386Sopenharmony_ci        0xc5,0xfe,0x5b,0xda,
1522cb93a386Sopenharmony_ci        0xc5,0xfc,0x5b,0xda,
1523cb93a386Sopenharmony_ci        0xc5,0xfd,0x5b,0xda,
1524cb93a386Sopenharmony_ci        0xc5,0xfc,0x51,0xda,
1525cb93a386Sopenharmony_ci    });
1526cb93a386Sopenharmony_ci
1527cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1528cb93a386Sopenharmony_ci        a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1529cb93a386Sopenharmony_ci        a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1530cb93a386Sopenharmony_ci
1531cb93a386Sopenharmony_ci        a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1532cb93a386Sopenharmony_ci        a.vcvtph2ps(A::ymm2, A::xmm3);
1533cb93a386Sopenharmony_ci    },{
1534cb93a386Sopenharmony_ci        0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1535cb93a386Sopenharmony_ci        0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1536cb93a386Sopenharmony_ci
1537cb93a386Sopenharmony_ci        0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1538cb93a386Sopenharmony_ci        0xc4,0xe2,0x7d,0x13,0xd3,
1539cb93a386Sopenharmony_ci    });
1540cb93a386Sopenharmony_ci
1541cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1542cb93a386Sopenharmony_ci        a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1543cb93a386Sopenharmony_ci        a.vgatherdps(A::ymm0 , A::ONE  , A::ymm2 , A::rax, A::ymm1 );
1544cb93a386Sopenharmony_ci        a.vgatherdps(A::ymm10, A::ONE  , A::ymm2 , A::rax, A::ymm1 );
1545cb93a386Sopenharmony_ci        a.vgatherdps(A::ymm0 , A::ONE  , A::ymm12, A::rax, A::ymm1 );
1546cb93a386Sopenharmony_ci        a.vgatherdps(A::ymm0 , A::ONE  , A::ymm2 , A::r9 , A::ymm1 );
1547cb93a386Sopenharmony_ci        a.vgatherdps(A::ymm0 , A::ONE  , A::ymm2 , A::rax, A::ymm12);
1548cb93a386Sopenharmony_ci        a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1549cb93a386Sopenharmony_ci    },{
1550cb93a386Sopenharmony_ci        0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1551cb93a386Sopenharmony_ci        0xc4,0xe2,0x75,0x92,0x04,0x10,
1552cb93a386Sopenharmony_ci        0xc4,0x62,0x75,0x92,0x14,0x10,
1553cb93a386Sopenharmony_ci        0xc4,0xa2,0x75,0x92,0x04,0x20,
1554cb93a386Sopenharmony_ci        0xc4,0xc2,0x75,0x92,0x04,0x11,
1555cb93a386Sopenharmony_ci        0xc4,0xe2,0x1d,0x92,0x04,0x10,
1556cb93a386Sopenharmony_ci        0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1557cb93a386Sopenharmony_ci    });
1558cb93a386Sopenharmony_ci
1559cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1560cb93a386Sopenharmony_ci        a.mov(A::rax, A::Mem{A::rdi,   0});
1561cb93a386Sopenharmony_ci        a.mov(A::rax, A::Mem{A::rdi,   1});
1562cb93a386Sopenharmony_ci        a.mov(A::rax, A::Mem{A::rdi, 512});
1563cb93a386Sopenharmony_ci        a.mov(A::r15, A::Mem{A::r13,  42});
1564cb93a386Sopenharmony_ci        a.mov(A::rax, A::Mem{A::r13,  42});
1565cb93a386Sopenharmony_ci        a.mov(A::r15, A::Mem{A::rax,  42});
1566cb93a386Sopenharmony_ci        a.mov(A::rax, 1);
1567cb93a386Sopenharmony_ci        a.mov(A::rax, A::rcx);
1568cb93a386Sopenharmony_ci    },{
1569cb93a386Sopenharmony_ci        0x48, 0x8b, 0x07,
1570cb93a386Sopenharmony_ci        0x48, 0x8b, 0x47, 0x01,
1571cb93a386Sopenharmony_ci        0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1572cb93a386Sopenharmony_ci        0x4d, 0x8b, 0x7d, 0x2a,
1573cb93a386Sopenharmony_ci        0x49, 0x8b, 0x45, 0x2a,
1574cb93a386Sopenharmony_ci        0x4c, 0x8b, 0x78, 0x2a,
1575cb93a386Sopenharmony_ci        0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1576cb93a386Sopenharmony_ci        0x48, 0x89, 0xc8,
1577cb93a386Sopenharmony_ci    });
1578cb93a386Sopenharmony_ci
1579cb93a386Sopenharmony_ci    // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1580cb93a386Sopenharmony_ci
1581cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1582cb93a386Sopenharmony_ci        a.and16b(A::v4, A::v3, A::v1);
1583cb93a386Sopenharmony_ci        a.orr16b(A::v4, A::v3, A::v1);
1584cb93a386Sopenharmony_ci        a.eor16b(A::v4, A::v3, A::v1);
1585cb93a386Sopenharmony_ci        a.bic16b(A::v4, A::v3, A::v1);
1586cb93a386Sopenharmony_ci        a.bsl16b(A::v4, A::v3, A::v1);
1587cb93a386Sopenharmony_ci        a.not16b(A::v4, A::v3);
1588cb93a386Sopenharmony_ci
1589cb93a386Sopenharmony_ci        a.add4s(A::v4, A::v3, A::v1);
1590cb93a386Sopenharmony_ci        a.sub4s(A::v4, A::v3, A::v1);
1591cb93a386Sopenharmony_ci        a.mul4s(A::v4, A::v3, A::v1);
1592cb93a386Sopenharmony_ci
1593cb93a386Sopenharmony_ci        a.cmeq4s(A::v4, A::v3, A::v1);
1594cb93a386Sopenharmony_ci        a.cmgt4s(A::v4, A::v3, A::v1);
1595cb93a386Sopenharmony_ci
1596cb93a386Sopenharmony_ci        a.sub8h(A::v4, A::v3, A::v1);
1597cb93a386Sopenharmony_ci        a.mul8h(A::v4, A::v3, A::v1);
1598cb93a386Sopenharmony_ci
1599cb93a386Sopenharmony_ci        a.fadd4s(A::v4, A::v3, A::v1);
1600cb93a386Sopenharmony_ci        a.fsub4s(A::v4, A::v3, A::v1);
1601cb93a386Sopenharmony_ci        a.fmul4s(A::v4, A::v3, A::v1);
1602cb93a386Sopenharmony_ci        a.fdiv4s(A::v4, A::v3, A::v1);
1603cb93a386Sopenharmony_ci        a.fmin4s(A::v4, A::v3, A::v1);
1604cb93a386Sopenharmony_ci        a.fmax4s(A::v4, A::v3, A::v1);
1605cb93a386Sopenharmony_ci
1606cb93a386Sopenharmony_ci        a.fneg4s (A::v4, A::v3);
1607cb93a386Sopenharmony_ci        a.fsqrt4s(A::v4, A::v3);
1608cb93a386Sopenharmony_ci
1609cb93a386Sopenharmony_ci        a.fmla4s(A::v4, A::v3, A::v1);
1610cb93a386Sopenharmony_ci        a.fmls4s(A::v4, A::v3, A::v1);
1611cb93a386Sopenharmony_ci
1612cb93a386Sopenharmony_ci        a.fcmeq4s(A::v4, A::v3, A::v1);
1613cb93a386Sopenharmony_ci        a.fcmgt4s(A::v4, A::v3, A::v1);
1614cb93a386Sopenharmony_ci        a.fcmge4s(A::v4, A::v3, A::v1);
1615cb93a386Sopenharmony_ci    },{
1616cb93a386Sopenharmony_ci        0x64,0x1c,0x21,0x4e,
1617cb93a386Sopenharmony_ci        0x64,0x1c,0xa1,0x4e,
1618cb93a386Sopenharmony_ci        0x64,0x1c,0x21,0x6e,
1619cb93a386Sopenharmony_ci        0x64,0x1c,0x61,0x4e,
1620cb93a386Sopenharmony_ci        0x64,0x1c,0x61,0x6e,
1621cb93a386Sopenharmony_ci        0x64,0x58,0x20,0x6e,
1622cb93a386Sopenharmony_ci
1623cb93a386Sopenharmony_ci        0x64,0x84,0xa1,0x4e,
1624cb93a386Sopenharmony_ci        0x64,0x84,0xa1,0x6e,
1625cb93a386Sopenharmony_ci        0x64,0x9c,0xa1,0x4e,
1626cb93a386Sopenharmony_ci
1627cb93a386Sopenharmony_ci        0x64,0x8c,0xa1,0x6e,
1628cb93a386Sopenharmony_ci        0x64,0x34,0xa1,0x4e,
1629cb93a386Sopenharmony_ci
1630cb93a386Sopenharmony_ci        0x64,0x84,0x61,0x6e,
1631cb93a386Sopenharmony_ci        0x64,0x9c,0x61,0x4e,
1632cb93a386Sopenharmony_ci
1633cb93a386Sopenharmony_ci        0x64,0xd4,0x21,0x4e,
1634cb93a386Sopenharmony_ci        0x64,0xd4,0xa1,0x4e,
1635cb93a386Sopenharmony_ci        0x64,0xdc,0x21,0x6e,
1636cb93a386Sopenharmony_ci        0x64,0xfc,0x21,0x6e,
1637cb93a386Sopenharmony_ci        0x64,0xf4,0xa1,0x4e,
1638cb93a386Sopenharmony_ci        0x64,0xf4,0x21,0x4e,
1639cb93a386Sopenharmony_ci
1640cb93a386Sopenharmony_ci        0x64,0xf8,0xa0,0x6e,
1641cb93a386Sopenharmony_ci        0x64,0xf8,0xa1,0x6e,
1642cb93a386Sopenharmony_ci
1643cb93a386Sopenharmony_ci        0x64,0xcc,0x21,0x4e,
1644cb93a386Sopenharmony_ci        0x64,0xcc,0xa1,0x4e,
1645cb93a386Sopenharmony_ci
1646cb93a386Sopenharmony_ci        0x64,0xe4,0x21,0x4e,
1647cb93a386Sopenharmony_ci        0x64,0xe4,0xa1,0x6e,
1648cb93a386Sopenharmony_ci        0x64,0xe4,0x21,0x6e,
1649cb93a386Sopenharmony_ci    });
1650cb93a386Sopenharmony_ci
1651cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1652cb93a386Sopenharmony_ci        a.shl4s(A::v4, A::v3,  0);
1653cb93a386Sopenharmony_ci        a.shl4s(A::v4, A::v3,  1);
1654cb93a386Sopenharmony_ci        a.shl4s(A::v4, A::v3,  8);
1655cb93a386Sopenharmony_ci        a.shl4s(A::v4, A::v3, 16);
1656cb93a386Sopenharmony_ci        a.shl4s(A::v4, A::v3, 31);
1657cb93a386Sopenharmony_ci
1658cb93a386Sopenharmony_ci        a.sshr4s(A::v4, A::v3,  1);
1659cb93a386Sopenharmony_ci        a.sshr4s(A::v4, A::v3,  8);
1660cb93a386Sopenharmony_ci        a.sshr4s(A::v4, A::v3, 31);
1661cb93a386Sopenharmony_ci
1662cb93a386Sopenharmony_ci        a.ushr4s(A::v4, A::v3,  1);
1663cb93a386Sopenharmony_ci        a.ushr4s(A::v4, A::v3,  8);
1664cb93a386Sopenharmony_ci        a.ushr4s(A::v4, A::v3, 31);
1665cb93a386Sopenharmony_ci
1666cb93a386Sopenharmony_ci        a.ushr8h(A::v4, A::v3,  1);
1667cb93a386Sopenharmony_ci        a.ushr8h(A::v4, A::v3,  8);
1668cb93a386Sopenharmony_ci        a.ushr8h(A::v4, A::v3, 15);
1669cb93a386Sopenharmony_ci    },{
1670cb93a386Sopenharmony_ci        0x64,0x54,0x20,0x4f,
1671cb93a386Sopenharmony_ci        0x64,0x54,0x21,0x4f,
1672cb93a386Sopenharmony_ci        0x64,0x54,0x28,0x4f,
1673cb93a386Sopenharmony_ci        0x64,0x54,0x30,0x4f,
1674cb93a386Sopenharmony_ci        0x64,0x54,0x3f,0x4f,
1675cb93a386Sopenharmony_ci
1676cb93a386Sopenharmony_ci        0x64,0x04,0x3f,0x4f,
1677cb93a386Sopenharmony_ci        0x64,0x04,0x38,0x4f,
1678cb93a386Sopenharmony_ci        0x64,0x04,0x21,0x4f,
1679cb93a386Sopenharmony_ci
1680cb93a386Sopenharmony_ci        0x64,0x04,0x3f,0x6f,
1681cb93a386Sopenharmony_ci        0x64,0x04,0x38,0x6f,
1682cb93a386Sopenharmony_ci        0x64,0x04,0x21,0x6f,
1683cb93a386Sopenharmony_ci
1684cb93a386Sopenharmony_ci        0x64,0x04,0x1f,0x6f,
1685cb93a386Sopenharmony_ci        0x64,0x04,0x18,0x6f,
1686cb93a386Sopenharmony_ci        0x64,0x04,0x11,0x6f,
1687cb93a386Sopenharmony_ci    });
1688cb93a386Sopenharmony_ci
1689cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1690cb93a386Sopenharmony_ci        a.sli4s(A::v4, A::v3,  0);
1691cb93a386Sopenharmony_ci        a.sli4s(A::v4, A::v3,  1);
1692cb93a386Sopenharmony_ci        a.sli4s(A::v4, A::v3,  8);
1693cb93a386Sopenharmony_ci        a.sli4s(A::v4, A::v3, 16);
1694cb93a386Sopenharmony_ci        a.sli4s(A::v4, A::v3, 31);
1695cb93a386Sopenharmony_ci    },{
1696cb93a386Sopenharmony_ci        0x64,0x54,0x20,0x6f,
1697cb93a386Sopenharmony_ci        0x64,0x54,0x21,0x6f,
1698cb93a386Sopenharmony_ci        0x64,0x54,0x28,0x6f,
1699cb93a386Sopenharmony_ci        0x64,0x54,0x30,0x6f,
1700cb93a386Sopenharmony_ci        0x64,0x54,0x3f,0x6f,
1701cb93a386Sopenharmony_ci    });
1702cb93a386Sopenharmony_ci
1703cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1704cb93a386Sopenharmony_ci        a.scvtf4s (A::v4, A::v3);
1705cb93a386Sopenharmony_ci        a.fcvtzs4s(A::v4, A::v3);
1706cb93a386Sopenharmony_ci        a.fcvtns4s(A::v4, A::v3);
1707cb93a386Sopenharmony_ci        a.frintp4s(A::v4, A::v3);
1708cb93a386Sopenharmony_ci        a.frintm4s(A::v4, A::v3);
1709cb93a386Sopenharmony_ci        a.fcvtn   (A::v4, A::v3);
1710cb93a386Sopenharmony_ci        a.fcvtl   (A::v4, A::v3);
1711cb93a386Sopenharmony_ci    },{
1712cb93a386Sopenharmony_ci        0x64,0xd8,0x21,0x4e,
1713cb93a386Sopenharmony_ci        0x64,0xb8,0xa1,0x4e,
1714cb93a386Sopenharmony_ci        0x64,0xa8,0x21,0x4e,
1715cb93a386Sopenharmony_ci        0x64,0x88,0xa1,0x4e,
1716cb93a386Sopenharmony_ci        0x64,0x98,0x21,0x4e,
1717cb93a386Sopenharmony_ci        0x64,0x68,0x21,0x0e,
1718cb93a386Sopenharmony_ci        0x64,0x78,0x21,0x0e,
1719cb93a386Sopenharmony_ci    });
1720cb93a386Sopenharmony_ci
1721cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1722cb93a386Sopenharmony_ci        a.sub (A::sp, A::sp, 32);  // sub   sp, sp, #32
1723cb93a386Sopenharmony_ci        a.strq(A::v0, A::sp, 1);   // str   q0, [sp, #16]
1724cb93a386Sopenharmony_ci        a.strq(A::v1, A::sp);      // str   q1, [sp]
1725cb93a386Sopenharmony_ci        a.strd(A::v0, A::sp, 6);   // str   s0, [sp, #48]
1726cb93a386Sopenharmony_ci        a.strs(A::v0, A::sp, 6);   // str   s0, [sp, #24]
1727cb93a386Sopenharmony_ci        a.strh(A::v0, A::sp, 10);  // str   h0, [sp, #20]
1728cb93a386Sopenharmony_ci        a.strb(A::v0, A::sp, 47);  // str   b0, [sp, #47]
1729cb93a386Sopenharmony_ci        a.ldrb(A::v9, A::sp, 42);  // ldr   b9, [sp, #42]
1730cb93a386Sopenharmony_ci        a.ldrh(A::v9, A::sp, 47);  // ldr   h9, [sp, #94]
1731cb93a386Sopenharmony_ci        a.ldrs(A::v7, A::sp, 10);  // ldr   s7, [sp, #40]
1732cb93a386Sopenharmony_ci        a.ldrd(A::v7, A::sp,  1);  // ldr   d7, [sp, #8]
1733cb93a386Sopenharmony_ci        a.ldrq(A::v5, A::sp, 128); // ldr   q5, [sp, #2048]
1734cb93a386Sopenharmony_ci        a.add (A::sp, A::sp, 32);  // add   sp, sp, #32
1735cb93a386Sopenharmony_ci    },{
1736cb93a386Sopenharmony_ci         0xff,0x83,0x00,0xd1,
1737cb93a386Sopenharmony_ci         0xe0,0x07,0x80,0x3d,
1738cb93a386Sopenharmony_ci         0xe1,0x03,0x80,0x3d,
1739cb93a386Sopenharmony_ci         0xe0,0x1b,0x00,0xfd,
1740cb93a386Sopenharmony_ci         0xe0,0x1b,0x00,0xbd,
1741cb93a386Sopenharmony_ci         0xe0,0x2b,0x00,0x7d,
1742cb93a386Sopenharmony_ci         0xe0,0xbf,0x00,0x3d,
1743cb93a386Sopenharmony_ci         0xe9,0xab,0x40,0x3d,
1744cb93a386Sopenharmony_ci         0xe9,0xbf,0x40,0x7d,
1745cb93a386Sopenharmony_ci         0xe7,0x2b,0x40,0xbd,
1746cb93a386Sopenharmony_ci         0xe7,0x07,0x40,0xfd,
1747cb93a386Sopenharmony_ci         0xe5,0x03,0xc2,0x3d,
1748cb93a386Sopenharmony_ci         0xff,0x83,0x00,0x91,
1749cb93a386Sopenharmony_ci    });
1750cb93a386Sopenharmony_ci
1751cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1752cb93a386Sopenharmony_ci        a.brk(0);
1753cb93a386Sopenharmony_ci        a.brk(65535);
1754cb93a386Sopenharmony_ci
1755cb93a386Sopenharmony_ci        a.ret(A::x30);   // Conventional ret using link register.
1756cb93a386Sopenharmony_ci        a.ret(A::x13);   // Can really return using any register if we like.
1757cb93a386Sopenharmony_ci
1758cb93a386Sopenharmony_ci        a.add(A::x2, A::x2,  4);
1759cb93a386Sopenharmony_ci        a.add(A::x3, A::x2, 32);
1760cb93a386Sopenharmony_ci
1761cb93a386Sopenharmony_ci        a.sub(A::x2, A::x2, 4);
1762cb93a386Sopenharmony_ci        a.sub(A::x3, A::x2, 32);
1763cb93a386Sopenharmony_ci
1764cb93a386Sopenharmony_ci        a.subs(A::x2, A::x2,  4);
1765cb93a386Sopenharmony_ci        a.subs(A::x3, A::x2, 32);
1766cb93a386Sopenharmony_ci
1767cb93a386Sopenharmony_ci        a.subs(A::xzr, A::x2, 4);  // These are actually the same instruction!
1768cb93a386Sopenharmony_ci        a.cmp(A::x2, 4);
1769cb93a386Sopenharmony_ci
1770cb93a386Sopenharmony_ci        A::Label l;
1771cb93a386Sopenharmony_ci        a.label(&l);
1772cb93a386Sopenharmony_ci        a.bne(&l);
1773cb93a386Sopenharmony_ci        a.bne(&l);
1774cb93a386Sopenharmony_ci        a.blt(&l);
1775cb93a386Sopenharmony_ci        a.b(&l);
1776cb93a386Sopenharmony_ci        a.cbnz(A::x2, &l);
1777cb93a386Sopenharmony_ci        a.cbz(A::x2, &l);
1778cb93a386Sopenharmony_ci
1779cb93a386Sopenharmony_ci        a.add(A::x3, A::x2, A::x1);             // add x3,x2,x1
1780cb93a386Sopenharmony_ci        a.add(A::x3, A::x2, A::x1, A::ASR, 3);  // add x3,x2,x1, asr #3
1781cb93a386Sopenharmony_ci    },{
1782cb93a386Sopenharmony_ci        0x00,0x00,0x20,0xd4,
1783cb93a386Sopenharmony_ci        0xe0,0xff,0x3f,0xd4,
1784cb93a386Sopenharmony_ci
1785cb93a386Sopenharmony_ci        0xc0,0x03,0x5f,0xd6,
1786cb93a386Sopenharmony_ci        0xa0,0x01,0x5f,0xd6,
1787cb93a386Sopenharmony_ci
1788cb93a386Sopenharmony_ci        0x42,0x10,0x00,0x91,
1789cb93a386Sopenharmony_ci        0x43,0x80,0x00,0x91,
1790cb93a386Sopenharmony_ci
1791cb93a386Sopenharmony_ci        0x42,0x10,0x00,0xd1,
1792cb93a386Sopenharmony_ci        0x43,0x80,0x00,0xd1,
1793cb93a386Sopenharmony_ci
1794cb93a386Sopenharmony_ci        0x42,0x10,0x00,0xf1,
1795cb93a386Sopenharmony_ci        0x43,0x80,0x00,0xf1,
1796cb93a386Sopenharmony_ci
1797cb93a386Sopenharmony_ci        0x5f,0x10,0x00,0xf1,
1798cb93a386Sopenharmony_ci        0x5f,0x10,0x00,0xf1,
1799cb93a386Sopenharmony_ci
1800cb93a386Sopenharmony_ci        0x01,0x00,0x00,0x54,   // b.ne #0
1801cb93a386Sopenharmony_ci        0xe1,0xff,0xff,0x54,   // b.ne #-4
1802cb93a386Sopenharmony_ci        0xcb,0xff,0xff,0x54,   // b.lt #-8
1803cb93a386Sopenharmony_ci        0xae,0xff,0xff,0x54,   // b.al #-12
1804cb93a386Sopenharmony_ci        0x82,0xff,0xff,0xb5,   // cbnz x2, #-16
1805cb93a386Sopenharmony_ci        0x62,0xff,0xff,0xb4,   // cbz x2, #-20
1806cb93a386Sopenharmony_ci
1807cb93a386Sopenharmony_ci        0x43,0x00,0x01,0x8b,
1808cb93a386Sopenharmony_ci        0x43,0x0c,0x81,0x8b,
1809cb93a386Sopenharmony_ci    });
1810cb93a386Sopenharmony_ci
1811cb93a386Sopenharmony_ci    // Can we cbz() to a not-yet-defined label?
1812cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1813cb93a386Sopenharmony_ci        A::Label l;
1814cb93a386Sopenharmony_ci        a.cbz(A::x2, &l);
1815cb93a386Sopenharmony_ci        a.add(A::x3, A::x2, 32);
1816cb93a386Sopenharmony_ci        a.label(&l);
1817cb93a386Sopenharmony_ci        a.ret(A::x30);
1818cb93a386Sopenharmony_ci    },{
1819cb93a386Sopenharmony_ci        0x42,0x00,0x00,0xb4,  // cbz x2, #8
1820cb93a386Sopenharmony_ci        0x43,0x80,0x00,0x91,  // add x3, x2, #32
1821cb93a386Sopenharmony_ci        0xc0,0x03,0x5f,0xd6,  // ret
1822cb93a386Sopenharmony_ci    });
1823cb93a386Sopenharmony_ci
1824cb93a386Sopenharmony_ci    // If we start a label as a backward label,
1825cb93a386Sopenharmony_ci    // can we redefine it to be a future label?
1826cb93a386Sopenharmony_ci    // (Not sure this is useful... just want to test it works.)
1827cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1828cb93a386Sopenharmony_ci        A::Label l1;
1829cb93a386Sopenharmony_ci        a.label(&l1);
1830cb93a386Sopenharmony_ci        a.add(A::x3, A::x2, 32);
1831cb93a386Sopenharmony_ci        a.cbz(A::x2, &l1);          // This will jump backward... nothing sneaky.
1832cb93a386Sopenharmony_ci
1833cb93a386Sopenharmony_ci        A::Label l2;                // Start off the same...
1834cb93a386Sopenharmony_ci        a.label(&l2);
1835cb93a386Sopenharmony_ci        a.add(A::x3, A::x2, 32);
1836cb93a386Sopenharmony_ci        a.cbz(A::x2, &l2);          // Looks like this will go backward...
1837cb93a386Sopenharmony_ci        a.add(A::x2, A::x2, 4);
1838cb93a386Sopenharmony_ci        a.add(A::x3, A::x2, 32);
1839cb93a386Sopenharmony_ci        a.label(&l2);               // But no... actually forward!  What a switcheroo!
1840cb93a386Sopenharmony_ci    },{
1841cb93a386Sopenharmony_ci        0x43,0x80,0x00,0x91,  // add x3, x2, #32
1842cb93a386Sopenharmony_ci        0xe2,0xff,0xff,0xb4,  // cbz x2, #-4
1843cb93a386Sopenharmony_ci
1844cb93a386Sopenharmony_ci        0x43,0x80,0x00,0x91,  // add x3, x2, #32
1845cb93a386Sopenharmony_ci        0x62,0x00,0x00,0xb4,  // cbz x2, #12
1846cb93a386Sopenharmony_ci        0x42,0x10,0x00,0x91,  // add x2, x2, #4
1847cb93a386Sopenharmony_ci        0x43,0x80,0x00,0x91,  // add x3, x2, #32
1848cb93a386Sopenharmony_ci    });
1849cb93a386Sopenharmony_ci
1850cb93a386Sopenharmony_ci    // Loading from a label on ARM.
1851cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1852cb93a386Sopenharmony_ci        A::Label fore,aft;
1853cb93a386Sopenharmony_ci        a.label(&fore);
1854cb93a386Sopenharmony_ci        a.word(0x01234567);
1855cb93a386Sopenharmony_ci        a.ldrq(A::v1, &fore);
1856cb93a386Sopenharmony_ci        a.ldrq(A::v2, &aft);
1857cb93a386Sopenharmony_ci        a.label(&aft);
1858cb93a386Sopenharmony_ci        a.word(0x76543210);
1859cb93a386Sopenharmony_ci    },{
1860cb93a386Sopenharmony_ci        0x67,0x45,0x23,0x01,
1861cb93a386Sopenharmony_ci        0xe1,0xff,0xff,0x9c,  // ldr q1, #-4
1862cb93a386Sopenharmony_ci        0x22,0x00,0x00,0x9c,  // ldr q2, #4
1863cb93a386Sopenharmony_ci        0x10,0x32,0x54,0x76,
1864cb93a386Sopenharmony_ci    });
1865cb93a386Sopenharmony_ci
1866cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1867cb93a386Sopenharmony_ci        a.ldrq(A::v0, A::x8);
1868cb93a386Sopenharmony_ci        a.strq(A::v0, A::x8);
1869cb93a386Sopenharmony_ci    },{
1870cb93a386Sopenharmony_ci        0x00,0x01,0xc0,0x3d,
1871cb93a386Sopenharmony_ci        0x00,0x01,0x80,0x3d,
1872cb93a386Sopenharmony_ci    });
1873cb93a386Sopenharmony_ci
1874cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1875cb93a386Sopenharmony_ci        a.dup4s  (A::v0, A::x8);
1876cb93a386Sopenharmony_ci        a.ld1r4s (A::v0, A::x8);  // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
1877cb93a386Sopenharmony_ci        a.ld1r8h (A::v0, A::x8);
1878cb93a386Sopenharmony_ci        a.ld1r16b(A::v0, A::x8);
1879cb93a386Sopenharmony_ci    },{
1880cb93a386Sopenharmony_ci        0x00,0x0d,0x04,0x4e,
1881cb93a386Sopenharmony_ci        0x00,0xc9,0x40,0x4d,
1882cb93a386Sopenharmony_ci        0x00,0xc5,0x40,0x4d,
1883cb93a386Sopenharmony_ci        0x00,0xc1,0x40,0x4d,
1884cb93a386Sopenharmony_ci    });
1885cb93a386Sopenharmony_ci
1886cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1887cb93a386Sopenharmony_ci        a.ld24s(A::v0, A::x8);  // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding
1888cb93a386Sopenharmony_ci        a.ld44s(A::v0, A::x8);
1889cb93a386Sopenharmony_ci        a.st24s(A::v0, A::x8);
1890cb93a386Sopenharmony_ci        a.st44s(A::v0, A::x8);  // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding
1891cb93a386Sopenharmony_ci
1892cb93a386Sopenharmony_ci        a.ld24s(A::v0, A::x8, 0);  //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding
1893cb93a386Sopenharmony_ci        a.ld24s(A::v0, A::x8, 1);
1894cb93a386Sopenharmony_ci        a.ld24s(A::v0, A::x8, 2);
1895cb93a386Sopenharmony_ci        a.ld24s(A::v0, A::x8, 3);
1896cb93a386Sopenharmony_ci
1897cb93a386Sopenharmony_ci        a.ld44s(A::v0, A::x8, 0);  // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8]
1898cb93a386Sopenharmony_ci        a.ld44s(A::v0, A::x8, 1);
1899cb93a386Sopenharmony_ci        a.ld44s(A::v0, A::x8, 2);
1900cb93a386Sopenharmony_ci        a.ld44s(A::v0, A::x8, 3);
1901cb93a386Sopenharmony_ci    },{
1902cb93a386Sopenharmony_ci        0x00,0x89,0x40,0x4c,
1903cb93a386Sopenharmony_ci        0x00,0x09,0x40,0x4c,
1904cb93a386Sopenharmony_ci        0x00,0x89,0x00,0x4c,
1905cb93a386Sopenharmony_ci        0x00,0x09,0x00,0x4c,
1906cb93a386Sopenharmony_ci
1907cb93a386Sopenharmony_ci        0x00,0x81,0x60,0x0d,
1908cb93a386Sopenharmony_ci        0x00,0x91,0x60,0x0d,
1909cb93a386Sopenharmony_ci        0x00,0x81,0x60,0x4d,
1910cb93a386Sopenharmony_ci        0x00,0x91,0x60,0x4d,
1911cb93a386Sopenharmony_ci
1912cb93a386Sopenharmony_ci        0x00,0xa1,0x60,0x0d,
1913cb93a386Sopenharmony_ci        0x00,0xb1,0x60,0x0d,
1914cb93a386Sopenharmony_ci        0x00,0xa1,0x60,0x4d,
1915cb93a386Sopenharmony_ci        0x00,0xb1,0x60,0x4d,
1916cb93a386Sopenharmony_ci    });
1917cb93a386Sopenharmony_ci
1918cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1919cb93a386Sopenharmony_ci        a.xtns2h(A::v0, A::v0);
1920cb93a386Sopenharmony_ci        a.xtnh2b(A::v0, A::v0);
1921cb93a386Sopenharmony_ci        a.strs  (A::v0, A::x0);
1922cb93a386Sopenharmony_ci
1923cb93a386Sopenharmony_ci        a.ldrs   (A::v0, A::x0);
1924cb93a386Sopenharmony_ci        a.uxtlb2h(A::v0, A::v0);
1925cb93a386Sopenharmony_ci        a.uxtlh2s(A::v0, A::v0);
1926cb93a386Sopenharmony_ci
1927cb93a386Sopenharmony_ci        a.uminv4s(A::v3, A::v4);
1928cb93a386Sopenharmony_ci        a.movs   (A::x3, A::v4,0);  // mov.s w3,v4[0]
1929cb93a386Sopenharmony_ci        a.movs   (A::x3, A::v4,1);  // mov.s w3,v4[1]
1930cb93a386Sopenharmony_ci        a.inss   (A::v4, A::x3,3);  // ins.s v4[3],w3
1931cb93a386Sopenharmony_ci    },{
1932cb93a386Sopenharmony_ci        0x00,0x28,0x61,0x0e,
1933cb93a386Sopenharmony_ci        0x00,0x28,0x21,0x0e,
1934cb93a386Sopenharmony_ci        0x00,0x00,0x00,0xbd,
1935cb93a386Sopenharmony_ci
1936cb93a386Sopenharmony_ci        0x00,0x00,0x40,0xbd,
1937cb93a386Sopenharmony_ci        0x00,0xa4,0x08,0x2f,
1938cb93a386Sopenharmony_ci        0x00,0xa4,0x10,0x2f,
1939cb93a386Sopenharmony_ci
1940cb93a386Sopenharmony_ci        0x83,0xa8,0xb1,0x6e,
1941cb93a386Sopenharmony_ci        0x83,0x3c,0x04,0x0e,
1942cb93a386Sopenharmony_ci        0x83,0x3c,0x0c,0x0e,
1943cb93a386Sopenharmony_ci        0x64,0x1c,0x1c,0x4e,
1944cb93a386Sopenharmony_ci    });
1945cb93a386Sopenharmony_ci
1946cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1947cb93a386Sopenharmony_ci        a.ldrb(A::v0, A::x8);
1948cb93a386Sopenharmony_ci        a.strb(A::v0, A::x8);
1949cb93a386Sopenharmony_ci    },{
1950cb93a386Sopenharmony_ci        0x00,0x01,0x40,0x3d,
1951cb93a386Sopenharmony_ci        0x00,0x01,0x00,0x3d,
1952cb93a386Sopenharmony_ci    });
1953cb93a386Sopenharmony_ci
1954cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1955cb93a386Sopenharmony_ci        a.ldrd(A::x0, A::x1, 3);   // ldr  x0, [x1, #24]
1956cb93a386Sopenharmony_ci        a.ldrs(A::x0, A::x1, 3);   // ldr  w0, [x1, #12]
1957cb93a386Sopenharmony_ci        a.ldrh(A::x0, A::x1, 3);   // ldrh w0, [x1, #6]
1958cb93a386Sopenharmony_ci        a.ldrb(A::x0, A::x1, 3);   // ldrb w0, [x1, #3]
1959cb93a386Sopenharmony_ci
1960cb93a386Sopenharmony_ci        a.strs(A::x0, A::x1, 3);   // str  w0, [x1, #12]
1961cb93a386Sopenharmony_ci    },{
1962cb93a386Sopenharmony_ci        0x20,0x0c,0x40,0xf9,
1963cb93a386Sopenharmony_ci        0x20,0x0c,0x40,0xb9,
1964cb93a386Sopenharmony_ci        0x20,0x0c,0x40,0x79,
1965cb93a386Sopenharmony_ci        0x20,0x0c,0x40,0x39,
1966cb93a386Sopenharmony_ci
1967cb93a386Sopenharmony_ci        0x20,0x0c,0x00,0xb9,
1968cb93a386Sopenharmony_ci    });
1969cb93a386Sopenharmony_ci
1970cb93a386Sopenharmony_ci    test_asm(r, [&](A& a) {
1971cb93a386Sopenharmony_ci        a.tbl   (A::v0, A::v1, A::v2);
1972cb93a386Sopenharmony_ci        a.uzp14s(A::v0, A::v1, A::v2);
1973cb93a386Sopenharmony_ci        a.uzp24s(A::v0, A::v1, A::v2);
1974cb93a386Sopenharmony_ci        a.zip14s(A::v0, A::v1, A::v2);
1975cb93a386Sopenharmony_ci        a.zip24s(A::v0, A::v1, A::v2);
1976cb93a386Sopenharmony_ci    },{
1977cb93a386Sopenharmony_ci        0x20,0x00,0x02,0x4e,
1978cb93a386Sopenharmony_ci        0x20,0x18,0x82,0x4e,
1979cb93a386Sopenharmony_ci        0x20,0x58,0x82,0x4e,
1980cb93a386Sopenharmony_ci        0x20,0x38,0x82,0x4e,
1981cb93a386Sopenharmony_ci        0x20,0x78,0x82,0x4e,
1982cb93a386Sopenharmony_ci    });
1983cb93a386Sopenharmony_ci}
1984cb93a386Sopenharmony_ci
1985cb93a386Sopenharmony_ciDEF_TEST(SkVM_approx_math, r) {
1986cb93a386Sopenharmony_ci    auto eval = [](int N, float values[], auto fn) {
1987cb93a386Sopenharmony_ci        skvm::Builder b;
1988cb93a386Sopenharmony_ci        skvm::Ptr inout  = b.varying<float>();
1989cb93a386Sopenharmony_ci
1990cb93a386Sopenharmony_ci        b.storeF(inout, fn(&b, b.loadF(inout)));
1991cb93a386Sopenharmony_ci
1992cb93a386Sopenharmony_ci        b.done().eval(N, values);
1993cb93a386Sopenharmony_ci    };
1994cb93a386Sopenharmony_ci
1995cb93a386Sopenharmony_ci    auto compare = [r](int N, const float values[], const float expected[]) {
1996cb93a386Sopenharmony_ci        for (int i = 0; i < N; ++i) {
1997cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
1998cb93a386Sopenharmony_ci        }
1999cb93a386Sopenharmony_ci    };
2000cb93a386Sopenharmony_ci
2001cb93a386Sopenharmony_ci    // log2
2002cb93a386Sopenharmony_ci    {
2003cb93a386Sopenharmony_ci        float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
2004cb93a386Sopenharmony_ci        constexpr int N = SK_ARRAY_COUNT(values);
2005cb93a386Sopenharmony_ci        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2006cb93a386Sopenharmony_ci            return b->approx_log2(v);
2007cb93a386Sopenharmony_ci        });
2008cb93a386Sopenharmony_ci        const float expected[] = {-2, -1, 0, 1, 2, 3};
2009cb93a386Sopenharmony_ci        compare(N, values, expected);
2010cb93a386Sopenharmony_ci    }
2011cb93a386Sopenharmony_ci
2012cb93a386Sopenharmony_ci    // pow2
2013cb93a386Sopenharmony_ci    {
2014cb93a386Sopenharmony_ci        float values[] = {-2, -1, 0, 1, 2, 3};
2015cb93a386Sopenharmony_ci        constexpr int N = SK_ARRAY_COUNT(values);
2016cb93a386Sopenharmony_ci        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
2017cb93a386Sopenharmony_ci            return b->approx_pow2(v);
2018cb93a386Sopenharmony_ci        });
2019cb93a386Sopenharmony_ci        const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
2020cb93a386Sopenharmony_ci        compare(N, values, expected);
2021cb93a386Sopenharmony_ci    }
2022cb93a386Sopenharmony_ci
2023cb93a386Sopenharmony_ci    // powf -- x^0.5
2024cb93a386Sopenharmony_ci    {
2025cb93a386Sopenharmony_ci        float bases[] = {0, 1, 4, 9, 16};
2026cb93a386Sopenharmony_ci        constexpr int N = SK_ARRAY_COUNT(bases);
2027cb93a386Sopenharmony_ci        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
2028cb93a386Sopenharmony_ci            return b->approx_powf(base, b->splat(0.5f));
2029cb93a386Sopenharmony_ci        });
2030cb93a386Sopenharmony_ci        const float expected[] = {0, 1, 2, 3, 4};
2031cb93a386Sopenharmony_ci        compare(N, bases, expected);
2032cb93a386Sopenharmony_ci    }
2033cb93a386Sopenharmony_ci    // powf -- 3^x
2034cb93a386Sopenharmony_ci    {
2035cb93a386Sopenharmony_ci        float exps[] = {-2, -1, 0, 1, 2};
2036cb93a386Sopenharmony_ci        constexpr int N = SK_ARRAY_COUNT(exps);
2037cb93a386Sopenharmony_ci        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
2038cb93a386Sopenharmony_ci            return b->approx_powf(b->splat(3.0f), exp);
2039cb93a386Sopenharmony_ci        });
2040cb93a386Sopenharmony_ci        const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
2041cb93a386Sopenharmony_ci        compare(N, exps, expected);
2042cb93a386Sopenharmony_ci    }
2043cb93a386Sopenharmony_ci
2044cb93a386Sopenharmony_ci    auto test = [r](float arg, float expected, float tolerance, auto prog) {
2045cb93a386Sopenharmony_ci        skvm::Builder b;
2046cb93a386Sopenharmony_ci        skvm::Ptr inout  = b.varying<float>();
2047cb93a386Sopenharmony_ci        b.storeF(inout, prog(b.loadF(inout)));
2048cb93a386Sopenharmony_ci        float actual = arg;
2049cb93a386Sopenharmony_ci        b.done().eval(1, &actual);
2050cb93a386Sopenharmony_ci
2051cb93a386Sopenharmony_ci        float err = std::abs(actual - expected);
2052cb93a386Sopenharmony_ci
2053cb93a386Sopenharmony_ci        if (err > tolerance) {
2054cb93a386Sopenharmony_ci    //        SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
2055cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, true);
2056cb93a386Sopenharmony_ci        }
2057cb93a386Sopenharmony_ci        return err;
2058cb93a386Sopenharmony_ci    };
2059cb93a386Sopenharmony_ci
2060cb93a386Sopenharmony_ci    auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
2061cb93a386Sopenharmony_ci        skvm::Builder b;
2062cb93a386Sopenharmony_ci        skvm::Ptr in0  = b.varying<float>();
2063cb93a386Sopenharmony_ci        skvm::Ptr in1  = b.varying<float>();
2064cb93a386Sopenharmony_ci        skvm::Ptr out  = b.varying<float>();
2065cb93a386Sopenharmony_ci        b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
2066cb93a386Sopenharmony_ci        float actual;
2067cb93a386Sopenharmony_ci        b.done().eval(1, &arg0, &arg1, &actual);
2068cb93a386Sopenharmony_ci
2069cb93a386Sopenharmony_ci        float err = std::abs(actual - expected);
2070cb93a386Sopenharmony_ci
2071cb93a386Sopenharmony_ci        if (err > tolerance) {
2072cb93a386Sopenharmony_ci    //        SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
2073cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, true);
2074cb93a386Sopenharmony_ci        }
2075cb93a386Sopenharmony_ci        return err;
2076cb93a386Sopenharmony_ci    };
2077cb93a386Sopenharmony_ci
2078cb93a386Sopenharmony_ci    // sine, cosine, tangent
2079cb93a386Sopenharmony_ci    {
2080cb93a386Sopenharmony_ci        constexpr float P = SK_ScalarPI;
2081cb93a386Sopenharmony_ci        constexpr float tol = 0.00175f;
2082cb93a386Sopenharmony_ci        for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
2083cb93a386Sopenharmony_ci            test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
2084cb93a386Sopenharmony_ci                return approx_sin(x);
2085cb93a386Sopenharmony_ci            });
2086cb93a386Sopenharmony_ci            test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
2087cb93a386Sopenharmony_ci                return approx_cos(x);
2088cb93a386Sopenharmony_ci            });
2089cb93a386Sopenharmony_ci        }
2090cb93a386Sopenharmony_ci
2091cb93a386Sopenharmony_ci        // Our tangent diverge more as we get near infinities (x near +- Pi/2),
2092cb93a386Sopenharmony_ci        // so bring in the domain a little.
2093cb93a386Sopenharmony_ci        constexpr float eps = 0.16f;
2094cb93a386Sopenharmony_ci        float err = 0;
2095cb93a386Sopenharmony_ci        for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
2096cb93a386Sopenharmony_ci            err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
2097cb93a386Sopenharmony_ci                return approx_tan(x);
2098cb93a386Sopenharmony_ci            });
2099cb93a386Sopenharmony_ci            // try again with some multiples of P, to check our periodicity
2100cb93a386Sopenharmony_ci            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2101cb93a386Sopenharmony_ci                return approx_tan(x + 3*P);
2102cb93a386Sopenharmony_ci            });
2103cb93a386Sopenharmony_ci            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
2104cb93a386Sopenharmony_ci                return approx_tan(x - 3*P);
2105cb93a386Sopenharmony_ci            });
2106cb93a386Sopenharmony_ci        }
2107cb93a386Sopenharmony_ci        if (0) { SkDebugf("tan error %g\n", err); }
2108cb93a386Sopenharmony_ci    }
2109cb93a386Sopenharmony_ci
2110cb93a386Sopenharmony_ci    // asin, acos, atan
2111cb93a386Sopenharmony_ci    {
2112cb93a386Sopenharmony_ci        constexpr float tol = 0.00175f;
2113cb93a386Sopenharmony_ci        float err = 0;
2114cb93a386Sopenharmony_ci        for (float x = -1; x <= 1; x += 1.0f/64) {
2115cb93a386Sopenharmony_ci            err += test(x, asin(x), tol, [](skvm::F32 x) {
2116cb93a386Sopenharmony_ci                return approx_asin(x);
2117cb93a386Sopenharmony_ci            });
2118cb93a386Sopenharmony_ci            test(x, acos(x), tol, [](skvm::F32 x) {
2119cb93a386Sopenharmony_ci                return approx_acos(x);
2120cb93a386Sopenharmony_ci            });
2121cb93a386Sopenharmony_ci        }
2122cb93a386Sopenharmony_ci        if (0) { SkDebugf("asin error %g\n", err); }
2123cb93a386Sopenharmony_ci
2124cb93a386Sopenharmony_ci        err = 0;
2125cb93a386Sopenharmony_ci        for (float x = -10; x <= 10; x += 1.0f/16) {
2126cb93a386Sopenharmony_ci            err += test(x, atan(x), tol, [](skvm::F32 x) {
2127cb93a386Sopenharmony_ci                return approx_atan(x);
2128cb93a386Sopenharmony_ci            });
2129cb93a386Sopenharmony_ci        }
2130cb93a386Sopenharmony_ci        if (0) { SkDebugf("atan error %g\n", err); }
2131cb93a386Sopenharmony_ci
2132cb93a386Sopenharmony_ci        for (float y = -3; y <= 3; y += 1) {
2133cb93a386Sopenharmony_ci            for (float x = -3; x <= 3; x += 1) {
2134cb93a386Sopenharmony_ci                err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
2135cb93a386Sopenharmony_ci                    return approx_atan2(y,x);
2136cb93a386Sopenharmony_ci                });
2137cb93a386Sopenharmony_ci            }
2138cb93a386Sopenharmony_ci        }
2139cb93a386Sopenharmony_ci        if (0) { SkDebugf("atan2 error %g\n", err); }
2140cb93a386Sopenharmony_ci    }
2141cb93a386Sopenharmony_ci}
2142cb93a386Sopenharmony_ci
2143cb93a386Sopenharmony_ciDEF_TEST(SkVM_min_max, r) {
2144cb93a386Sopenharmony_ci    // min() and max() have subtle behavior when one argument is NaN and
2145cb93a386Sopenharmony_ci    // the other isn't.  It's not sound to blindly swap their arguments.
2146cb93a386Sopenharmony_ci    //
2147cb93a386Sopenharmony_ci    // All backends must behave like std::min() and std::max(), which are
2148cb93a386Sopenharmony_ci    //
2149cb93a386Sopenharmony_ci    //    min(x,y) = y<x ? y : x
2150cb93a386Sopenharmony_ci    //    max(x,y) = x<y ? y : x
2151cb93a386Sopenharmony_ci
2152cb93a386Sopenharmony_ci    // ±NaN, ±0, ±1, ±inf
2153cb93a386Sopenharmony_ci    const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2154cb93a386Sopenharmony_ci                             0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2155cb93a386Sopenharmony_ci
2156cb93a386Sopenharmony_ci    float f[8];
2157cb93a386Sopenharmony_ci    memcpy(f, bits, sizeof(bits));
2158cb93a386Sopenharmony_ci
2159cb93a386Sopenharmony_ci    auto identical = [&](float x, float y) {
2160cb93a386Sopenharmony_ci        uint32_t X,Y;
2161cb93a386Sopenharmony_ci        memcpy(&X, &x, 4);
2162cb93a386Sopenharmony_ci        memcpy(&Y, &y, 4);
2163cb93a386Sopenharmony_ci        return X == Y;
2164cb93a386Sopenharmony_ci    };
2165cb93a386Sopenharmony_ci
2166cb93a386Sopenharmony_ci    // Test min/max with non-constant x, non-constant y.
2167cb93a386Sopenharmony_ci    // (Whether x and y are varying or uniform shouldn't make any difference.)
2168cb93a386Sopenharmony_ci    {
2169cb93a386Sopenharmony_ci        skvm::Builder b;
2170cb93a386Sopenharmony_ci        {
2171cb93a386Sopenharmony_ci            skvm::Ptr src = b.varying<float>(),
2172cb93a386Sopenharmony_ci                       mn = b.varying<float>(),
2173cb93a386Sopenharmony_ci                       mx = b.varying<float>();
2174cb93a386Sopenharmony_ci
2175cb93a386Sopenharmony_ci            skvm::F32 x = b.loadF(src),
2176cb93a386Sopenharmony_ci                      y = b.uniformF(b.uniform(), 0);
2177cb93a386Sopenharmony_ci
2178cb93a386Sopenharmony_ci            b.storeF(mn, b.min(x,y));
2179cb93a386Sopenharmony_ci            b.storeF(mx, b.max(x,y));
2180cb93a386Sopenharmony_ci        }
2181cb93a386Sopenharmony_ci
2182cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2183cb93a386Sopenharmony_ci            float mn[8], mx[8];
2184cb93a386Sopenharmony_ci            for (int i = 0; i < 8; i++) {
2185cb93a386Sopenharmony_ci                // min() and max() everything with f[i].
2186cb93a386Sopenharmony_ci                program.eval(8, f,mn,mx, &f[i]);
2187cb93a386Sopenharmony_ci
2188cb93a386Sopenharmony_ci                for (int j = 0; j < 8; j++) {
2189cb93a386Sopenharmony_ci                    REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2190cb93a386Sopenharmony_ci                    REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2191cb93a386Sopenharmony_ci                }
2192cb93a386Sopenharmony_ci            }
2193cb93a386Sopenharmony_ci        });
2194cb93a386Sopenharmony_ci    }
2195cb93a386Sopenharmony_ci
2196cb93a386Sopenharmony_ci    // Test each with constant on the right.
2197cb93a386Sopenharmony_ci    for (int i = 0; i < 8; i++) {
2198cb93a386Sopenharmony_ci        skvm::Builder b;
2199cb93a386Sopenharmony_ci        {
2200cb93a386Sopenharmony_ci            skvm::Ptr src = b.varying<float>(),
2201cb93a386Sopenharmony_ci                       mn = b.varying<float>(),
2202cb93a386Sopenharmony_ci                       mx = b.varying<float>();
2203cb93a386Sopenharmony_ci
2204cb93a386Sopenharmony_ci            skvm::F32 x = b.loadF(src),
2205cb93a386Sopenharmony_ci                      y = b.splat(f[i]);
2206cb93a386Sopenharmony_ci
2207cb93a386Sopenharmony_ci            b.storeF(mn, b.min(x,y));
2208cb93a386Sopenharmony_ci            b.storeF(mx, b.max(x,y));
2209cb93a386Sopenharmony_ci        }
2210cb93a386Sopenharmony_ci
2211cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2212cb93a386Sopenharmony_ci            float mn[8], mx[8];
2213cb93a386Sopenharmony_ci            program.eval(8, f,mn,mx);
2214cb93a386Sopenharmony_ci            for (int j = 0; j < 8; j++) {
2215cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
2216cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
2217cb93a386Sopenharmony_ci            }
2218cb93a386Sopenharmony_ci        });
2219cb93a386Sopenharmony_ci    }
2220cb93a386Sopenharmony_ci
2221cb93a386Sopenharmony_ci    // Test each with constant on the left.
2222cb93a386Sopenharmony_ci    for (int i = 0; i < 8; i++) {
2223cb93a386Sopenharmony_ci        skvm::Builder b;
2224cb93a386Sopenharmony_ci        {
2225cb93a386Sopenharmony_ci            skvm::Ptr src = b.varying<float>(),
2226cb93a386Sopenharmony_ci                       mn = b.varying<float>(),
2227cb93a386Sopenharmony_ci                       mx = b.varying<float>();
2228cb93a386Sopenharmony_ci
2229cb93a386Sopenharmony_ci            skvm::F32 x = b.splat(f[i]),
2230cb93a386Sopenharmony_ci                      y = b.loadF(src);
2231cb93a386Sopenharmony_ci
2232cb93a386Sopenharmony_ci            b.storeF(mn, b.min(x,y));
2233cb93a386Sopenharmony_ci            b.storeF(mx, b.max(x,y));
2234cb93a386Sopenharmony_ci        }
2235cb93a386Sopenharmony_ci
2236cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2237cb93a386Sopenharmony_ci            float mn[8], mx[8];
2238cb93a386Sopenharmony_ci            program.eval(8, f,mn,mx);
2239cb93a386Sopenharmony_ci            for (int j = 0; j < 8; j++) {
2240cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
2241cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
2242cb93a386Sopenharmony_ci            }
2243cb93a386Sopenharmony_ci        });
2244cb93a386Sopenharmony_ci    }
2245cb93a386Sopenharmony_ci}
2246cb93a386Sopenharmony_ci
2247cb93a386Sopenharmony_ciDEF_TEST(SkVM_halfs, r) {
2248cb93a386Sopenharmony_ci    const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000,
2249cb93a386Sopenharmony_ci                           0xc400,0xb800,0xbc00,0xc000};
2250cb93a386Sopenharmony_ci    const float fs[] = {+0.0f,+0.5f,+1.0f,+2.0f,
2251cb93a386Sopenharmony_ci                        -4.0f,-0.5f,-1.0f,-2.0f};
2252cb93a386Sopenharmony_ci    {
2253cb93a386Sopenharmony_ci        skvm::Builder b;
2254cb93a386Sopenharmony_ci        skvm::Ptr src = b.varying<uint16_t>(),
2255cb93a386Sopenharmony_ci                  dst = b.varying<float>();
2256cb93a386Sopenharmony_ci        b.storeF(dst, b.from_fp16(b.load16(src)));
2257cb93a386Sopenharmony_ci
2258cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2259cb93a386Sopenharmony_ci            float dst[8];
2260cb93a386Sopenharmony_ci            program.eval(8, hs, dst);
2261cb93a386Sopenharmony_ci            for (int i = 0; i < 8; i++) {
2262cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, dst[i] == fs[i]);
2263cb93a386Sopenharmony_ci            }
2264cb93a386Sopenharmony_ci        });
2265cb93a386Sopenharmony_ci    }
2266cb93a386Sopenharmony_ci    {
2267cb93a386Sopenharmony_ci        skvm::Builder b;
2268cb93a386Sopenharmony_ci        skvm::Ptr src = b.varying<float>(),
2269cb93a386Sopenharmony_ci                  dst = b.varying<uint16_t>();
2270cb93a386Sopenharmony_ci        b.store16(dst, b.to_fp16(b.loadF(src)));
2271cb93a386Sopenharmony_ci
2272cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2273cb93a386Sopenharmony_ci            uint16_t dst[8];
2274cb93a386Sopenharmony_ci            program.eval(8, fs, dst);
2275cb93a386Sopenharmony_ci            for (int i = 0; i < 8; i++) {
2276cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, dst[i] == hs[i]);
2277cb93a386Sopenharmony_ci            }
2278cb93a386Sopenharmony_ci        });
2279cb93a386Sopenharmony_ci    }
2280cb93a386Sopenharmony_ci}
2281cb93a386Sopenharmony_ci
2282cb93a386Sopenharmony_ciDEF_TEST(SkVM_64bit, r) {
2283cb93a386Sopenharmony_ci    uint32_t lo[65],
2284cb93a386Sopenharmony_ci             hi[65];
2285cb93a386Sopenharmony_ci    uint64_t wide[65];
2286cb93a386Sopenharmony_ci    for (int i = 0; i < 65; i++) {
2287cb93a386Sopenharmony_ci        lo[i] = 2*i+0;
2288cb93a386Sopenharmony_ci        hi[i] = 2*i+1;
2289cb93a386Sopenharmony_ci        wide[i] = ((uint64_t)lo[i] <<  0)
2290cb93a386Sopenharmony_ci                | ((uint64_t)hi[i] << 32);
2291cb93a386Sopenharmony_ci    }
2292cb93a386Sopenharmony_ci
2293cb93a386Sopenharmony_ci    {
2294cb93a386Sopenharmony_ci        skvm::Builder b;
2295cb93a386Sopenharmony_ci        {
2296cb93a386Sopenharmony_ci            skvm::Ptr widePtr = b.varying<uint64_t>(),
2297cb93a386Sopenharmony_ci                        loPtr = b.varying<int>(),
2298cb93a386Sopenharmony_ci                        hiPtr = b.varying<int>();
2299cb93a386Sopenharmony_ci            b.store32(loPtr, b.load64(widePtr, 0));
2300cb93a386Sopenharmony_ci            b.store32(hiPtr, b.load64(widePtr, 1));
2301cb93a386Sopenharmony_ci        }
2302cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2303cb93a386Sopenharmony_ci            uint32_t l[65], h[65];
2304cb93a386Sopenharmony_ci            program.eval(65, wide,l,h);
2305cb93a386Sopenharmony_ci            for (int i = 0; i < 65; i++) {
2306cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, l[i] == lo[i]);
2307cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, h[i] == hi[i]);
2308cb93a386Sopenharmony_ci            }
2309cb93a386Sopenharmony_ci        });
2310cb93a386Sopenharmony_ci    }
2311cb93a386Sopenharmony_ci
2312cb93a386Sopenharmony_ci    {
2313cb93a386Sopenharmony_ci        skvm::Builder b;
2314cb93a386Sopenharmony_ci        {
2315cb93a386Sopenharmony_ci            skvm::Ptr widePtr = b.varying<uint64_t>(),
2316cb93a386Sopenharmony_ci                        loPtr = b.varying<int>(),
2317cb93a386Sopenharmony_ci                        hiPtr = b.varying<int>();
2318cb93a386Sopenharmony_ci            b.store64(widePtr, b.load32(loPtr), b.load32(hiPtr));
2319cb93a386Sopenharmony_ci        }
2320cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2321cb93a386Sopenharmony_ci            uint64_t w[65];
2322cb93a386Sopenharmony_ci            program.eval(65, w,lo,hi);
2323cb93a386Sopenharmony_ci            for (int i = 0; i < 65; i++) {
2324cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, w[i] == wide[i]);
2325cb93a386Sopenharmony_ci            }
2326cb93a386Sopenharmony_ci        });
2327cb93a386Sopenharmony_ci    }
2328cb93a386Sopenharmony_ci}
2329cb93a386Sopenharmony_ci
2330cb93a386Sopenharmony_ciDEF_TEST(SkVM_128bit, r) {
2331cb93a386Sopenharmony_ci    float   floats[4*63];
2332cb93a386Sopenharmony_ci    uint8_t packed[4*63];
2333cb93a386Sopenharmony_ci
2334cb93a386Sopenharmony_ci    for (int i = 0; i < 4*63; i++) {
2335cb93a386Sopenharmony_ci        floats[i] = i * (1/255.0f);
2336cb93a386Sopenharmony_ci    }
2337cb93a386Sopenharmony_ci
2338cb93a386Sopenharmony_ci    skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType),
2339cb93a386Sopenharmony_ci                      rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType);
2340cb93a386Sopenharmony_ci
2341cb93a386Sopenharmony_ci    {  // Convert RGBA F32 to RGBA 8888, testing 128-bit loads.
2342cb93a386Sopenharmony_ci        skvm::Builder b;
2343cb93a386Sopenharmony_ci        {
2344cb93a386Sopenharmony_ci            skvm::Ptr dst = b.varying(4),
2345cb93a386Sopenharmony_ci                      src = b.varying(16);
2346cb93a386Sopenharmony_ci
2347cb93a386Sopenharmony_ci            skvm::Color c = b.load(rgba_ffff, src);
2348cb93a386Sopenharmony_ci            b.store(rgba_8888, dst, c);
2349cb93a386Sopenharmony_ci        }
2350cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2351cb93a386Sopenharmony_ci            memset(packed, 0, sizeof(packed));
2352cb93a386Sopenharmony_ci            program.eval(63, packed, floats);
2353cb93a386Sopenharmony_ci            for (int i = 0; i < 4*63; i++) {
2354cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, packed[i] == i);
2355cb93a386Sopenharmony_ci            }
2356cb93a386Sopenharmony_ci        });
2357cb93a386Sopenharmony_ci    }
2358cb93a386Sopenharmony_ci
2359cb93a386Sopenharmony_ci
2360cb93a386Sopenharmony_ci    {  // Convert RGBA 8888 to RGBA F32, testing 128-bit stores.
2361cb93a386Sopenharmony_ci        skvm::Builder b;
2362cb93a386Sopenharmony_ci        {
2363cb93a386Sopenharmony_ci            skvm::Ptr dst = b.varying(16),
2364cb93a386Sopenharmony_ci                      src = b.varying(4);
2365cb93a386Sopenharmony_ci
2366cb93a386Sopenharmony_ci            skvm::Color c = b.load(rgba_8888, src);
2367cb93a386Sopenharmony_ci            b.store(rgba_ffff, dst, c);
2368cb93a386Sopenharmony_ci        }
2369cb93a386Sopenharmony_ci        test_jit_and_interpreter(b, [&](const skvm::Program& program){
2370cb93a386Sopenharmony_ci            memset(floats, 0, sizeof(floats));
2371cb93a386Sopenharmony_ci            program.eval(63, floats, packed);
2372cb93a386Sopenharmony_ci            for (int i = 0; i < 4*63; i++) {
2373cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, floats[i] == i * (1/255.0f));
2374cb93a386Sopenharmony_ci            }
2375cb93a386Sopenharmony_ci        });
2376cb93a386Sopenharmony_ci    }
2377cb93a386Sopenharmony_ci
2378cb93a386Sopenharmony_ci}
2379cb93a386Sopenharmony_ci
2380cb93a386Sopenharmony_ciDEF_TEST(SkVM_is_NaN_is_finite, r) {
2381cb93a386Sopenharmony_ci    skvm::Builder b;
2382cb93a386Sopenharmony_ci    {
2383cb93a386Sopenharmony_ci        skvm::Ptr src = b.varying<float>(),
2384cb93a386Sopenharmony_ci                  nan = b.varying<int>(),
2385cb93a386Sopenharmony_ci                  fin = b.varying<int>();
2386cb93a386Sopenharmony_ci        b.store32(nan, is_NaN   (b.loadF(src)));
2387cb93a386Sopenharmony_ci        b.store32(fin, is_finite(b.loadF(src)));
2388cb93a386Sopenharmony_ci    }
2389cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program){
2390cb93a386Sopenharmony_ci        // ±NaN, ±0, ±1, ±inf
2391cb93a386Sopenharmony_ci        const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
2392cb93a386Sopenharmony_ci                                 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
2393cb93a386Sopenharmony_ci        uint32_t nan[8], fin[8];
2394cb93a386Sopenharmony_ci        program.eval(8, bits, nan,fin);
2395cb93a386Sopenharmony_ci
2396cb93a386Sopenharmony_ci        for (int i = 0; i < 8; i++) {
2397cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0));
2398cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, fin[i] == ((i == 2 || i == 3 ||
2399cb93a386Sopenharmony_ci                                           i == 4 || i == 5) ? 0xffffffff : 0));
2400cb93a386Sopenharmony_ci        }
2401cb93a386Sopenharmony_ci    });
2402cb93a386Sopenharmony_ci}
2403cb93a386Sopenharmony_ci
2404cb93a386Sopenharmony_ciDEF_TEST(SkVM_args, r) {
2405cb93a386Sopenharmony_ci    // Test we can handle at least six arguments.
2406cb93a386Sopenharmony_ci    skvm::Builder b;
2407cb93a386Sopenharmony_ci    {
2408cb93a386Sopenharmony_ci        skvm::Ptr dst = b.varying<float>(),
2409cb93a386Sopenharmony_ci                    A = b.varying<float>(),
2410cb93a386Sopenharmony_ci                    B = b.varying<float>(),
2411cb93a386Sopenharmony_ci                    C = b.varying<float>(),
2412cb93a386Sopenharmony_ci                    D = b.varying<float>(),
2413cb93a386Sopenharmony_ci                    E = b.varying<float>();
2414cb93a386Sopenharmony_ci        storeF(dst, b.loadF(A)
2415cb93a386Sopenharmony_ci                  + b.loadF(B)
2416cb93a386Sopenharmony_ci                  + b.loadF(C)
2417cb93a386Sopenharmony_ci                  + b.loadF(D)
2418cb93a386Sopenharmony_ci                  + b.loadF(E));
2419cb93a386Sopenharmony_ci    }
2420cb93a386Sopenharmony_ci
2421cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program){
2422cb93a386Sopenharmony_ci        float dst[17],A[17],B[17],C[17],D[17],E[17];
2423cb93a386Sopenharmony_ci        for (int i = 0; i < 17; i++) {
2424cb93a386Sopenharmony_ci            A[i] = B[i] = C[i] = D[i] = E[i] = (float)i;
2425cb93a386Sopenharmony_ci        }
2426cb93a386Sopenharmony_ci        program.eval(17, dst,A,B,C,D,E);
2427cb93a386Sopenharmony_ci        for (int i = 0; i < 17; i++) {
2428cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, dst[i] == 5.0f*i);
2429cb93a386Sopenharmony_ci        }
2430cb93a386Sopenharmony_ci    });
2431cb93a386Sopenharmony_ci}
2432cb93a386Sopenharmony_ci
2433cb93a386Sopenharmony_ciDEF_TEST(SkVM_badpack, reporter) {
2434cb93a386Sopenharmony_ci    // Test case distilled from actual failing draw,
2435cb93a386Sopenharmony_ci    // originally with a bad arm64 implementation of pack().
2436cb93a386Sopenharmony_ci    skvm::Builder p;
2437cb93a386Sopenharmony_ci    {
2438cb93a386Sopenharmony_ci        skvm::UPtr uniforms = p.uniform();
2439cb93a386Sopenharmony_ci        skvm::Ptr dst = p.varying<uint16_t>();
2440cb93a386Sopenharmony_ci
2441cb93a386Sopenharmony_ci        skvm::I32 r = round(p.uniformF(uniforms, 8) * 15),
2442cb93a386Sopenharmony_ci                  a = p.splat(0xf);
2443cb93a386Sopenharmony_ci
2444cb93a386Sopenharmony_ci        skvm::I32 _4444 = p.splat(0);
2445cb93a386Sopenharmony_ci        _4444 = pack(_4444, r, 12);
2446cb93a386Sopenharmony_ci        _4444 = pack(_4444, a,  0);
2447cb93a386Sopenharmony_ci        store16(dst, _4444);
2448cb93a386Sopenharmony_ci    }
2449cb93a386Sopenharmony_ci
2450cb93a386Sopenharmony_ci    test_jit_and_interpreter(p, [&](const skvm::Program& program){
2451cb93a386Sopenharmony_ci        const float uniforms[] = { 0.0f, 0.0f,
2452cb93a386Sopenharmony_ci                                   1.0f, 0.0f, 0.0f, 1.0f };
2453cb93a386Sopenharmony_ci
2454cb93a386Sopenharmony_ci        uint16_t dst[17] = {0};
2455cb93a386Sopenharmony_ci        program.eval(17, uniforms,dst);
2456cb93a386Sopenharmony_ci        for (int i = 0; i < 17; i++) {
2457cb93a386Sopenharmony_ci            REPORTER_ASSERT(reporter, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f);
2458cb93a386Sopenharmony_ci        }
2459cb93a386Sopenharmony_ci    });
2460cb93a386Sopenharmony_ci}
2461cb93a386Sopenharmony_ci
2462cb93a386Sopenharmony_ciDEF_TEST(SkVM_features, r) {
2463cb93a386Sopenharmony_ci    auto build_program = [](skvm::Builder* b) {
2464cb93a386Sopenharmony_ci        skvm::F32 x = b->loadF(b->varying<float>());
2465cb93a386Sopenharmony_ci        b->storeF(b->varying<float>(), x*x+x);
2466cb93a386Sopenharmony_ci    };
2467cb93a386Sopenharmony_ci
2468cb93a386Sopenharmony_ci    {   // load-fma-store with FMA available.
2469cb93a386Sopenharmony_ci        skvm::Features features;
2470cb93a386Sopenharmony_ci        features.fma = true;
2471cb93a386Sopenharmony_ci        skvm::Builder b(features);
2472cb93a386Sopenharmony_ci        build_program(&b);
2473cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, b.optimize().size() == 3);
2474cb93a386Sopenharmony_ci    }
2475cb93a386Sopenharmony_ci
2476cb93a386Sopenharmony_ci    {   // load-mul-add-store without FMA.
2477cb93a386Sopenharmony_ci        skvm::Features features;
2478cb93a386Sopenharmony_ci        features.fma = false;
2479cb93a386Sopenharmony_ci        skvm::Builder b(features);
2480cb93a386Sopenharmony_ci        build_program(&b);
2481cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, b.optimize().size() == 4);
2482cb93a386Sopenharmony_ci    }
2483cb93a386Sopenharmony_ci
2484cb93a386Sopenharmony_ci    {   // Auto-detected, could be either.
2485cb93a386Sopenharmony_ci        skvm::Builder b;
2486cb93a386Sopenharmony_ci        build_program(&b);
2487cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, b.optimize().size() == 3
2488cb93a386Sopenharmony_ci                        || b.optimize().size() == 4);
2489cb93a386Sopenharmony_ci    }
2490cb93a386Sopenharmony_ci}
2491cb93a386Sopenharmony_ci
2492cb93a386Sopenharmony_ciDEF_TEST(SkVM_gather_can_hoist, r) {
2493cb93a386Sopenharmony_ci    // A gather instruction isn't necessarily varying... it's whatever its index is.
2494cb93a386Sopenharmony_ci    // First a typical gather scenario with varying index.
2495cb93a386Sopenharmony_ci    {
2496cb93a386Sopenharmony_ci        skvm::Builder b;
2497cb93a386Sopenharmony_ci        skvm::UPtr uniforms = b.uniform();
2498cb93a386Sopenharmony_ci        skvm::Ptr buf = b.varying<int>();
2499cb93a386Sopenharmony_ci        skvm::I32 ix = b.load32(buf);
2500cb93a386Sopenharmony_ci        b.store32(buf, b.gather32(uniforms,0, ix));
2501cb93a386Sopenharmony_ci
2502cb93a386Sopenharmony_ci        skvm::Program p = b.done();
2503cb93a386Sopenharmony_ci
2504cb93a386Sopenharmony_ci        // ix is varying, so the gather is too.
2505cb93a386Sopenharmony_ci        //
2506cb93a386Sopenharmony_ci        // loop:
2507cb93a386Sopenharmony_ci        //     v0 = load32 buf
2508cb93a386Sopenharmony_ci        //     v1 = gather32 uniforms+0 v0
2509cb93a386Sopenharmony_ci        //     store32 buf v1
2510cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, p.instructions().size() == 3);
2511cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, p.loop() == 0);
2512cb93a386Sopenharmony_ci    }
2513cb93a386Sopenharmony_ci
2514cb93a386Sopenharmony_ci    // Now the same but with a uniform index instead.
2515cb93a386Sopenharmony_ci    {
2516cb93a386Sopenharmony_ci        skvm::Builder b;
2517cb93a386Sopenharmony_ci        skvm::UPtr uniforms = b.uniform();
2518cb93a386Sopenharmony_ci        skvm::Ptr buf = b.varying<int>();
2519cb93a386Sopenharmony_ci        skvm::I32 ix = b.uniform32(uniforms,8);
2520cb93a386Sopenharmony_ci        b.store32(buf, b.gather32(uniforms,0, ix));
2521cb93a386Sopenharmony_ci
2522cb93a386Sopenharmony_ci        skvm::Program p = b.done();
2523cb93a386Sopenharmony_ci
2524cb93a386Sopenharmony_ci        // ix is uniform, so the gather is too.
2525cb93a386Sopenharmony_ci        //
2526cb93a386Sopenharmony_ci        // v0 = uniform32 uniforms+8
2527cb93a386Sopenharmony_ci        // v1 = gather32 uniforms+0 v0
2528cb93a386Sopenharmony_ci        // loop:
2529cb93a386Sopenharmony_ci        //     store32 buf v1
2530cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, p.instructions().size() == 3);
2531cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, p.loop() == 2);
2532cb93a386Sopenharmony_ci    }
2533cb93a386Sopenharmony_ci}
2534cb93a386Sopenharmony_ci
2535cb93a386Sopenharmony_ciDEF_TEST(SkVM_dont_dedup_loads, r) {
2536cb93a386Sopenharmony_ci    // We've been assuming that all Ops with the same arguments produce the same value
2537cb93a386Sopenharmony_ci    // and deduplicating them, which results in a simple common subexpression eliminator.
2538cb93a386Sopenharmony_ci    //
2539cb93a386Sopenharmony_ci    // But we can't soundly dedup two identical loads with a store between.
2540cb93a386Sopenharmony_ci    // If we dedup the loads in this test program it will always increment by 1, not K.
2541cb93a386Sopenharmony_ci    constexpr int K = 2;
2542cb93a386Sopenharmony_ci    skvm::Builder b;
2543cb93a386Sopenharmony_ci    {
2544cb93a386Sopenharmony_ci        skvm::Ptr buf = b.varying<int>();
2545cb93a386Sopenharmony_ci        for (int i = 0; i < K; i++) {
2546cb93a386Sopenharmony_ci            b.store32(buf, b.load32(buf) + 1);
2547cb93a386Sopenharmony_ci        }
2548cb93a386Sopenharmony_ci    }
2549cb93a386Sopenharmony_ci
2550cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program){
2551cb93a386Sopenharmony_ci        int buf[] = { 0,1,2,3,4 };
2552cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf);
2553cb93a386Sopenharmony_ci        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
2554cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, buf[i] == i+K);
2555cb93a386Sopenharmony_ci        }
2556cb93a386Sopenharmony_ci    });
2557cb93a386Sopenharmony_ci}
2558cb93a386Sopenharmony_ci
2559cb93a386Sopenharmony_ciDEF_TEST(SkVM_dont_dedup_stores, r) {
2560cb93a386Sopenharmony_ci    // Following a similar line of reasoning to SkVM_dont_dedup_loads,
2561cb93a386Sopenharmony_ci    // we cannot dedup stores either.  A different store between two identical stores
2562cb93a386Sopenharmony_ci    // will invalidate the first store, meaning we do need to reissue that store operation.
2563cb93a386Sopenharmony_ci    skvm::Builder b;
2564cb93a386Sopenharmony_ci    {
2565cb93a386Sopenharmony_ci        skvm::Ptr buf = b.varying<int>();
2566cb93a386Sopenharmony_ci        b.store32(buf, b.splat(4));
2567cb93a386Sopenharmony_ci        b.store32(buf, b.splat(5));
2568cb93a386Sopenharmony_ci        b.store32(buf, b.splat(4));   // If we dedup'd, we'd skip this store.
2569cb93a386Sopenharmony_ci    }
2570cb93a386Sopenharmony_ci
2571cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program){
2572cb93a386Sopenharmony_ci        int buf[42];
2573cb93a386Sopenharmony_ci        program.eval(SK_ARRAY_COUNT(buf), buf);
2574cb93a386Sopenharmony_ci        for (int x : buf) {
2575cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, x == 4);
2576cb93a386Sopenharmony_ci        }
2577cb93a386Sopenharmony_ci    });
2578cb93a386Sopenharmony_ci}
2579cb93a386Sopenharmony_ci
2580cb93a386Sopenharmony_ciDEF_TEST(SkVM_fast_mul, r) {
2581cb93a386Sopenharmony_ci    skvm::Builder b;
2582cb93a386Sopenharmony_ci    {
2583cb93a386Sopenharmony_ci        skvm::Ptr src = b.varying<float>(),
2584cb93a386Sopenharmony_ci                 fast = b.varying<float>(),
2585cb93a386Sopenharmony_ci                 slow = b.varying<float>();
2586cb93a386Sopenharmony_ci        skvm::F32 x = b.loadF(src);
2587cb93a386Sopenharmony_ci        b.storeF(fast, fast_mul(0.0f, x));
2588cb93a386Sopenharmony_ci        b.storeF(slow, 0.0f * x);
2589cb93a386Sopenharmony_ci    }
2590cb93a386Sopenharmony_ci    test_jit_and_interpreter(b, [&](const skvm::Program& program){
2591cb93a386Sopenharmony_ci        const uint32_t bits[] = {
2592cb93a386Sopenharmony_ci            0x0000'0000, 0x8000'0000, //±0
2593cb93a386Sopenharmony_ci            0x3f80'0000, 0xbf80'0000, //±1
2594cb93a386Sopenharmony_ci            0x7f80'0000, 0xff80'0000, //±inf
2595cb93a386Sopenharmony_ci            0x7f80'0001, 0xff80'0001, //±NaN
2596cb93a386Sopenharmony_ci        };
2597cb93a386Sopenharmony_ci        float fast[8],
2598cb93a386Sopenharmony_ci              slow[8];
2599cb93a386Sopenharmony_ci        program.eval(8,bits,fast,slow);
2600cb93a386Sopenharmony_ci
2601cb93a386Sopenharmony_ci        for (int i = 0; i < 8; i++) {
2602cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, fast[i] == 0.0f);
2603cb93a386Sopenharmony_ci
2604cb93a386Sopenharmony_ci            if (i < 4) {
2605cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, slow[i] == 0.0f);
2606cb93a386Sopenharmony_ci            } else {
2607cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, isnan(slow[i]));
2608cb93a386Sopenharmony_ci            }
2609cb93a386Sopenharmony_ci        }
2610cb93a386Sopenharmony_ci    });
2611cb93a386Sopenharmony_ci}
2612