1cb93a386Sopenharmony_ci// Copyright 2020 Google LLC.
2cb93a386Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3cb93a386Sopenharmony_ci
4cb93a386Sopenharmony_ci#ifndef SkVM_opts_DEFINED
5cb93a386Sopenharmony_ci#define SkVM_opts_DEFINED
6cb93a386Sopenharmony_ci
7cb93a386Sopenharmony_ci#include "include/private/SkVx.h"
8cb93a386Sopenharmony_ci#include "src/core/SkVM.h"
9cb93a386Sopenharmony_ci
10cb93a386Sopenharmony_citemplate <int N>
11cb93a386Sopenharmony_cistatic inline skvx::Vec<N,int> gather32(const int* ptr, const skvx::Vec<N,int>& ix) {
12cb93a386Sopenharmony_ci#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
13cb93a386Sopenharmony_ci    if constexpr (N == 8) {
14cb93a386Sopenharmony_ci        return skvx::bit_pun<skvx::Vec<N,int>>(
15cb93a386Sopenharmony_ci                _mm256_i32gather_epi32(ptr, skvx::bit_pun<__m256i>(ix), 4));
16cb93a386Sopenharmony_ci    }
17cb93a386Sopenharmony_ci#endif
18cb93a386Sopenharmony_ci    // Try to recurse on specializations, falling back on standard scalar map()-based impl.
19cb93a386Sopenharmony_ci    if constexpr (N > 8) {
20cb93a386Sopenharmony_ci        return join(gather32(ptr, ix.lo),
21cb93a386Sopenharmony_ci                    gather32(ptr, ix.hi));
22cb93a386Sopenharmony_ci    }
23cb93a386Sopenharmony_ci    return map([&](int i) { return ptr[i]; }, ix);
24cb93a386Sopenharmony_ci}
25cb93a386Sopenharmony_ci
26cb93a386Sopenharmony_cinamespace SK_OPTS_NS {
27cb93a386Sopenharmony_ci
28cb93a386Sopenharmony_cinamespace SkVMInterpreterTypes {
29cb93a386Sopenharmony_ci#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
30cb93a386Sopenharmony_ci    constexpr inline int K = 32;  // 1024-bit: 4 ymm or 2 zmm at a time
31cb93a386Sopenharmony_ci#else
32cb93a386Sopenharmony_ci    constexpr inline int K = 8;   // 256-bit: 2 xmm, 2 v-registers, etc.
33cb93a386Sopenharmony_ci#endif
34cb93a386Sopenharmony_ci    using I32 = skvx::Vec<K, int>;
35cb93a386Sopenharmony_ci    using I16 = skvx::Vec<K, int16_t>;
36cb93a386Sopenharmony_ci    using F32 = skvx::Vec<K, float>;
37cb93a386Sopenharmony_ci    using U64 = skvx::Vec<K, uint64_t>;
38cb93a386Sopenharmony_ci    using U32 = skvx::Vec<K, uint32_t>;
39cb93a386Sopenharmony_ci    using U16 = skvx::Vec<K, uint16_t>;
40cb93a386Sopenharmony_ci    using  U8 = skvx::Vec<K, uint8_t>;
41cb93a386Sopenharmony_ci    union Slot {
42cb93a386Sopenharmony_ci        F32   f32;
43cb93a386Sopenharmony_ci        I32   i32;
44cb93a386Sopenharmony_ci        U32   u32;
45cb93a386Sopenharmony_ci        I16   i16;
46cb93a386Sopenharmony_ci        U16   u16;
47cb93a386Sopenharmony_ci    };
48cb93a386Sopenharmony_ci}  // namespace SkVMInterpreterTypes
49cb93a386Sopenharmony_ci
50cb93a386Sopenharmony_ci    inline void interpret_skvm(const skvm::InterpreterInstruction insts[], const int ninsts,
51cb93a386Sopenharmony_ci                               const int nregs, const int loop,
52cb93a386Sopenharmony_ci                               const int strides[], const int nargs,
53cb93a386Sopenharmony_ci                               int n, void* args[]) {
54cb93a386Sopenharmony_ci        using namespace skvm;
55cb93a386Sopenharmony_ci
56cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::K;
57cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::I32;
58cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::I16;
59cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::F32;
60cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::U64;
61cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::U32;
62cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::U16;
63cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::U8;
64cb93a386Sopenharmony_ci        using SkVMInterpreterTypes::Slot;
65cb93a386Sopenharmony_ci
66cb93a386Sopenharmony_ci        // We'll operate in SIMT style, knocking off K-size chunks from n while possible.
67cb93a386Sopenharmony_ci
68cb93a386Sopenharmony_ci        Slot                     few_regs[16];
69cb93a386Sopenharmony_ci        std::unique_ptr<char[]> many_regs;
70cb93a386Sopenharmony_ci
71cb93a386Sopenharmony_ci        Slot* r = few_regs;
72cb93a386Sopenharmony_ci
73cb93a386Sopenharmony_ci        if (nregs > (int)SK_ARRAY_COUNT(few_regs)) {
74cb93a386Sopenharmony_ci            // Annoyingly we can't trust that malloc() or new will work with Slot because
75cb93a386Sopenharmony_ci            // the skvx::Vec types may have alignment greater than what they provide.
76cb93a386Sopenharmony_ci            // We'll overallocate one extra register so we can align manually.
77cb93a386Sopenharmony_ci            many_regs.reset(new char[ sizeof(Slot) * (nregs + 1) ]);
78cb93a386Sopenharmony_ci
79cb93a386Sopenharmony_ci            uintptr_t addr = (uintptr_t)many_regs.get();
80cb93a386Sopenharmony_ci            addr += alignof(Slot) -
81cb93a386Sopenharmony_ci                     (addr & (alignof(Slot) - 1));
82cb93a386Sopenharmony_ci            SkASSERT((addr & (alignof(Slot) - 1)) == 0);
83cb93a386Sopenharmony_ci            r = (Slot*)addr;
84cb93a386Sopenharmony_ci        }
85cb93a386Sopenharmony_ci
86cb93a386Sopenharmony_ci
87cb93a386Sopenharmony_ci        // Step each argument pointer ahead by its stride a number of times.
88cb93a386Sopenharmony_ci        auto step_args = [&](int times) {
89cb93a386Sopenharmony_ci            for (int i = 0; i < nargs; i++) {
90cb93a386Sopenharmony_ci                args[i] = (void*)( (char*)args[i] + times * strides[i] );
91cb93a386Sopenharmony_ci            }
92cb93a386Sopenharmony_ci        };
93cb93a386Sopenharmony_ci
94cb93a386Sopenharmony_ci        int start = 0,
95cb93a386Sopenharmony_ci            stride;
96cb93a386Sopenharmony_ci        for ( ; n > 0; start = loop, n -= stride, step_args(stride)) {
97cb93a386Sopenharmony_ci            stride = n >= K ? K : 1;
98cb93a386Sopenharmony_ci
99cb93a386Sopenharmony_ci            for (int instIdx = start; instIdx < ninsts; instIdx++) {
100cb93a386Sopenharmony_ci                InterpreterInstruction inst = insts[instIdx];
101cb93a386Sopenharmony_ci
102cb93a386Sopenharmony_ci                // d = op(x,y,z,w, immA,immB)
103cb93a386Sopenharmony_ci                Reg   d = inst.d,
104cb93a386Sopenharmony_ci                      x = inst.x,
105cb93a386Sopenharmony_ci                      y = inst.y,
106cb93a386Sopenharmony_ci                      z = inst.z,
107cb93a386Sopenharmony_ci                      w = inst.w;
108cb93a386Sopenharmony_ci                int immA = inst.immA,
109cb93a386Sopenharmony_ci                    immB = inst.immB,
110cb93a386Sopenharmony_ci                    immC = inst.immC;
111cb93a386Sopenharmony_ci
112cb93a386Sopenharmony_ci                // Ops that interact with memory need to know whether we're stride=1 or K,
113cb93a386Sopenharmony_ci                // but all non-memory ops can run the same code no matter the stride.
114cb93a386Sopenharmony_ci                switch (2*(int)inst.op + (stride == K ? 1 : 0)) {
115cb93a386Sopenharmony_ci                    default: SkUNREACHABLE;
116cb93a386Sopenharmony_ci
117cb93a386Sopenharmony_ci                #define STRIDE_1(op) case 2*(int)op
118cb93a386Sopenharmony_ci                #define STRIDE_K(op) case 2*(int)op + 1
119cb93a386Sopenharmony_ci                    STRIDE_1(Op::store8 ): memcpy(args[immA], &r[x].i32, 1); break;
120cb93a386Sopenharmony_ci                    STRIDE_1(Op::store16): memcpy(args[immA], &r[x].i32, 2); break;
121cb93a386Sopenharmony_ci                    STRIDE_1(Op::store32): memcpy(args[immA], &r[x].i32, 4); break;
122cb93a386Sopenharmony_ci                    STRIDE_1(Op::store64): memcpy((char*)args[immA]+0, &r[x].i32, 4);
123cb93a386Sopenharmony_ci                                           memcpy((char*)args[immA]+4, &r[y].i32, 4); break;
124cb93a386Sopenharmony_ci
125cb93a386Sopenharmony_ci                    STRIDE_K(Op::store8 ): skvx::cast<uint8_t> (r[x].i32).store(args[immA]); break;
126cb93a386Sopenharmony_ci                    STRIDE_K(Op::store16): skvx::cast<uint16_t>(r[x].i32).store(args[immA]); break;
127cb93a386Sopenharmony_ci                    STRIDE_K(Op::store32):                     (r[x].i32).store(args[immA]); break;
128cb93a386Sopenharmony_ci                    STRIDE_K(Op::store64): (skvx::cast<uint64_t>(r[x].u32) << 0 |
129cb93a386Sopenharmony_ci                                            skvx::cast<uint64_t>(r[y].u32) << 32).store(args[immA]);
130cb93a386Sopenharmony_ci                                           break;
131cb93a386Sopenharmony_ci
132cb93a386Sopenharmony_ci                    STRIDE_1(Op::load8 ): r[d].i32 = 0; memcpy(&r[d].i32, args[immA], 1); break;
133cb93a386Sopenharmony_ci                    STRIDE_1(Op::load16): r[d].i32 = 0; memcpy(&r[d].i32, args[immA], 2); break;
134cb93a386Sopenharmony_ci                    STRIDE_1(Op::load32): r[d].i32 = 0; memcpy(&r[d].i32, args[immA], 4); break;
135cb93a386Sopenharmony_ci                    STRIDE_1(Op::load64):
136cb93a386Sopenharmony_ci                        r[d].i32 = 0; memcpy(&r[d].i32, (char*)args[immA] + 4*immB, 4); break;
137cb93a386Sopenharmony_ci
138cb93a386Sopenharmony_ci                    STRIDE_K(Op::load8 ): r[d].i32= skvx::cast<int>(U8 ::Load(args[immA])); break;
139cb93a386Sopenharmony_ci                    STRIDE_K(Op::load16): r[d].i32= skvx::cast<int>(U16::Load(args[immA])); break;
140cb93a386Sopenharmony_ci                    STRIDE_K(Op::load32): r[d].i32=                 I32::Load(args[immA]) ; break;
141cb93a386Sopenharmony_ci                    STRIDE_K(Op::load64):
142cb93a386Sopenharmony_ci                        // Low 32 bits if immB=0, or high 32 bits if immB=1.
143cb93a386Sopenharmony_ci                        r[d].i32 = skvx::cast<int>(U64::Load(args[immA]) >> (32*immB)); break;
144cb93a386Sopenharmony_ci
145cb93a386Sopenharmony_ci                    // The pointer we base our gather on is loaded indirectly from a uniform:
146cb93a386Sopenharmony_ci                    //     - args[immA] is the uniform holding our gather base pointer somewhere;
147cb93a386Sopenharmony_ci                    //     - (const uint8_t*)args[immA] + immB points to the gather base pointer;
148cb93a386Sopenharmony_ci                    //     - memcpy() loads the gather base and into a pointer of the right type.
149cb93a386Sopenharmony_ci                    // After all that we have an ordinary (uniform) pointer `ptr` to load from,
150cb93a386Sopenharmony_ci                    // and we then gather from it using the varying indices in r[x].
151cb93a386Sopenharmony_ci                    STRIDE_1(Op::gather8): {
152cb93a386Sopenharmony_ci                        const uint8_t* ptr;
153cb93a386Sopenharmony_ci                        memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr));
154cb93a386Sopenharmony_ci                        r[d].i32 = ptr[ r[x].i32[0] ];
155cb93a386Sopenharmony_ci                    } break;
156cb93a386Sopenharmony_ci                    STRIDE_1(Op::gather16): {
157cb93a386Sopenharmony_ci                        const uint16_t* ptr;
158cb93a386Sopenharmony_ci                        memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr));
159cb93a386Sopenharmony_ci                        r[d].i32 = ptr[ r[x].i32[0] ];
160cb93a386Sopenharmony_ci                    } break;
161cb93a386Sopenharmony_ci                    STRIDE_1(Op::gather32): {
162cb93a386Sopenharmony_ci                        const int* ptr;
163cb93a386Sopenharmony_ci                        memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr));
164cb93a386Sopenharmony_ci                        r[d].i32 = ptr[ r[x].i32[0] ];
165cb93a386Sopenharmony_ci                    } break;
166cb93a386Sopenharmony_ci
167cb93a386Sopenharmony_ci                    STRIDE_K(Op::gather8): {
168cb93a386Sopenharmony_ci                        const uint8_t* ptr;
169cb93a386Sopenharmony_ci                        memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr));
170cb93a386Sopenharmony_ci                        r[d].i32 = map([&](int ix) { return (int)ptr[ix]; }, r[x].i32);
171cb93a386Sopenharmony_ci                    } break;
172cb93a386Sopenharmony_ci                    STRIDE_K(Op::gather16): {
173cb93a386Sopenharmony_ci                        const uint16_t* ptr;
174cb93a386Sopenharmony_ci                        memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr));
175cb93a386Sopenharmony_ci                        r[d].i32 = map([&](int ix) { return (int)ptr[ix]; }, r[x].i32);
176cb93a386Sopenharmony_ci                    } break;
177cb93a386Sopenharmony_ci                    STRIDE_K(Op::gather32): {
178cb93a386Sopenharmony_ci                        const int* ptr;
179cb93a386Sopenharmony_ci                        memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr));
180cb93a386Sopenharmony_ci                        r[d].i32 = gather32(ptr, r[x].i32);
181cb93a386Sopenharmony_ci                    } break;
182cb93a386Sopenharmony_ci
183cb93a386Sopenharmony_ci                #undef STRIDE_1
184cb93a386Sopenharmony_ci                #undef STRIDE_K
185cb93a386Sopenharmony_ci
186cb93a386Sopenharmony_ci                    // Ops that don't interact with memory should never care about the stride.
187cb93a386Sopenharmony_ci                #define CASE(op) case 2*(int)op: /*fallthrough*/ case 2*(int)op+1
188cb93a386Sopenharmony_ci
189cb93a386Sopenharmony_ci                    // These 128-bit ops are implemented serially for simplicity.
190cb93a386Sopenharmony_ci                    CASE(Op::store128): {
191cb93a386Sopenharmony_ci                        U64 lo = (skvx::cast<uint64_t>(r[x].u32) << 0 |
192cb93a386Sopenharmony_ci                                  skvx::cast<uint64_t>(r[y].u32) << 32),
193cb93a386Sopenharmony_ci                            hi = (skvx::cast<uint64_t>(r[z].u32) << 0 |
194cb93a386Sopenharmony_ci                                  skvx::cast<uint64_t>(r[w].u32) << 32);
195cb93a386Sopenharmony_ci                        for (int i = 0; i < stride; i++) {
196cb93a386Sopenharmony_ci                            memcpy((char*)args[immA] + 16*i + 0, &lo[i], 8);
197cb93a386Sopenharmony_ci                            memcpy((char*)args[immA] + 16*i + 8, &hi[i], 8);
198cb93a386Sopenharmony_ci                        }
199cb93a386Sopenharmony_ci                    } break;
200cb93a386Sopenharmony_ci
201cb93a386Sopenharmony_ci                    CASE(Op::load128):
202cb93a386Sopenharmony_ci                        r[d].i32 = 0;
203cb93a386Sopenharmony_ci                        for (int i = 0; i < stride; i++) {
204cb93a386Sopenharmony_ci                            memcpy(&r[d].i32[i], (const char*)args[immA] + 16*i+ 4*immB, 4);
205cb93a386Sopenharmony_ci                        } break;
206cb93a386Sopenharmony_ci
207cb93a386Sopenharmony_ci                    CASE(Op::assert_true):
208cb93a386Sopenharmony_ci                    #ifdef SK_DEBUG
209cb93a386Sopenharmony_ci                        if (!all(r[x].i32)) {
210cb93a386Sopenharmony_ci                            SkDebugf("inst %d, register %d\n", instIdx, y);
211cb93a386Sopenharmony_ci                            for (int i = 0; i < K; i++) {
212cb93a386Sopenharmony_ci                                SkDebugf("\t%2d: %08x (%g)\n",
213cb93a386Sopenharmony_ci                                         instIdx, r[y].i32[instIdx], r[y].f32[instIdx]);
214cb93a386Sopenharmony_ci                            }
215cb93a386Sopenharmony_ci                            SkASSERT(false);
216cb93a386Sopenharmony_ci                        }
217cb93a386Sopenharmony_ci                    #endif
218cb93a386Sopenharmony_ci                    break;
219cb93a386Sopenharmony_ci
220cb93a386Sopenharmony_ci                    CASE(Op::trace_line):
221cb93a386Sopenharmony_ci                    #ifdef SK_DEBUG
222cb93a386Sopenharmony_ci                    // TODO(skia:12614): this opcode will check the mask; if it's set, we write the
223cb93a386Sopenharmony_ci                    // line number from immA into the trace buffer.
224cb93a386Sopenharmony_ci                    #endif
225cb93a386Sopenharmony_ci                    break;
226cb93a386Sopenharmony_ci
227cb93a386Sopenharmony_ci                    CASE(Op::trace_var):
228cb93a386Sopenharmony_ci                    #ifdef SK_DEBUG
229cb93a386Sopenharmony_ci                    // TODO(skia:12614): this opcode will check the mask; if it's set, we write the
230cb93a386Sopenharmony_ci                    // variable-assignment slot and value to the trace buffer.
231cb93a386Sopenharmony_ci                    #endif
232cb93a386Sopenharmony_ci                    break;
233cb93a386Sopenharmony_ci
234cb93a386Sopenharmony_ci                    CASE(Op::trace_call):
235cb93a386Sopenharmony_ci                    #ifdef SK_DEBUG
236cb93a386Sopenharmony_ci                    // TODO(skia:12614): this opcode will be used to keep track of function entrance
237cb93a386Sopenharmony_ci                    // and exits, enabling step-over of function calls.
238cb93a386Sopenharmony_ci                    #endif
239cb93a386Sopenharmony_ci                    break;
240cb93a386Sopenharmony_ci
241cb93a386Sopenharmony_ci                    CASE(Op::index): {
242cb93a386Sopenharmony_ci                        const int iota[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
243cb93a386Sopenharmony_ci                                            16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
244cb93a386Sopenharmony_ci                                            32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
245cb93a386Sopenharmony_ci                                            48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 };
246cb93a386Sopenharmony_ci                        static_assert(K <= SK_ARRAY_COUNT(iota), "");
247cb93a386Sopenharmony_ci
248cb93a386Sopenharmony_ci                        r[d].i32 = n - I32::Load(iota);
249cb93a386Sopenharmony_ci                    } break;
250cb93a386Sopenharmony_ci
251cb93a386Sopenharmony_ci                    CASE(Op::uniform32):
252cb93a386Sopenharmony_ci                        r[d].i32 = *(const int*)( (const char*)args[immA] + immB );
253cb93a386Sopenharmony_ci                        break;
254cb93a386Sopenharmony_ci
255cb93a386Sopenharmony_ci                    CASE(Op::array32):
256cb93a386Sopenharmony_ci                        const int* ptr;
257cb93a386Sopenharmony_ci                        memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr));
258cb93a386Sopenharmony_ci                        r[d].i32 = ptr[immC/sizeof(int)];
259cb93a386Sopenharmony_ci                        break;
260cb93a386Sopenharmony_ci
261cb93a386Sopenharmony_ci                    CASE(Op::splat): r[d].i32 = immA; break;
262cb93a386Sopenharmony_ci
263cb93a386Sopenharmony_ci                    CASE(Op::add_f32): r[d].f32 = r[x].f32 + r[y].f32; break;
264cb93a386Sopenharmony_ci                    CASE(Op::sub_f32): r[d].f32 = r[x].f32 - r[y].f32; break;
265cb93a386Sopenharmony_ci                    CASE(Op::mul_f32): r[d].f32 = r[x].f32 * r[y].f32; break;
266cb93a386Sopenharmony_ci                    CASE(Op::div_f32): r[d].f32 = r[x].f32 / r[y].f32; break;
267cb93a386Sopenharmony_ci                    CASE(Op::min_f32): r[d].f32 = min(r[x].f32, r[y].f32); break;
268cb93a386Sopenharmony_ci                    CASE(Op::max_f32): r[d].f32 = max(r[x].f32, r[y].f32); break;
269cb93a386Sopenharmony_ci
270cb93a386Sopenharmony_ci                    CASE(Op::fma_f32):  r[d].f32 = fma( r[x].f32, r[y].f32,  r[z].f32); break;
271cb93a386Sopenharmony_ci                    CASE(Op::fms_f32):  r[d].f32 = fma( r[x].f32, r[y].f32, -r[z].f32); break;
272cb93a386Sopenharmony_ci                    CASE(Op::fnma_f32): r[d].f32 = fma(-r[x].f32, r[y].f32,  r[z].f32); break;
273cb93a386Sopenharmony_ci
274cb93a386Sopenharmony_ci                    CASE(Op::sqrt_f32): r[d].f32 = sqrt(r[x].f32); break;
275cb93a386Sopenharmony_ci
276cb93a386Sopenharmony_ci                    CASE(Op::add_i32): r[d].i32 = r[x].i32 + r[y].i32; break;
277cb93a386Sopenharmony_ci                    CASE(Op::sub_i32): r[d].i32 = r[x].i32 - r[y].i32; break;
278cb93a386Sopenharmony_ci                    CASE(Op::mul_i32): r[d].i32 = r[x].i32 * r[y].i32; break;
279cb93a386Sopenharmony_ci
280cb93a386Sopenharmony_ci                    CASE(Op::shl_i32): r[d].i32 = r[x].i32 << immA; break;
281cb93a386Sopenharmony_ci                    CASE(Op::sra_i32): r[d].i32 = r[x].i32 >> immA; break;
282cb93a386Sopenharmony_ci                    CASE(Op::shr_i32): r[d].u32 = r[x].u32 >> immA; break;
283cb93a386Sopenharmony_ci
284cb93a386Sopenharmony_ci                    CASE(Op:: eq_f32): r[d].i32 = r[x].f32 == r[y].f32; break;
285cb93a386Sopenharmony_ci                    CASE(Op::neq_f32): r[d].i32 = r[x].f32 != r[y].f32; break;
286cb93a386Sopenharmony_ci                    CASE(Op:: gt_f32): r[d].i32 = r[x].f32 >  r[y].f32; break;
287cb93a386Sopenharmony_ci                    CASE(Op::gte_f32): r[d].i32 = r[x].f32 >= r[y].f32; break;
288cb93a386Sopenharmony_ci
289cb93a386Sopenharmony_ci                    CASE(Op:: eq_i32): r[d].i32 = r[x].i32 == r[y].i32; break;
290cb93a386Sopenharmony_ci                    CASE(Op:: gt_i32): r[d].i32 = r[x].i32 >  r[y].i32; break;
291cb93a386Sopenharmony_ci
292cb93a386Sopenharmony_ci                    CASE(Op::bit_and  ): r[d].i32 = r[x].i32 &  r[y].i32; break;
293cb93a386Sopenharmony_ci                    CASE(Op::bit_or   ): r[d].i32 = r[x].i32 |  r[y].i32; break;
294cb93a386Sopenharmony_ci                    CASE(Op::bit_xor  ): r[d].i32 = r[x].i32 ^  r[y].i32; break;
295cb93a386Sopenharmony_ci                    CASE(Op::bit_clear): r[d].i32 = r[x].i32 & ~r[y].i32; break;
296cb93a386Sopenharmony_ci
297cb93a386Sopenharmony_ci                    CASE(Op::select): r[d].i32 = skvx::if_then_else(r[x].i32, r[y].i32, r[z].i32);
298cb93a386Sopenharmony_ci                                      break;
299cb93a386Sopenharmony_ci
300cb93a386Sopenharmony_ci                    CASE(Op::ceil):   r[d].f32 =                    skvx::ceil(r[x].f32) ; break;
301cb93a386Sopenharmony_ci                    CASE(Op::floor):  r[d].f32 =                   skvx::floor(r[x].f32) ; break;
302cb93a386Sopenharmony_ci                    CASE(Op::to_f32): r[d].f32 = skvx::cast<float>(            r[x].i32 ); break;
303cb93a386Sopenharmony_ci                    CASE(Op::trunc):  r[d].i32 = skvx::cast<int>  (            r[x].f32 ); break;
304cb93a386Sopenharmony_ci                    CASE(Op::round):  r[d].i32 = skvx::cast<int>  (skvx::lrint(r[x].f32)); break;
305cb93a386Sopenharmony_ci
306cb93a386Sopenharmony_ci                    CASE(Op::to_fp16):
307cb93a386Sopenharmony_ci                        r[d].i32 = skvx::cast<int>(skvx::to_half(r[x].f32));
308cb93a386Sopenharmony_ci                        break;
309cb93a386Sopenharmony_ci                    CASE(Op::from_fp16):
310cb93a386Sopenharmony_ci                        r[d].f32 = skvx::from_half(skvx::cast<uint16_t>(r[x].i32));
311cb93a386Sopenharmony_ci                        break;
312cb93a386Sopenharmony_ci
313cb93a386Sopenharmony_ci                #undef CASE
314cb93a386Sopenharmony_ci                }
315cb93a386Sopenharmony_ci            }
316cb93a386Sopenharmony_ci        }
317cb93a386Sopenharmony_ci    }
318cb93a386Sopenharmony_ci
319cb93a386Sopenharmony_ci}  // namespace SK_OPTS_NS
320cb93a386Sopenharmony_ci
321cb93a386Sopenharmony_ci#endif//SkVM_opts_DEFINED
322