1cb93a386Sopenharmony_ci// Copyright 2020 Google LLC. 2cb93a386Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. 3cb93a386Sopenharmony_ci 4cb93a386Sopenharmony_ci#ifndef SkVM_opts_DEFINED 5cb93a386Sopenharmony_ci#define SkVM_opts_DEFINED 6cb93a386Sopenharmony_ci 7cb93a386Sopenharmony_ci#include "include/private/SkVx.h" 8cb93a386Sopenharmony_ci#include "src/core/SkVM.h" 9cb93a386Sopenharmony_ci 10cb93a386Sopenharmony_citemplate <int N> 11cb93a386Sopenharmony_cistatic inline skvx::Vec<N,int> gather32(const int* ptr, const skvx::Vec<N,int>& ix) { 12cb93a386Sopenharmony_ci#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 13cb93a386Sopenharmony_ci if constexpr (N == 8) { 14cb93a386Sopenharmony_ci return skvx::bit_pun<skvx::Vec<N,int>>( 15cb93a386Sopenharmony_ci _mm256_i32gather_epi32(ptr, skvx::bit_pun<__m256i>(ix), 4)); 16cb93a386Sopenharmony_ci } 17cb93a386Sopenharmony_ci#endif 18cb93a386Sopenharmony_ci // Try to recurse on specializations, falling back on standard scalar map()-based impl. 19cb93a386Sopenharmony_ci if constexpr (N > 8) { 20cb93a386Sopenharmony_ci return join(gather32(ptr, ix.lo), 21cb93a386Sopenharmony_ci gather32(ptr, ix.hi)); 22cb93a386Sopenharmony_ci } 23cb93a386Sopenharmony_ci return map([&](int i) { return ptr[i]; }, ix); 24cb93a386Sopenharmony_ci} 25cb93a386Sopenharmony_ci 26cb93a386Sopenharmony_cinamespace SK_OPTS_NS { 27cb93a386Sopenharmony_ci 28cb93a386Sopenharmony_cinamespace SkVMInterpreterTypes { 29cb93a386Sopenharmony_ci#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 30cb93a386Sopenharmony_ci constexpr inline int K = 32; // 1024-bit: 4 ymm or 2 zmm at a time 31cb93a386Sopenharmony_ci#else 32cb93a386Sopenharmony_ci constexpr inline int K = 8; // 256-bit: 2 xmm, 2 v-registers, etc. 33cb93a386Sopenharmony_ci#endif 34cb93a386Sopenharmony_ci using I32 = skvx::Vec<K, int>; 35cb93a386Sopenharmony_ci using I16 = skvx::Vec<K, int16_t>; 36cb93a386Sopenharmony_ci using F32 = skvx::Vec<K, float>; 37cb93a386Sopenharmony_ci using U64 = skvx::Vec<K, uint64_t>; 38cb93a386Sopenharmony_ci using U32 = skvx::Vec<K, uint32_t>; 39cb93a386Sopenharmony_ci using U16 = skvx::Vec<K, uint16_t>; 40cb93a386Sopenharmony_ci using U8 = skvx::Vec<K, uint8_t>; 41cb93a386Sopenharmony_ci union Slot { 42cb93a386Sopenharmony_ci F32 f32; 43cb93a386Sopenharmony_ci I32 i32; 44cb93a386Sopenharmony_ci U32 u32; 45cb93a386Sopenharmony_ci I16 i16; 46cb93a386Sopenharmony_ci U16 u16; 47cb93a386Sopenharmony_ci }; 48cb93a386Sopenharmony_ci} // namespace SkVMInterpreterTypes 49cb93a386Sopenharmony_ci 50cb93a386Sopenharmony_ci inline void interpret_skvm(const skvm::InterpreterInstruction insts[], const int ninsts, 51cb93a386Sopenharmony_ci const int nregs, const int loop, 52cb93a386Sopenharmony_ci const int strides[], const int nargs, 53cb93a386Sopenharmony_ci int n, void* args[]) { 54cb93a386Sopenharmony_ci using namespace skvm; 55cb93a386Sopenharmony_ci 56cb93a386Sopenharmony_ci using SkVMInterpreterTypes::K; 57cb93a386Sopenharmony_ci using SkVMInterpreterTypes::I32; 58cb93a386Sopenharmony_ci using SkVMInterpreterTypes::I16; 59cb93a386Sopenharmony_ci using SkVMInterpreterTypes::F32; 60cb93a386Sopenharmony_ci using SkVMInterpreterTypes::U64; 61cb93a386Sopenharmony_ci using SkVMInterpreterTypes::U32; 62cb93a386Sopenharmony_ci using SkVMInterpreterTypes::U16; 63cb93a386Sopenharmony_ci using SkVMInterpreterTypes::U8; 64cb93a386Sopenharmony_ci using SkVMInterpreterTypes::Slot; 65cb93a386Sopenharmony_ci 66cb93a386Sopenharmony_ci // We'll operate in SIMT style, knocking off K-size chunks from n while possible. 67cb93a386Sopenharmony_ci 68cb93a386Sopenharmony_ci Slot few_regs[16]; 69cb93a386Sopenharmony_ci std::unique_ptr<char[]> many_regs; 70cb93a386Sopenharmony_ci 71cb93a386Sopenharmony_ci Slot* r = few_regs; 72cb93a386Sopenharmony_ci 73cb93a386Sopenharmony_ci if (nregs > (int)SK_ARRAY_COUNT(few_regs)) { 74cb93a386Sopenharmony_ci // Annoyingly we can't trust that malloc() or new will work with Slot because 75cb93a386Sopenharmony_ci // the skvx::Vec types may have alignment greater than what they provide. 76cb93a386Sopenharmony_ci // We'll overallocate one extra register so we can align manually. 77cb93a386Sopenharmony_ci many_regs.reset(new char[ sizeof(Slot) * (nregs + 1) ]); 78cb93a386Sopenharmony_ci 79cb93a386Sopenharmony_ci uintptr_t addr = (uintptr_t)many_regs.get(); 80cb93a386Sopenharmony_ci addr += alignof(Slot) - 81cb93a386Sopenharmony_ci (addr & (alignof(Slot) - 1)); 82cb93a386Sopenharmony_ci SkASSERT((addr & (alignof(Slot) - 1)) == 0); 83cb93a386Sopenharmony_ci r = (Slot*)addr; 84cb93a386Sopenharmony_ci } 85cb93a386Sopenharmony_ci 86cb93a386Sopenharmony_ci 87cb93a386Sopenharmony_ci // Step each argument pointer ahead by its stride a number of times. 88cb93a386Sopenharmony_ci auto step_args = [&](int times) { 89cb93a386Sopenharmony_ci for (int i = 0; i < nargs; i++) { 90cb93a386Sopenharmony_ci args[i] = (void*)( (char*)args[i] + times * strides[i] ); 91cb93a386Sopenharmony_ci } 92cb93a386Sopenharmony_ci }; 93cb93a386Sopenharmony_ci 94cb93a386Sopenharmony_ci int start = 0, 95cb93a386Sopenharmony_ci stride; 96cb93a386Sopenharmony_ci for ( ; n > 0; start = loop, n -= stride, step_args(stride)) { 97cb93a386Sopenharmony_ci stride = n >= K ? K : 1; 98cb93a386Sopenharmony_ci 99cb93a386Sopenharmony_ci for (int instIdx = start; instIdx < ninsts; instIdx++) { 100cb93a386Sopenharmony_ci InterpreterInstruction inst = insts[instIdx]; 101cb93a386Sopenharmony_ci 102cb93a386Sopenharmony_ci // d = op(x,y,z,w, immA,immB) 103cb93a386Sopenharmony_ci Reg d = inst.d, 104cb93a386Sopenharmony_ci x = inst.x, 105cb93a386Sopenharmony_ci y = inst.y, 106cb93a386Sopenharmony_ci z = inst.z, 107cb93a386Sopenharmony_ci w = inst.w; 108cb93a386Sopenharmony_ci int immA = inst.immA, 109cb93a386Sopenharmony_ci immB = inst.immB, 110cb93a386Sopenharmony_ci immC = inst.immC; 111cb93a386Sopenharmony_ci 112cb93a386Sopenharmony_ci // Ops that interact with memory need to know whether we're stride=1 or K, 113cb93a386Sopenharmony_ci // but all non-memory ops can run the same code no matter the stride. 114cb93a386Sopenharmony_ci switch (2*(int)inst.op + (stride == K ? 1 : 0)) { 115cb93a386Sopenharmony_ci default: SkUNREACHABLE; 116cb93a386Sopenharmony_ci 117cb93a386Sopenharmony_ci #define STRIDE_1(op) case 2*(int)op 118cb93a386Sopenharmony_ci #define STRIDE_K(op) case 2*(int)op + 1 119cb93a386Sopenharmony_ci STRIDE_1(Op::store8 ): memcpy(args[immA], &r[x].i32, 1); break; 120cb93a386Sopenharmony_ci STRIDE_1(Op::store16): memcpy(args[immA], &r[x].i32, 2); break; 121cb93a386Sopenharmony_ci STRIDE_1(Op::store32): memcpy(args[immA], &r[x].i32, 4); break; 122cb93a386Sopenharmony_ci STRIDE_1(Op::store64): memcpy((char*)args[immA]+0, &r[x].i32, 4); 123cb93a386Sopenharmony_ci memcpy((char*)args[immA]+4, &r[y].i32, 4); break; 124cb93a386Sopenharmony_ci 125cb93a386Sopenharmony_ci STRIDE_K(Op::store8 ): skvx::cast<uint8_t> (r[x].i32).store(args[immA]); break; 126cb93a386Sopenharmony_ci STRIDE_K(Op::store16): skvx::cast<uint16_t>(r[x].i32).store(args[immA]); break; 127cb93a386Sopenharmony_ci STRIDE_K(Op::store32): (r[x].i32).store(args[immA]); break; 128cb93a386Sopenharmony_ci STRIDE_K(Op::store64): (skvx::cast<uint64_t>(r[x].u32) << 0 | 129cb93a386Sopenharmony_ci skvx::cast<uint64_t>(r[y].u32) << 32).store(args[immA]); 130cb93a386Sopenharmony_ci break; 131cb93a386Sopenharmony_ci 132cb93a386Sopenharmony_ci STRIDE_1(Op::load8 ): r[d].i32 = 0; memcpy(&r[d].i32, args[immA], 1); break; 133cb93a386Sopenharmony_ci STRIDE_1(Op::load16): r[d].i32 = 0; memcpy(&r[d].i32, args[immA], 2); break; 134cb93a386Sopenharmony_ci STRIDE_1(Op::load32): r[d].i32 = 0; memcpy(&r[d].i32, args[immA], 4); break; 135cb93a386Sopenharmony_ci STRIDE_1(Op::load64): 136cb93a386Sopenharmony_ci r[d].i32 = 0; memcpy(&r[d].i32, (char*)args[immA] + 4*immB, 4); break; 137cb93a386Sopenharmony_ci 138cb93a386Sopenharmony_ci STRIDE_K(Op::load8 ): r[d].i32= skvx::cast<int>(U8 ::Load(args[immA])); break; 139cb93a386Sopenharmony_ci STRIDE_K(Op::load16): r[d].i32= skvx::cast<int>(U16::Load(args[immA])); break; 140cb93a386Sopenharmony_ci STRIDE_K(Op::load32): r[d].i32= I32::Load(args[immA]) ; break; 141cb93a386Sopenharmony_ci STRIDE_K(Op::load64): 142cb93a386Sopenharmony_ci // Low 32 bits if immB=0, or high 32 bits if immB=1. 143cb93a386Sopenharmony_ci r[d].i32 = skvx::cast<int>(U64::Load(args[immA]) >> (32*immB)); break; 144cb93a386Sopenharmony_ci 145cb93a386Sopenharmony_ci // The pointer we base our gather on is loaded indirectly from a uniform: 146cb93a386Sopenharmony_ci // - args[immA] is the uniform holding our gather base pointer somewhere; 147cb93a386Sopenharmony_ci // - (const uint8_t*)args[immA] + immB points to the gather base pointer; 148cb93a386Sopenharmony_ci // - memcpy() loads the gather base and into a pointer of the right type. 149cb93a386Sopenharmony_ci // After all that we have an ordinary (uniform) pointer `ptr` to load from, 150cb93a386Sopenharmony_ci // and we then gather from it using the varying indices in r[x]. 151cb93a386Sopenharmony_ci STRIDE_1(Op::gather8): { 152cb93a386Sopenharmony_ci const uint8_t* ptr; 153cb93a386Sopenharmony_ci memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr)); 154cb93a386Sopenharmony_ci r[d].i32 = ptr[ r[x].i32[0] ]; 155cb93a386Sopenharmony_ci } break; 156cb93a386Sopenharmony_ci STRIDE_1(Op::gather16): { 157cb93a386Sopenharmony_ci const uint16_t* ptr; 158cb93a386Sopenharmony_ci memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr)); 159cb93a386Sopenharmony_ci r[d].i32 = ptr[ r[x].i32[0] ]; 160cb93a386Sopenharmony_ci } break; 161cb93a386Sopenharmony_ci STRIDE_1(Op::gather32): { 162cb93a386Sopenharmony_ci const int* ptr; 163cb93a386Sopenharmony_ci memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr)); 164cb93a386Sopenharmony_ci r[d].i32 = ptr[ r[x].i32[0] ]; 165cb93a386Sopenharmony_ci } break; 166cb93a386Sopenharmony_ci 167cb93a386Sopenharmony_ci STRIDE_K(Op::gather8): { 168cb93a386Sopenharmony_ci const uint8_t* ptr; 169cb93a386Sopenharmony_ci memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr)); 170cb93a386Sopenharmony_ci r[d].i32 = map([&](int ix) { return (int)ptr[ix]; }, r[x].i32); 171cb93a386Sopenharmony_ci } break; 172cb93a386Sopenharmony_ci STRIDE_K(Op::gather16): { 173cb93a386Sopenharmony_ci const uint16_t* ptr; 174cb93a386Sopenharmony_ci memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr)); 175cb93a386Sopenharmony_ci r[d].i32 = map([&](int ix) { return (int)ptr[ix]; }, r[x].i32); 176cb93a386Sopenharmony_ci } break; 177cb93a386Sopenharmony_ci STRIDE_K(Op::gather32): { 178cb93a386Sopenharmony_ci const int* ptr; 179cb93a386Sopenharmony_ci memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr)); 180cb93a386Sopenharmony_ci r[d].i32 = gather32(ptr, r[x].i32); 181cb93a386Sopenharmony_ci } break; 182cb93a386Sopenharmony_ci 183cb93a386Sopenharmony_ci #undef STRIDE_1 184cb93a386Sopenharmony_ci #undef STRIDE_K 185cb93a386Sopenharmony_ci 186cb93a386Sopenharmony_ci // Ops that don't interact with memory should never care about the stride. 187cb93a386Sopenharmony_ci #define CASE(op) case 2*(int)op: /*fallthrough*/ case 2*(int)op+1 188cb93a386Sopenharmony_ci 189cb93a386Sopenharmony_ci // These 128-bit ops are implemented serially for simplicity. 190cb93a386Sopenharmony_ci CASE(Op::store128): { 191cb93a386Sopenharmony_ci U64 lo = (skvx::cast<uint64_t>(r[x].u32) << 0 | 192cb93a386Sopenharmony_ci skvx::cast<uint64_t>(r[y].u32) << 32), 193cb93a386Sopenharmony_ci hi = (skvx::cast<uint64_t>(r[z].u32) << 0 | 194cb93a386Sopenharmony_ci skvx::cast<uint64_t>(r[w].u32) << 32); 195cb93a386Sopenharmony_ci for (int i = 0; i < stride; i++) { 196cb93a386Sopenharmony_ci memcpy((char*)args[immA] + 16*i + 0, &lo[i], 8); 197cb93a386Sopenharmony_ci memcpy((char*)args[immA] + 16*i + 8, &hi[i], 8); 198cb93a386Sopenharmony_ci } 199cb93a386Sopenharmony_ci } break; 200cb93a386Sopenharmony_ci 201cb93a386Sopenharmony_ci CASE(Op::load128): 202cb93a386Sopenharmony_ci r[d].i32 = 0; 203cb93a386Sopenharmony_ci for (int i = 0; i < stride; i++) { 204cb93a386Sopenharmony_ci memcpy(&r[d].i32[i], (const char*)args[immA] + 16*i+ 4*immB, 4); 205cb93a386Sopenharmony_ci } break; 206cb93a386Sopenharmony_ci 207cb93a386Sopenharmony_ci CASE(Op::assert_true): 208cb93a386Sopenharmony_ci #ifdef SK_DEBUG 209cb93a386Sopenharmony_ci if (!all(r[x].i32)) { 210cb93a386Sopenharmony_ci SkDebugf("inst %d, register %d\n", instIdx, y); 211cb93a386Sopenharmony_ci for (int i = 0; i < K; i++) { 212cb93a386Sopenharmony_ci SkDebugf("\t%2d: %08x (%g)\n", 213cb93a386Sopenharmony_ci instIdx, r[y].i32[instIdx], r[y].f32[instIdx]); 214cb93a386Sopenharmony_ci } 215cb93a386Sopenharmony_ci SkASSERT(false); 216cb93a386Sopenharmony_ci } 217cb93a386Sopenharmony_ci #endif 218cb93a386Sopenharmony_ci break; 219cb93a386Sopenharmony_ci 220cb93a386Sopenharmony_ci CASE(Op::trace_line): 221cb93a386Sopenharmony_ci #ifdef SK_DEBUG 222cb93a386Sopenharmony_ci // TODO(skia:12614): this opcode will check the mask; if it's set, we write the 223cb93a386Sopenharmony_ci // line number from immA into the trace buffer. 224cb93a386Sopenharmony_ci #endif 225cb93a386Sopenharmony_ci break; 226cb93a386Sopenharmony_ci 227cb93a386Sopenharmony_ci CASE(Op::trace_var): 228cb93a386Sopenharmony_ci #ifdef SK_DEBUG 229cb93a386Sopenharmony_ci // TODO(skia:12614): this opcode will check the mask; if it's set, we write the 230cb93a386Sopenharmony_ci // variable-assignment slot and value to the trace buffer. 231cb93a386Sopenharmony_ci #endif 232cb93a386Sopenharmony_ci break; 233cb93a386Sopenharmony_ci 234cb93a386Sopenharmony_ci CASE(Op::trace_call): 235cb93a386Sopenharmony_ci #ifdef SK_DEBUG 236cb93a386Sopenharmony_ci // TODO(skia:12614): this opcode will be used to keep track of function entrance 237cb93a386Sopenharmony_ci // and exits, enabling step-over of function calls. 238cb93a386Sopenharmony_ci #endif 239cb93a386Sopenharmony_ci break; 240cb93a386Sopenharmony_ci 241cb93a386Sopenharmony_ci CASE(Op::index): { 242cb93a386Sopenharmony_ci const int iota[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 243cb93a386Sopenharmony_ci 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, 244cb93a386Sopenharmony_ci 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, 245cb93a386Sopenharmony_ci 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 }; 246cb93a386Sopenharmony_ci static_assert(K <= SK_ARRAY_COUNT(iota), ""); 247cb93a386Sopenharmony_ci 248cb93a386Sopenharmony_ci r[d].i32 = n - I32::Load(iota); 249cb93a386Sopenharmony_ci } break; 250cb93a386Sopenharmony_ci 251cb93a386Sopenharmony_ci CASE(Op::uniform32): 252cb93a386Sopenharmony_ci r[d].i32 = *(const int*)( (const char*)args[immA] + immB ); 253cb93a386Sopenharmony_ci break; 254cb93a386Sopenharmony_ci 255cb93a386Sopenharmony_ci CASE(Op::array32): 256cb93a386Sopenharmony_ci const int* ptr; 257cb93a386Sopenharmony_ci memcpy(&ptr, (const uint8_t*)args[immA] + immB, sizeof(ptr)); 258cb93a386Sopenharmony_ci r[d].i32 = ptr[immC/sizeof(int)]; 259cb93a386Sopenharmony_ci break; 260cb93a386Sopenharmony_ci 261cb93a386Sopenharmony_ci CASE(Op::splat): r[d].i32 = immA; break; 262cb93a386Sopenharmony_ci 263cb93a386Sopenharmony_ci CASE(Op::add_f32): r[d].f32 = r[x].f32 + r[y].f32; break; 264cb93a386Sopenharmony_ci CASE(Op::sub_f32): r[d].f32 = r[x].f32 - r[y].f32; break; 265cb93a386Sopenharmony_ci CASE(Op::mul_f32): r[d].f32 = r[x].f32 * r[y].f32; break; 266cb93a386Sopenharmony_ci CASE(Op::div_f32): r[d].f32 = r[x].f32 / r[y].f32; break; 267cb93a386Sopenharmony_ci CASE(Op::min_f32): r[d].f32 = min(r[x].f32, r[y].f32); break; 268cb93a386Sopenharmony_ci CASE(Op::max_f32): r[d].f32 = max(r[x].f32, r[y].f32); break; 269cb93a386Sopenharmony_ci 270cb93a386Sopenharmony_ci CASE(Op::fma_f32): r[d].f32 = fma( r[x].f32, r[y].f32, r[z].f32); break; 271cb93a386Sopenharmony_ci CASE(Op::fms_f32): r[d].f32 = fma( r[x].f32, r[y].f32, -r[z].f32); break; 272cb93a386Sopenharmony_ci CASE(Op::fnma_f32): r[d].f32 = fma(-r[x].f32, r[y].f32, r[z].f32); break; 273cb93a386Sopenharmony_ci 274cb93a386Sopenharmony_ci CASE(Op::sqrt_f32): r[d].f32 = sqrt(r[x].f32); break; 275cb93a386Sopenharmony_ci 276cb93a386Sopenharmony_ci CASE(Op::add_i32): r[d].i32 = r[x].i32 + r[y].i32; break; 277cb93a386Sopenharmony_ci CASE(Op::sub_i32): r[d].i32 = r[x].i32 - r[y].i32; break; 278cb93a386Sopenharmony_ci CASE(Op::mul_i32): r[d].i32 = r[x].i32 * r[y].i32; break; 279cb93a386Sopenharmony_ci 280cb93a386Sopenharmony_ci CASE(Op::shl_i32): r[d].i32 = r[x].i32 << immA; break; 281cb93a386Sopenharmony_ci CASE(Op::sra_i32): r[d].i32 = r[x].i32 >> immA; break; 282cb93a386Sopenharmony_ci CASE(Op::shr_i32): r[d].u32 = r[x].u32 >> immA; break; 283cb93a386Sopenharmony_ci 284cb93a386Sopenharmony_ci CASE(Op:: eq_f32): r[d].i32 = r[x].f32 == r[y].f32; break; 285cb93a386Sopenharmony_ci CASE(Op::neq_f32): r[d].i32 = r[x].f32 != r[y].f32; break; 286cb93a386Sopenharmony_ci CASE(Op:: gt_f32): r[d].i32 = r[x].f32 > r[y].f32; break; 287cb93a386Sopenharmony_ci CASE(Op::gte_f32): r[d].i32 = r[x].f32 >= r[y].f32; break; 288cb93a386Sopenharmony_ci 289cb93a386Sopenharmony_ci CASE(Op:: eq_i32): r[d].i32 = r[x].i32 == r[y].i32; break; 290cb93a386Sopenharmony_ci CASE(Op:: gt_i32): r[d].i32 = r[x].i32 > r[y].i32; break; 291cb93a386Sopenharmony_ci 292cb93a386Sopenharmony_ci CASE(Op::bit_and ): r[d].i32 = r[x].i32 & r[y].i32; break; 293cb93a386Sopenharmony_ci CASE(Op::bit_or ): r[d].i32 = r[x].i32 | r[y].i32; break; 294cb93a386Sopenharmony_ci CASE(Op::bit_xor ): r[d].i32 = r[x].i32 ^ r[y].i32; break; 295cb93a386Sopenharmony_ci CASE(Op::bit_clear): r[d].i32 = r[x].i32 & ~r[y].i32; break; 296cb93a386Sopenharmony_ci 297cb93a386Sopenharmony_ci CASE(Op::select): r[d].i32 = skvx::if_then_else(r[x].i32, r[y].i32, r[z].i32); 298cb93a386Sopenharmony_ci break; 299cb93a386Sopenharmony_ci 300cb93a386Sopenharmony_ci CASE(Op::ceil): r[d].f32 = skvx::ceil(r[x].f32) ; break; 301cb93a386Sopenharmony_ci CASE(Op::floor): r[d].f32 = skvx::floor(r[x].f32) ; break; 302cb93a386Sopenharmony_ci CASE(Op::to_f32): r[d].f32 = skvx::cast<float>( r[x].i32 ); break; 303cb93a386Sopenharmony_ci CASE(Op::trunc): r[d].i32 = skvx::cast<int> ( r[x].f32 ); break; 304cb93a386Sopenharmony_ci CASE(Op::round): r[d].i32 = skvx::cast<int> (skvx::lrint(r[x].f32)); break; 305cb93a386Sopenharmony_ci 306cb93a386Sopenharmony_ci CASE(Op::to_fp16): 307cb93a386Sopenharmony_ci r[d].i32 = skvx::cast<int>(skvx::to_half(r[x].f32)); 308cb93a386Sopenharmony_ci break; 309cb93a386Sopenharmony_ci CASE(Op::from_fp16): 310cb93a386Sopenharmony_ci r[d].f32 = skvx::from_half(skvx::cast<uint16_t>(r[x].i32)); 311cb93a386Sopenharmony_ci break; 312cb93a386Sopenharmony_ci 313cb93a386Sopenharmony_ci #undef CASE 314cb93a386Sopenharmony_ci } 315cb93a386Sopenharmony_ci } 316cb93a386Sopenharmony_ci } 317cb93a386Sopenharmony_ci } 318cb93a386Sopenharmony_ci 319cb93a386Sopenharmony_ci} // namespace SK_OPTS_NS 320cb93a386Sopenharmony_ci 321cb93a386Sopenharmony_ci#endif//SkVM_opts_DEFINED 322