1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Red Hat 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark (robdclark@gmail.com) 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "math.h" 28bf215546Sopenharmony_ci#include "nir/nir_builtin_builder.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "util/u_printf.h" 31bf215546Sopenharmony_ci#include "vtn_private.h" 32bf215546Sopenharmony_ci#include "OpenCL.std.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_citypedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b, 35bf215546Sopenharmony_ci uint32_t opcode, 36bf215546Sopenharmony_ci unsigned num_srcs, nir_ssa_def **srcs, 37bf215546Sopenharmony_ci struct vtn_type **src_types, 38bf215546Sopenharmony_ci const struct vtn_type *dest_type); 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic int to_llvm_address_space(SpvStorageClass mode) 41bf215546Sopenharmony_ci{ 42bf215546Sopenharmony_ci switch (mode) { 43bf215546Sopenharmony_ci case SpvStorageClassPrivate: 44bf215546Sopenharmony_ci case SpvStorageClassFunction: return 0; 45bf215546Sopenharmony_ci case SpvStorageClassCrossWorkgroup: return 1; 46bf215546Sopenharmony_ci case SpvStorageClassUniform: 47bf215546Sopenharmony_ci case SpvStorageClassUniformConstant: return 2; 48bf215546Sopenharmony_ci case SpvStorageClassWorkgroup: return 3; 49bf215546Sopenharmony_ci case SpvStorageClassGeneric: return 4; 50bf215546Sopenharmony_ci default: return -1; 51bf215546Sopenharmony_ci } 52bf215546Sopenharmony_ci} 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_cistatic void 56bf215546Sopenharmony_civtn_opencl_mangle(const char *in_name, 57bf215546Sopenharmony_ci uint32_t const_mask, 58bf215546Sopenharmony_ci int ntypes, struct vtn_type **src_types, 59bf215546Sopenharmony_ci char **outstring) 60bf215546Sopenharmony_ci{ 61bf215546Sopenharmony_ci char local_name[256] = ""; 62bf215546Sopenharmony_ci char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci for (unsigned i = 0; i < ntypes; ++i) { 65bf215546Sopenharmony_ci const struct glsl_type *type = src_types[i]->type; 66bf215546Sopenharmony_ci enum vtn_base_type base_type = src_types[i]->base_type; 67bf215546Sopenharmony_ci if (src_types[i]->base_type == vtn_base_type_pointer) { 68bf215546Sopenharmony_ci *(args_str++) = 'P'; 69bf215546Sopenharmony_ci int address_space = to_llvm_address_space(src_types[i]->storage_class); 70bf215546Sopenharmony_ci if (address_space > 0) 71bf215546Sopenharmony_ci args_str += sprintf(args_str, "U3AS%d", address_space); 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci type = src_types[i]->deref->type; 74bf215546Sopenharmony_ci base_type = src_types[i]->deref->base_type; 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci if (const_mask & (1 << i)) 78bf215546Sopenharmony_ci *(args_str++) = 'K'; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci unsigned num_elements = glsl_get_components(type); 81bf215546Sopenharmony_ci if (num_elements > 1) { 82bf215546Sopenharmony_ci /* Vectors are not treated as built-ins for mangling, so check for substitution. 83bf215546Sopenharmony_ci * In theory, we'd need to know which substitution value this is. In practice, 84bf215546Sopenharmony_ci * the functions we need from libclc only support 1 85bf215546Sopenharmony_ci */ 86bf215546Sopenharmony_ci bool substitution = false; 87bf215546Sopenharmony_ci for (unsigned j = 0; j < i; ++j) { 88bf215546Sopenharmony_ci const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ? 89bf215546Sopenharmony_ci src_types[j]->deref->type : src_types[j]->type; 90bf215546Sopenharmony_ci if (type == other_type) { 91bf215546Sopenharmony_ci substitution = true; 92bf215546Sopenharmony_ci break; 93bf215546Sopenharmony_ci } 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci if (substitution) { 97bf215546Sopenharmony_ci args_str += sprintf(args_str, "S_"); 98bf215546Sopenharmony_ci continue; 99bf215546Sopenharmony_ci } else 100bf215546Sopenharmony_ci args_str += sprintf(args_str, "Dv%d_", num_elements); 101bf215546Sopenharmony_ci } 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci const char *suffix = NULL; 104bf215546Sopenharmony_ci switch (base_type) { 105bf215546Sopenharmony_ci case vtn_base_type_sampler: suffix = "11ocl_sampler"; break; 106bf215546Sopenharmony_ci case vtn_base_type_event: suffix = "9ocl_event"; break; 107bf215546Sopenharmony_ci default: { 108bf215546Sopenharmony_ci const char *primitives[] = { 109bf215546Sopenharmony_ci [GLSL_TYPE_UINT] = "j", 110bf215546Sopenharmony_ci [GLSL_TYPE_INT] = "i", 111bf215546Sopenharmony_ci [GLSL_TYPE_FLOAT] = "f", 112bf215546Sopenharmony_ci [GLSL_TYPE_FLOAT16] = "Dh", 113bf215546Sopenharmony_ci [GLSL_TYPE_DOUBLE] = "d", 114bf215546Sopenharmony_ci [GLSL_TYPE_UINT8] = "h", 115bf215546Sopenharmony_ci [GLSL_TYPE_INT8] = "c", 116bf215546Sopenharmony_ci [GLSL_TYPE_UINT16] = "t", 117bf215546Sopenharmony_ci [GLSL_TYPE_INT16] = "s", 118bf215546Sopenharmony_ci [GLSL_TYPE_UINT64] = "m", 119bf215546Sopenharmony_ci [GLSL_TYPE_INT64] = "l", 120bf215546Sopenharmony_ci [GLSL_TYPE_BOOL] = "b", 121bf215546Sopenharmony_ci [GLSL_TYPE_ERROR] = NULL, 122bf215546Sopenharmony_ci }; 123bf215546Sopenharmony_ci enum glsl_base_type glsl_base_type = glsl_get_base_type(type); 124bf215546Sopenharmony_ci assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]); 125bf215546Sopenharmony_ci suffix = primitives[glsl_base_type]; 126bf215546Sopenharmony_ci break; 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci args_str += sprintf(args_str, "%s", suffix); 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci *outstring = strdup(local_name); 133bf215546Sopenharmony_ci} 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_cistatic nir_function *mangle_and_find(struct vtn_builder *b, 136bf215546Sopenharmony_ci const char *name, 137bf215546Sopenharmony_ci uint32_t const_mask, 138bf215546Sopenharmony_ci uint32_t num_srcs, 139bf215546Sopenharmony_ci struct vtn_type **src_types) 140bf215546Sopenharmony_ci{ 141bf215546Sopenharmony_ci char *mname; 142bf215546Sopenharmony_ci nir_function *found = NULL; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname); 145bf215546Sopenharmony_ci /* try and find in current shader first. */ 146bf215546Sopenharmony_ci nir_foreach_function(funcs, b->shader) { 147bf215546Sopenharmony_ci if (!strcmp(funcs->name, mname)) { 148bf215546Sopenharmony_ci found = funcs; 149bf215546Sopenharmony_ci break; 150bf215546Sopenharmony_ci } 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci /* if not found here find in clc shader and create a decl mirroring it */ 153bf215546Sopenharmony_ci if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) { 154bf215546Sopenharmony_ci nir_foreach_function(funcs, b->options->clc_shader) { 155bf215546Sopenharmony_ci if (!strcmp(funcs->name, mname)) { 156bf215546Sopenharmony_ci found = funcs; 157bf215546Sopenharmony_ci break; 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci if (found) { 161bf215546Sopenharmony_ci nir_function *decl = nir_function_create(b->shader, mname); 162bf215546Sopenharmony_ci decl->num_params = found->num_params; 163bf215546Sopenharmony_ci decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params); 164bf215546Sopenharmony_ci for (unsigned i = 0; i < decl->num_params; i++) { 165bf215546Sopenharmony_ci decl->params[i] = found->params[i]; 166bf215546Sopenharmony_ci } 167bf215546Sopenharmony_ci found = decl; 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci if (!found) 171bf215546Sopenharmony_ci vtn_fail("Can't find clc function %s\n", mname); 172bf215546Sopenharmony_ci free(mname); 173bf215546Sopenharmony_ci return found; 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_cistatic bool call_mangled_function(struct vtn_builder *b, 177bf215546Sopenharmony_ci const char *name, 178bf215546Sopenharmony_ci uint32_t const_mask, 179bf215546Sopenharmony_ci uint32_t num_srcs, 180bf215546Sopenharmony_ci struct vtn_type **src_types, 181bf215546Sopenharmony_ci const struct vtn_type *dest_type, 182bf215546Sopenharmony_ci nir_ssa_def **srcs, 183bf215546Sopenharmony_ci nir_deref_instr **ret_deref_ptr) 184bf215546Sopenharmony_ci{ 185bf215546Sopenharmony_ci nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types); 186bf215546Sopenharmony_ci if (!found) 187bf215546Sopenharmony_ci return false; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci nir_call_instr *call = nir_call_instr_create(b->shader, found); 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci nir_deref_instr *ret_deref = NULL; 192bf215546Sopenharmony_ci uint32_t param_idx = 0; 193bf215546Sopenharmony_ci if (dest_type) { 194bf215546Sopenharmony_ci nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl, 195bf215546Sopenharmony_ci glsl_get_bare_type(dest_type->type), 196bf215546Sopenharmony_ci "return_tmp"); 197bf215546Sopenharmony_ci ret_deref = nir_build_deref_var(&b->nb, ret_tmp); 198bf215546Sopenharmony_ci call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa); 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) 202bf215546Sopenharmony_ci call->params[param_idx++] = nir_src_for_ssa(srcs[i]); 203bf215546Sopenharmony_ci nir_builder_instr_insert(&b->nb, &call->instr); 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci *ret_deref_ptr = ret_deref; 206bf215546Sopenharmony_ci return true; 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_cistatic void 210bf215546Sopenharmony_cihandle_instr(struct vtn_builder *b, uint32_t opcode, 211bf215546Sopenharmony_ci const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler) 212bf215546Sopenharmony_ci{ 213bf215546Sopenharmony_ci struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci nir_ssa_def *srcs[5] = { NULL }; 216bf215546Sopenharmony_ci struct vtn_type *src_types[5] = { NULL }; 217bf215546Sopenharmony_ci vtn_assert(num_srcs <= ARRAY_SIZE(srcs)); 218bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 219bf215546Sopenharmony_ci struct vtn_value *val = vtn_untyped_value(b, w_src[i]); 220bf215546Sopenharmony_ci struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]); 221bf215546Sopenharmony_ci srcs[i] = ssa->def; 222bf215546Sopenharmony_ci src_types[i] = val->type; 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type); 226bf215546Sopenharmony_ci if (result) { 227bf215546Sopenharmony_ci vtn_push_nir_ssa(b, w_dest[1], result); 228bf215546Sopenharmony_ci } else { 229bf215546Sopenharmony_ci vtn_assert(dest_type == NULL); 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci} 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_cistatic nir_op 234bf215546Sopenharmony_cinir_alu_op_for_opencl_opcode(struct vtn_builder *b, 235bf215546Sopenharmony_ci enum OpenCLstd_Entrypoints opcode) 236bf215546Sopenharmony_ci{ 237bf215546Sopenharmony_ci switch (opcode) { 238bf215546Sopenharmony_ci case OpenCLstd_Fabs: return nir_op_fabs; 239bf215546Sopenharmony_ci case OpenCLstd_SAbs: return nir_op_iabs; 240bf215546Sopenharmony_ci case OpenCLstd_SAdd_sat: return nir_op_iadd_sat; 241bf215546Sopenharmony_ci case OpenCLstd_UAdd_sat: return nir_op_uadd_sat; 242bf215546Sopenharmony_ci case OpenCLstd_Ceil: return nir_op_fceil; 243bf215546Sopenharmony_ci case OpenCLstd_Floor: return nir_op_ffloor; 244bf215546Sopenharmony_ci case OpenCLstd_SHadd: return nir_op_ihadd; 245bf215546Sopenharmony_ci case OpenCLstd_UHadd: return nir_op_uhadd; 246bf215546Sopenharmony_ci case OpenCLstd_Fmax: return nir_op_fmax; 247bf215546Sopenharmony_ci case OpenCLstd_SMax: return nir_op_imax; 248bf215546Sopenharmony_ci case OpenCLstd_UMax: return nir_op_umax; 249bf215546Sopenharmony_ci case OpenCLstd_Fmin: return nir_op_fmin; 250bf215546Sopenharmony_ci case OpenCLstd_SMin: return nir_op_imin; 251bf215546Sopenharmony_ci case OpenCLstd_UMin: return nir_op_umin; 252bf215546Sopenharmony_ci case OpenCLstd_Mix: return nir_op_flrp; 253bf215546Sopenharmony_ci case OpenCLstd_Native_cos: return nir_op_fcos; 254bf215546Sopenharmony_ci case OpenCLstd_Native_divide: return nir_op_fdiv; 255bf215546Sopenharmony_ci case OpenCLstd_Native_exp2: return nir_op_fexp2; 256bf215546Sopenharmony_ci case OpenCLstd_Native_log2: return nir_op_flog2; 257bf215546Sopenharmony_ci case OpenCLstd_Native_powr: return nir_op_fpow; 258bf215546Sopenharmony_ci case OpenCLstd_Native_recip: return nir_op_frcp; 259bf215546Sopenharmony_ci case OpenCLstd_Native_rsqrt: return nir_op_frsq; 260bf215546Sopenharmony_ci case OpenCLstd_Native_sin: return nir_op_fsin; 261bf215546Sopenharmony_ci case OpenCLstd_Native_sqrt: return nir_op_fsqrt; 262bf215546Sopenharmony_ci case OpenCLstd_SMul_hi: return nir_op_imul_high; 263bf215546Sopenharmony_ci case OpenCLstd_UMul_hi: return nir_op_umul_high; 264bf215546Sopenharmony_ci case OpenCLstd_Popcount: return nir_op_bit_count; 265bf215546Sopenharmony_ci case OpenCLstd_SRhadd: return nir_op_irhadd; 266bf215546Sopenharmony_ci case OpenCLstd_URhadd: return nir_op_urhadd; 267bf215546Sopenharmony_ci case OpenCLstd_Rsqrt: return nir_op_frsq; 268bf215546Sopenharmony_ci case OpenCLstd_Sign: return nir_op_fsign; 269bf215546Sopenharmony_ci case OpenCLstd_Sqrt: return nir_op_fsqrt; 270bf215546Sopenharmony_ci case OpenCLstd_SSub_sat: return nir_op_isub_sat; 271bf215546Sopenharmony_ci case OpenCLstd_USub_sat: return nir_op_usub_sat; 272bf215546Sopenharmony_ci case OpenCLstd_Trunc: return nir_op_ftrunc; 273bf215546Sopenharmony_ci case OpenCLstd_Rint: return nir_op_fround_even; 274bf215546Sopenharmony_ci case OpenCLstd_Half_divide: return nir_op_fdiv; 275bf215546Sopenharmony_ci case OpenCLstd_Half_recip: return nir_op_frcp; 276bf215546Sopenharmony_ci /* uhm... */ 277bf215546Sopenharmony_ci case OpenCLstd_UAbs: return nir_op_mov; 278bf215546Sopenharmony_ci default: 279bf215546Sopenharmony_ci vtn_fail("No NIR equivalent"); 280bf215546Sopenharmony_ci } 281bf215546Sopenharmony_ci} 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_cistatic nir_ssa_def * 284bf215546Sopenharmony_cihandle_alu(struct vtn_builder *b, uint32_t opcode, 285bf215546Sopenharmony_ci unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types, 286bf215546Sopenharmony_ci const struct vtn_type *dest_type) 287bf215546Sopenharmony_ci{ 288bf215546Sopenharmony_ci nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode), 289bf215546Sopenharmony_ci srcs[0], srcs[1], srcs[2], NULL); 290bf215546Sopenharmony_ci if (opcode == OpenCLstd_Popcount) 291bf215546Sopenharmony_ci ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type)); 292bf215546Sopenharmony_ci return ret; 293bf215546Sopenharmony_ci} 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci#define REMAP(op, str) [OpenCLstd_##op] = { str } 296bf215546Sopenharmony_cistatic const struct { 297bf215546Sopenharmony_ci const char *fn; 298bf215546Sopenharmony_ci} remap_table[] = { 299bf215546Sopenharmony_ci REMAP(Distance, "distance"), 300bf215546Sopenharmony_ci REMAP(Fast_distance, "fast_distance"), 301bf215546Sopenharmony_ci REMAP(Fast_length, "fast_length"), 302bf215546Sopenharmony_ci REMAP(Fast_normalize, "fast_normalize"), 303bf215546Sopenharmony_ci REMAP(Half_rsqrt, "half_rsqrt"), 304bf215546Sopenharmony_ci REMAP(Half_sqrt, "half_sqrt"), 305bf215546Sopenharmony_ci REMAP(Length, "length"), 306bf215546Sopenharmony_ci REMAP(Normalize, "normalize"), 307bf215546Sopenharmony_ci REMAP(Degrees, "degrees"), 308bf215546Sopenharmony_ci REMAP(Radians, "radians"), 309bf215546Sopenharmony_ci REMAP(Rotate, "rotate"), 310bf215546Sopenharmony_ci REMAP(Smoothstep, "smoothstep"), 311bf215546Sopenharmony_ci REMAP(Step, "step"), 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci REMAP(Pow, "pow"), 314bf215546Sopenharmony_ci REMAP(Pown, "pown"), 315bf215546Sopenharmony_ci REMAP(Powr, "powr"), 316bf215546Sopenharmony_ci REMAP(Rootn, "rootn"), 317bf215546Sopenharmony_ci REMAP(Modf, "modf"), 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci REMAP(Acos, "acos"), 320bf215546Sopenharmony_ci REMAP(Acosh, "acosh"), 321bf215546Sopenharmony_ci REMAP(Acospi, "acospi"), 322bf215546Sopenharmony_ci REMAP(Asin, "asin"), 323bf215546Sopenharmony_ci REMAP(Asinh, "asinh"), 324bf215546Sopenharmony_ci REMAP(Asinpi, "asinpi"), 325bf215546Sopenharmony_ci REMAP(Atan, "atan"), 326bf215546Sopenharmony_ci REMAP(Atan2, "atan2"), 327bf215546Sopenharmony_ci REMAP(Atanh, "atanh"), 328bf215546Sopenharmony_ci REMAP(Atanpi, "atanpi"), 329bf215546Sopenharmony_ci REMAP(Atan2pi, "atan2pi"), 330bf215546Sopenharmony_ci REMAP(Cos, "cos"), 331bf215546Sopenharmony_ci REMAP(Cosh, "cosh"), 332bf215546Sopenharmony_ci REMAP(Cospi, "cospi"), 333bf215546Sopenharmony_ci REMAP(Sin, "sin"), 334bf215546Sopenharmony_ci REMAP(Sinh, "sinh"), 335bf215546Sopenharmony_ci REMAP(Sinpi, "sinpi"), 336bf215546Sopenharmony_ci REMAP(Tan, "tan"), 337bf215546Sopenharmony_ci REMAP(Tanh, "tanh"), 338bf215546Sopenharmony_ci REMAP(Tanpi, "tanpi"), 339bf215546Sopenharmony_ci REMAP(Sincos, "sincos"), 340bf215546Sopenharmony_ci REMAP(Fract, "fract"), 341bf215546Sopenharmony_ci REMAP(Frexp, "frexp"), 342bf215546Sopenharmony_ci REMAP(Fma, "fma"), 343bf215546Sopenharmony_ci REMAP(Fmod, "fmod"), 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci REMAP(Half_cos, "cos"), 346bf215546Sopenharmony_ci REMAP(Half_exp, "exp"), 347bf215546Sopenharmony_ci REMAP(Half_exp2, "exp2"), 348bf215546Sopenharmony_ci REMAP(Half_exp10, "exp10"), 349bf215546Sopenharmony_ci REMAP(Half_log, "log"), 350bf215546Sopenharmony_ci REMAP(Half_log2, "log2"), 351bf215546Sopenharmony_ci REMAP(Half_log10, "log10"), 352bf215546Sopenharmony_ci REMAP(Half_powr, "powr"), 353bf215546Sopenharmony_ci REMAP(Half_sin, "sin"), 354bf215546Sopenharmony_ci REMAP(Half_tan, "tan"), 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci REMAP(Remainder, "remainder"), 357bf215546Sopenharmony_ci REMAP(Remquo, "remquo"), 358bf215546Sopenharmony_ci REMAP(Hypot, "hypot"), 359bf215546Sopenharmony_ci REMAP(Exp, "exp"), 360bf215546Sopenharmony_ci REMAP(Exp2, "exp2"), 361bf215546Sopenharmony_ci REMAP(Exp10, "exp10"), 362bf215546Sopenharmony_ci REMAP(Expm1, "expm1"), 363bf215546Sopenharmony_ci REMAP(Ldexp, "ldexp"), 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci REMAP(Ilogb, "ilogb"), 366bf215546Sopenharmony_ci REMAP(Log, "log"), 367bf215546Sopenharmony_ci REMAP(Log2, "log2"), 368bf215546Sopenharmony_ci REMAP(Log10, "log10"), 369bf215546Sopenharmony_ci REMAP(Log1p, "log1p"), 370bf215546Sopenharmony_ci REMAP(Logb, "logb"), 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci REMAP(Cbrt, "cbrt"), 373bf215546Sopenharmony_ci REMAP(Erfc, "erfc"), 374bf215546Sopenharmony_ci REMAP(Erf, "erf"), 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci REMAP(Lgamma, "lgamma"), 377bf215546Sopenharmony_ci REMAP(Lgamma_r, "lgamma_r"), 378bf215546Sopenharmony_ci REMAP(Tgamma, "tgamma"), 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci REMAP(UMad_sat, "mad_sat"), 381bf215546Sopenharmony_ci REMAP(SMad_sat, "mad_sat"), 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci REMAP(Shuffle, "shuffle"), 384bf215546Sopenharmony_ci REMAP(Shuffle2, "shuffle2"), 385bf215546Sopenharmony_ci}; 386bf215546Sopenharmony_ci#undef REMAP 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_cistatic const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode) 389bf215546Sopenharmony_ci{ 390bf215546Sopenharmony_ci if (opcode >= (sizeof(remap_table) / sizeof(const char *))) 391bf215546Sopenharmony_ci return NULL; 392bf215546Sopenharmony_ci return remap_table[opcode].fn; 393bf215546Sopenharmony_ci} 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_cistatic struct vtn_type * 396bf215546Sopenharmony_ciget_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type) 397bf215546Sopenharmony_ci{ 398bf215546Sopenharmony_ci struct vtn_type *ret = rzalloc(b, struct vtn_type); 399bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(type)); 400bf215546Sopenharmony_ci ret->type = type; 401bf215546Sopenharmony_ci ret->length = glsl_get_vector_elements(type); 402bf215546Sopenharmony_ci ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar; 403bf215546Sopenharmony_ci return ret; 404bf215546Sopenharmony_ci} 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_cistatic struct vtn_type * 407bf215546Sopenharmony_ciget_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class) 408bf215546Sopenharmony_ci{ 409bf215546Sopenharmony_ci struct vtn_type *ret = rzalloc(b, struct vtn_type); 410bf215546Sopenharmony_ci ret->type = nir_address_format_to_glsl_type( 411bf215546Sopenharmony_ci vtn_mode_to_address_format( 412bf215546Sopenharmony_ci b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL))); 413bf215546Sopenharmony_ci ret->base_type = vtn_base_type_pointer; 414bf215546Sopenharmony_ci ret->storage_class = storage_class; 415bf215546Sopenharmony_ci ret->deref = t; 416bf215546Sopenharmony_ci return ret; 417bf215546Sopenharmony_ci} 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_cistatic struct vtn_type * 420bf215546Sopenharmony_ciget_signed_type(struct vtn_builder *b, struct vtn_type *t) 421bf215546Sopenharmony_ci{ 422bf215546Sopenharmony_ci if (t->base_type == vtn_base_type_pointer) { 423bf215546Sopenharmony_ci return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class); 424bf215546Sopenharmony_ci } 425bf215546Sopenharmony_ci return get_vtn_type_for_glsl_type( 426bf215546Sopenharmony_ci b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)), 427bf215546Sopenharmony_ci glsl_get_vector_elements(t->type))); 428bf215546Sopenharmony_ci} 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_cistatic nir_ssa_def * 431bf215546Sopenharmony_cihandle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode, 432bf215546Sopenharmony_ci int num_srcs, 433bf215546Sopenharmony_ci nir_ssa_def **srcs, 434bf215546Sopenharmony_ci struct vtn_type **src_types, 435bf215546Sopenharmony_ci const struct vtn_type *dest_type) 436bf215546Sopenharmony_ci{ 437bf215546Sopenharmony_ci const char *name = remap_clc_opcode(opcode); 438bf215546Sopenharmony_ci if (!name) 439bf215546Sopenharmony_ci return NULL; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci /* Some functions which take params end up with uint (or pointer-to-uint) being passed, 442bf215546Sopenharmony_ci * which doesn't mangle correctly when the function expects int or pointer-to-int. 443bf215546Sopenharmony_ci * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers 444bf215546Sopenharmony_ci */ 445bf215546Sopenharmony_ci int signed_param = -1; 446bf215546Sopenharmony_ci switch (opcode) { 447bf215546Sopenharmony_ci case OpenCLstd_Frexp: 448bf215546Sopenharmony_ci case OpenCLstd_Lgamma_r: 449bf215546Sopenharmony_ci case OpenCLstd_Pown: 450bf215546Sopenharmony_ci case OpenCLstd_Rootn: 451bf215546Sopenharmony_ci case OpenCLstd_Ldexp: 452bf215546Sopenharmony_ci signed_param = 1; 453bf215546Sopenharmony_ci break; 454bf215546Sopenharmony_ci case OpenCLstd_Remquo: 455bf215546Sopenharmony_ci signed_param = 2; 456bf215546Sopenharmony_ci break; 457bf215546Sopenharmony_ci case OpenCLstd_SMad_sat: { 458bf215546Sopenharmony_ci /* All parameters need to be converted to signed */ 459bf215546Sopenharmony_ci src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]); 460bf215546Sopenharmony_ci break; 461bf215546Sopenharmony_ci } 462bf215546Sopenharmony_ci default: break; 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci if (signed_param >= 0) { 466bf215546Sopenharmony_ci src_types[signed_param] = get_signed_type(b, src_types[signed_param]); 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci nir_deref_instr *ret_deref = NULL; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci if (!call_mangled_function(b, name, 0, num_srcs, src_types, 472bf215546Sopenharmony_ci dest_type, srcs, &ret_deref)) 473bf215546Sopenharmony_ci return NULL; 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL; 476bf215546Sopenharmony_ci} 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_cistatic nir_ssa_def * 479bf215546Sopenharmony_cihandle_special(struct vtn_builder *b, uint32_t opcode, 480bf215546Sopenharmony_ci unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types, 481bf215546Sopenharmony_ci const struct vtn_type *dest_type) 482bf215546Sopenharmony_ci{ 483bf215546Sopenharmony_ci nir_builder *nb = &b->nb; 484bf215546Sopenharmony_ci enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode; 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci switch (cl_opcode) { 487bf215546Sopenharmony_ci case OpenCLstd_SAbs_diff: 488bf215546Sopenharmony_ci /* these works easier in direct NIR */ 489bf215546Sopenharmony_ci return nir_iabs_diff(nb, srcs[0], srcs[1]); 490bf215546Sopenharmony_ci case OpenCLstd_UAbs_diff: 491bf215546Sopenharmony_ci return nir_uabs_diff(nb, srcs[0], srcs[1]); 492bf215546Sopenharmony_ci case OpenCLstd_Bitselect: 493bf215546Sopenharmony_ci return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]); 494bf215546Sopenharmony_ci case OpenCLstd_SMad_hi: 495bf215546Sopenharmony_ci return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]); 496bf215546Sopenharmony_ci case OpenCLstd_UMad_hi: 497bf215546Sopenharmony_ci return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]); 498bf215546Sopenharmony_ci case OpenCLstd_SMul24: 499bf215546Sopenharmony_ci return nir_imul24_relaxed(nb, srcs[0], srcs[1]); 500bf215546Sopenharmony_ci case OpenCLstd_UMul24: 501bf215546Sopenharmony_ci return nir_umul24_relaxed(nb, srcs[0], srcs[1]); 502bf215546Sopenharmony_ci case OpenCLstd_SMad24: 503bf215546Sopenharmony_ci return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]); 504bf215546Sopenharmony_ci case OpenCLstd_UMad24: 505bf215546Sopenharmony_ci return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]); 506bf215546Sopenharmony_ci case OpenCLstd_FClamp: 507bf215546Sopenharmony_ci return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]); 508bf215546Sopenharmony_ci case OpenCLstd_SClamp: 509bf215546Sopenharmony_ci return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]); 510bf215546Sopenharmony_ci case OpenCLstd_UClamp: 511bf215546Sopenharmony_ci return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]); 512bf215546Sopenharmony_ci case OpenCLstd_Copysign: 513bf215546Sopenharmony_ci return nir_copysign(nb, srcs[0], srcs[1]); 514bf215546Sopenharmony_ci case OpenCLstd_Cross: 515bf215546Sopenharmony_ci if (dest_type->length == 4) 516bf215546Sopenharmony_ci return nir_cross4(nb, srcs[0], srcs[1]); 517bf215546Sopenharmony_ci return nir_cross3(nb, srcs[0], srcs[1]); 518bf215546Sopenharmony_ci case OpenCLstd_Fdim: 519bf215546Sopenharmony_ci return nir_fdim(nb, srcs[0], srcs[1]); 520bf215546Sopenharmony_ci case OpenCLstd_Fmod: 521bf215546Sopenharmony_ci if (nb->shader->options->lower_fmod) 522bf215546Sopenharmony_ci break; 523bf215546Sopenharmony_ci return nir_fmod(nb, srcs[0], srcs[1]); 524bf215546Sopenharmony_ci case OpenCLstd_Mad: 525bf215546Sopenharmony_ci return nir_fmad(nb, srcs[0], srcs[1], srcs[2]); 526bf215546Sopenharmony_ci case OpenCLstd_Maxmag: 527bf215546Sopenharmony_ci return nir_maxmag(nb, srcs[0], srcs[1]); 528bf215546Sopenharmony_ci case OpenCLstd_Minmag: 529bf215546Sopenharmony_ci return nir_minmag(nb, srcs[0], srcs[1]); 530bf215546Sopenharmony_ci case OpenCLstd_Nan: 531bf215546Sopenharmony_ci return nir_nan(nb, srcs[0]); 532bf215546Sopenharmony_ci case OpenCLstd_Nextafter: 533bf215546Sopenharmony_ci return nir_nextafter(nb, srcs[0], srcs[1]); 534bf215546Sopenharmony_ci case OpenCLstd_Normalize: 535bf215546Sopenharmony_ci return nir_normalize(nb, srcs[0]); 536bf215546Sopenharmony_ci case OpenCLstd_Clz: 537bf215546Sopenharmony_ci return nir_clz_u(nb, srcs[0]); 538bf215546Sopenharmony_ci case OpenCLstd_Ctz: 539bf215546Sopenharmony_ci return nir_ctz_u(nb, srcs[0]); 540bf215546Sopenharmony_ci case OpenCLstd_Select: 541bf215546Sopenharmony_ci return nir_select(nb, srcs[0], srcs[1], srcs[2]); 542bf215546Sopenharmony_ci case OpenCLstd_S_Upsample: 543bf215546Sopenharmony_ci case OpenCLstd_U_Upsample: 544bf215546Sopenharmony_ci /* SPIR-V and CL have different defs for upsample, just implement in nir */ 545bf215546Sopenharmony_ci return nir_upsample(nb, srcs[0], srcs[1]); 546bf215546Sopenharmony_ci case OpenCLstd_Native_exp: 547bf215546Sopenharmony_ci return nir_fexp(nb, srcs[0]); 548bf215546Sopenharmony_ci case OpenCLstd_Native_exp10: 549bf215546Sopenharmony_ci return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2))); 550bf215546Sopenharmony_ci case OpenCLstd_Native_log: 551bf215546Sopenharmony_ci return nir_flog(nb, srcs[0]); 552bf215546Sopenharmony_ci case OpenCLstd_Native_log10: 553bf215546Sopenharmony_ci return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10)); 554bf215546Sopenharmony_ci case OpenCLstd_Native_tan: 555bf215546Sopenharmony_ci return nir_ftan(nb, srcs[0]); 556bf215546Sopenharmony_ci case OpenCLstd_Ldexp: 557bf215546Sopenharmony_ci if (nb->shader->options->lower_ldexp) 558bf215546Sopenharmony_ci break; 559bf215546Sopenharmony_ci return nir_ldexp(nb, srcs[0], srcs[1]); 560bf215546Sopenharmony_ci case OpenCLstd_Fma: 561bf215546Sopenharmony_ci /* FIXME: the software implementation only supports fp32 for now. */ 562bf215546Sopenharmony_ci if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32) 563bf215546Sopenharmony_ci break; 564bf215546Sopenharmony_ci return nir_ffma(nb, srcs[0], srcs[1], srcs[2]); 565bf215546Sopenharmony_ci default: 566bf215546Sopenharmony_ci break; 567bf215546Sopenharmony_ci } 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type); 570bf215546Sopenharmony_ci if (!ret) 571bf215546Sopenharmony_ci vtn_fail("No NIR equivalent"); 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci return ret; 574bf215546Sopenharmony_ci} 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_cistatic nir_ssa_def * 577bf215546Sopenharmony_cihandle_core(struct vtn_builder *b, uint32_t opcode, 578bf215546Sopenharmony_ci unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types, 579bf215546Sopenharmony_ci const struct vtn_type *dest_type) 580bf215546Sopenharmony_ci{ 581bf215546Sopenharmony_ci nir_deref_instr *ret_deref = NULL; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci switch ((SpvOp)opcode) { 584bf215546Sopenharmony_ci case SpvOpGroupAsyncCopy: { 585bf215546Sopenharmony_ci /* Libclc doesn't include 3-component overloads of the async copy functions. 586bf215546Sopenharmony_ci * However, the CLC spec says: 587bf215546Sopenharmony_ci * async_work_group_copy and async_work_group_strided_copy for 3-component vector types 588bf215546Sopenharmony_ci * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component 589bf215546Sopenharmony_ci * vector types 590bf215546Sopenharmony_ci */ 591bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; ++i) { 592bf215546Sopenharmony_ci if (src_types[i]->base_type == vtn_base_type_pointer && 593bf215546Sopenharmony_ci src_types[i]->deref->base_type == vtn_base_type_vector && 594bf215546Sopenharmony_ci src_types[i]->deref->length == 3) { 595bf215546Sopenharmony_ci src_types[i] = 596bf215546Sopenharmony_ci get_pointer_type(b, 597bf215546Sopenharmony_ci get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)), 598bf215546Sopenharmony_ci src_types[i]->storage_class); 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci } 601bf215546Sopenharmony_ci if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref)) 602bf215546Sopenharmony_ci return NULL; 603bf215546Sopenharmony_ci break; 604bf215546Sopenharmony_ci } 605bf215546Sopenharmony_ci case SpvOpGroupWaitEvents: { 606bf215546Sopenharmony_ci src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type()); 607bf215546Sopenharmony_ci if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref)) 608bf215546Sopenharmony_ci return NULL; 609bf215546Sopenharmony_ci break; 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci default: 612bf215546Sopenharmony_ci return NULL; 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL; 616bf215546Sopenharmony_ci} 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_cistatic void 620bf215546Sopenharmony_ci_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode, 621bf215546Sopenharmony_ci const uint32_t *w, unsigned count, bool load, 622bf215546Sopenharmony_ci bool vec_aligned, nir_rounding_mode rounding) 623bf215546Sopenharmony_ci{ 624bf215546Sopenharmony_ci struct vtn_type *type; 625bf215546Sopenharmony_ci if (load) 626bf215546Sopenharmony_ci type = vtn_get_type(b, w[1]); 627bf215546Sopenharmony_ci else 628bf215546Sopenharmony_ci type = vtn_get_value_type(b, w[5]); 629bf215546Sopenharmony_ci unsigned a = load ? 0 : 1; 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci enum glsl_base_type base_type = glsl_get_base_type(type->type); 632bf215546Sopenharmony_ci unsigned components = glsl_get_vector_elements(type->type); 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]); 635bf215546Sopenharmony_ci struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer); 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS]; 638bf215546Sopenharmony_ci nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS]; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset, 641bf215546Sopenharmony_ci (vec_aligned && components == 3) ? 4 : components); 642bf215546Sopenharmony_ci nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer); 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) : 645bf215546Sopenharmony_ci glsl_get_bit_size(type->type) / 8; 646bf215546Sopenharmony_ci enum glsl_base_type ptr_base_type = 647bf215546Sopenharmony_ci glsl_get_base_type(p->pointer->type->type); 648bf215546Sopenharmony_ci if (base_type != ptr_base_type) { 649bf215546Sopenharmony_ci vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 || 650bf215546Sopenharmony_ci (base_type != GLSL_TYPE_FLOAT && 651bf215546Sopenharmony_ci base_type != GLSL_TYPE_DOUBLE), 652bf215546Sopenharmony_ci "vload/vstore cannot do type conversion. " 653bf215546Sopenharmony_ci "vload/vstore_half can only convert from half to other " 654bf215546Sopenharmony_ci "floating-point types."); 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci /* Above-computed alignment was for floats/doubles, not halves */ 657bf215546Sopenharmony_ci alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type); 658bf215546Sopenharmony_ci } 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci for (int i = 0; i < components; i++) { 663bf215546Sopenharmony_ci nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i); 664bf215546Sopenharmony_ci nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset); 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci if (load) { 667bf215546Sopenharmony_ci comps[i] = vtn_local_load(b, arr_deref, p->type->access); 668bf215546Sopenharmony_ci ncomps[i] = comps[i]->def; 669bf215546Sopenharmony_ci if (base_type != ptr_base_type) { 670bf215546Sopenharmony_ci assert(ptr_base_type == GLSL_TYPE_FLOAT16 && 671bf215546Sopenharmony_ci (base_type == GLSL_TYPE_FLOAT || 672bf215546Sopenharmony_ci base_type == GLSL_TYPE_DOUBLE)); 673bf215546Sopenharmony_ci ncomps[i] = nir_f2fN(&b->nb, ncomps[i], 674bf215546Sopenharmony_ci glsl_base_type_get_bit_size(base_type)); 675bf215546Sopenharmony_ci } 676bf215546Sopenharmony_ci } else { 677bf215546Sopenharmony_ci struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); 678bf215546Sopenharmony_ci struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]); 679bf215546Sopenharmony_ci ssa->def = nir_channel(&b->nb, val->def, i); 680bf215546Sopenharmony_ci if (base_type != ptr_base_type) { 681bf215546Sopenharmony_ci assert(ptr_base_type == GLSL_TYPE_FLOAT16 && 682bf215546Sopenharmony_ci (base_type == GLSL_TYPE_FLOAT || 683bf215546Sopenharmony_ci base_type == GLSL_TYPE_DOUBLE)); 684bf215546Sopenharmony_ci if (rounding == nir_rounding_mode_undef) { 685bf215546Sopenharmony_ci ssa->def = nir_f2f16(&b->nb, ssa->def); 686bf215546Sopenharmony_ci } else { 687bf215546Sopenharmony_ci ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def, 688bf215546Sopenharmony_ci nir_type_float | ssa->def->bit_size, 689bf215546Sopenharmony_ci nir_type_float16, 690bf215546Sopenharmony_ci rounding, false); 691bf215546Sopenharmony_ci } 692bf215546Sopenharmony_ci } 693bf215546Sopenharmony_ci vtn_local_store(b, ssa, arr_deref, p->type->access); 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci } 696bf215546Sopenharmony_ci if (load) { 697bf215546Sopenharmony_ci vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components)); 698bf215546Sopenharmony_ci } 699bf215546Sopenharmony_ci} 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_cistatic void 702bf215546Sopenharmony_civtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode, 703bf215546Sopenharmony_ci const uint32_t *w, unsigned count) 704bf215546Sopenharmony_ci{ 705bf215546Sopenharmony_ci _handle_v_load_store(b, opcode, w, count, true, 706bf215546Sopenharmony_ci opcode == OpenCLstd_Vloada_halfn, 707bf215546Sopenharmony_ci nir_rounding_mode_undef); 708bf215546Sopenharmony_ci} 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_cistatic void 711bf215546Sopenharmony_civtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode, 712bf215546Sopenharmony_ci const uint32_t *w, unsigned count) 713bf215546Sopenharmony_ci{ 714bf215546Sopenharmony_ci _handle_v_load_store(b, opcode, w, count, false, 715bf215546Sopenharmony_ci opcode == OpenCLstd_Vstorea_halfn, 716bf215546Sopenharmony_ci nir_rounding_mode_undef); 717bf215546Sopenharmony_ci} 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_cistatic void 720bf215546Sopenharmony_civtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode, 721bf215546Sopenharmony_ci const uint32_t *w, unsigned count) 722bf215546Sopenharmony_ci{ 723bf215546Sopenharmony_ci _handle_v_load_store(b, opcode, w, count, false, 724bf215546Sopenharmony_ci opcode == OpenCLstd_Vstorea_halfn_r, 725bf215546Sopenharmony_ci vtn_rounding_mode_to_nir(b, w[8])); 726bf215546Sopenharmony_ci} 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_cistatic unsigned 729bf215546Sopenharmony_civtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info) 730bf215546Sopenharmony_ci{ 731bf215546Sopenharmony_ci nir_deref_instr *deref = vtn_nir_deref(b, id); 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci while (deref && deref->deref_type != nir_deref_type_var) 734bf215546Sopenharmony_ci deref = nir_deref_instr_parent(deref); 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant), 737bf215546Sopenharmony_ci "Printf string argument must be a pointer to a constant variable"); 738bf215546Sopenharmony_ci vtn_fail_if(deref->var->constant_initializer == NULL, 739bf215546Sopenharmony_ci "Printf string argument must have an initializer"); 740bf215546Sopenharmony_ci vtn_fail_if(!glsl_type_is_array(deref->var->type), 741bf215546Sopenharmony_ci "Printf string must be an char array"); 742bf215546Sopenharmony_ci const struct glsl_type *char_type = glsl_get_array_element(deref->var->type); 743bf215546Sopenharmony_ci vtn_fail_if(char_type != glsl_uint8_t_type() && 744bf215546Sopenharmony_ci char_type != glsl_int8_t_type(), 745bf215546Sopenharmony_ci "Printf string must be an char array"); 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci nir_constant *c = deref->var->constant_initializer; 748bf215546Sopenharmony_ci assert(c->num_elements == glsl_get_length(deref->var->type)); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci unsigned idx = info->string_size; 751bf215546Sopenharmony_ci info->strings = reralloc_size(b->shader, info->strings, 752bf215546Sopenharmony_ci idx + c->num_elements); 753bf215546Sopenharmony_ci info->string_size += c->num_elements; 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci char *str = &info->strings[idx]; 756bf215546Sopenharmony_ci bool found_null = false; 757bf215546Sopenharmony_ci for (unsigned i = 0; i < c->num_elements; i++) { 758bf215546Sopenharmony_ci memcpy((char *)str + i, c->elements[i]->values, 1); 759bf215546Sopenharmony_ci if (str[i] == '\0') 760bf215546Sopenharmony_ci found_null = true; 761bf215546Sopenharmony_ci } 762bf215546Sopenharmony_ci vtn_fail_if(!found_null, "Printf string must be null terminated"); 763bf215546Sopenharmony_ci return idx; 764bf215546Sopenharmony_ci} 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci/* printf is special because there are no limits on args */ 767bf215546Sopenharmony_cistatic void 768bf215546Sopenharmony_cihandle_printf(struct vtn_builder *b, uint32_t opcode, 769bf215546Sopenharmony_ci const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest) 770bf215546Sopenharmony_ci{ 771bf215546Sopenharmony_ci if (!b->options->caps.printf) { 772bf215546Sopenharmony_ci vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1)); 773bf215546Sopenharmony_ci return; 774bf215546Sopenharmony_ci } 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci /* Step 1. extract the format string */ 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci /* 779bf215546Sopenharmony_ci * info_idx is 1-based to match clover/llvm 780bf215546Sopenharmony_ci * the backend indexes the info table at info_idx - 1. 781bf215546Sopenharmony_ci */ 782bf215546Sopenharmony_ci b->shader->printf_info_count++; 783bf215546Sopenharmony_ci unsigned info_idx = b->shader->printf_info_count; 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci b->shader->printf_info = reralloc(b->shader, b->shader->printf_info, 786bf215546Sopenharmony_ci nir_printf_info, info_idx); 787bf215546Sopenharmony_ci nir_printf_info *info = &b->shader->printf_info[info_idx - 1]; 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci info->strings = NULL; 790bf215546Sopenharmony_ci info->string_size = 0; 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci vtn_add_printf_string(b, w_src[0], info); 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci info->num_args = num_srcs - 1; 795bf215546Sopenharmony_ci info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args); 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci /* Step 2, build an ad-hoc struct type out of the args */ 798bf215546Sopenharmony_ci unsigned field_offset = 0; 799bf215546Sopenharmony_ci struct glsl_struct_field *fields = 800bf215546Sopenharmony_ci rzalloc_array(b, struct glsl_struct_field, num_srcs - 1); 801bf215546Sopenharmony_ci for (unsigned i = 1; i < num_srcs; ++i) { 802bf215546Sopenharmony_ci struct vtn_value *val = vtn_untyped_value(b, w_src[i]); 803bf215546Sopenharmony_ci struct vtn_type *src_type = val->type; 804bf215546Sopenharmony_ci fields[i - 1].type = src_type->type; 805bf215546Sopenharmony_ci fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i); 806bf215546Sopenharmony_ci field_offset = align(field_offset, 4); 807bf215546Sopenharmony_ci fields[i - 1].offset = field_offset; 808bf215546Sopenharmony_ci info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type); 809bf215546Sopenharmony_ci field_offset += glsl_get_cl_size(src_type->type); 810bf215546Sopenharmony_ci } 811bf215546Sopenharmony_ci const struct glsl_type *struct_type = 812bf215546Sopenharmony_ci glsl_struct_type(fields, num_srcs - 1, "printf", true); 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci /* Step 3, create a variable of that type and populate its fields */ 815bf215546Sopenharmony_ci nir_variable *var = nir_local_variable_create(b->nb.impl, struct_type, NULL); 816bf215546Sopenharmony_ci nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var); 817bf215546Sopenharmony_ci size_t fmt_pos = 0; 818bf215546Sopenharmony_ci for (unsigned i = 1; i < num_srcs; ++i) { 819bf215546Sopenharmony_ci nir_deref_instr *field_deref = 820bf215546Sopenharmony_ci nir_build_deref_struct(&b->nb, deref_var, i - 1); 821bf215546Sopenharmony_ci nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def; 822bf215546Sopenharmony_ci /* extract strings */ 823bf215546Sopenharmony_ci fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos); 824bf215546Sopenharmony_ci if (fmt_pos != -1 && info->strings[fmt_pos] == 's') { 825bf215546Sopenharmony_ci unsigned idx = vtn_add_printf_string(b, w_src[i], info); 826bf215546Sopenharmony_ci nir_store_deref(&b->nb, field_deref, 827bf215546Sopenharmony_ci nir_imm_intN_t(&b->nb, idx, field_src->bit_size), 828bf215546Sopenharmony_ci ~0 /* write_mask */); 829bf215546Sopenharmony_ci } else 830bf215546Sopenharmony_ci nir_store_deref(&b->nb, field_deref, field_src, ~0); 831bf215546Sopenharmony_ci } 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci /* Lastly, the actual intrinsic */ 834bf215546Sopenharmony_ci nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx); 835bf215546Sopenharmony_ci nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa); 836bf215546Sopenharmony_ci vtn_push_nir_ssa(b, w_dest[1], ret); 837bf215546Sopenharmony_ci} 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_cistatic nir_ssa_def * 840bf215546Sopenharmony_cihandle_round(struct vtn_builder *b, uint32_t opcode, 841bf215546Sopenharmony_ci unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types, 842bf215546Sopenharmony_ci const struct vtn_type *dest_type) 843bf215546Sopenharmony_ci{ 844bf215546Sopenharmony_ci nir_ssa_def *src = srcs[0]; 845bf215546Sopenharmony_ci nir_builder *nb = &b->nb; 846bf215546Sopenharmony_ci nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size); 847bf215546Sopenharmony_ci nir_ssa_def *truncated = nir_ftrunc(nb, src); 848bf215546Sopenharmony_ci nir_ssa_def *remainder = nir_fsub(nb, src, truncated); 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half), 851bf215546Sopenharmony_ci nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated); 852bf215546Sopenharmony_ci} 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_cistatic nir_ssa_def * 855bf215546Sopenharmony_cihandle_shuffle(struct vtn_builder *b, uint32_t opcode, 856bf215546Sopenharmony_ci unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types, 857bf215546Sopenharmony_ci const struct vtn_type *dest_type) 858bf215546Sopenharmony_ci{ 859bf215546Sopenharmony_ci struct nir_ssa_def *input = srcs[0]; 860bf215546Sopenharmony_ci struct nir_ssa_def *mask = srcs[1]; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci unsigned out_elems = dest_type->length; 863bf215546Sopenharmony_ci nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS]; 864bf215546Sopenharmony_ci unsigned in_elems = input->num_components; 865bf215546Sopenharmony_ci if (mask->bit_size != 32) 866bf215546Sopenharmony_ci mask = nir_u2u32(&b->nb, mask); 867bf215546Sopenharmony_ci mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size)); 868bf215546Sopenharmony_ci for (unsigned i = 0; i < out_elems; i++) 869bf215546Sopenharmony_ci outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i)); 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci return nir_vec(&b->nb, outres, out_elems); 872bf215546Sopenharmony_ci} 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_cistatic nir_ssa_def * 875bf215546Sopenharmony_cihandle_shuffle2(struct vtn_builder *b, uint32_t opcode, 876bf215546Sopenharmony_ci unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types, 877bf215546Sopenharmony_ci const struct vtn_type *dest_type) 878bf215546Sopenharmony_ci{ 879bf215546Sopenharmony_ci struct nir_ssa_def *input0 = srcs[0]; 880bf215546Sopenharmony_ci struct nir_ssa_def *input1 = srcs[1]; 881bf215546Sopenharmony_ci struct nir_ssa_def *mask = srcs[2]; 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci unsigned out_elems = dest_type->length; 884bf215546Sopenharmony_ci nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS]; 885bf215546Sopenharmony_ci unsigned in_elems = input0->num_components; 886bf215546Sopenharmony_ci unsigned total_mask = 2 * in_elems - 1; 887bf215546Sopenharmony_ci unsigned half_mask = in_elems - 1; 888bf215546Sopenharmony_ci if (mask->bit_size != 32) 889bf215546Sopenharmony_ci mask = nir_u2u32(&b->nb, mask); 890bf215546Sopenharmony_ci mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size)); 891bf215546Sopenharmony_ci for (unsigned i = 0; i < out_elems; i++) { 892bf215546Sopenharmony_ci nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i); 893bf215546Sopenharmony_ci nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size)); 894bf215546Sopenharmony_ci nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask); 895bf215546Sopenharmony_ci nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask); 896bf215546Sopenharmony_ci nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size)); 897bf215546Sopenharmony_ci outres[i] = nir_bcsel(&b->nb, sel, val0, val1); 898bf215546Sopenharmony_ci } 899bf215546Sopenharmony_ci return nir_vec(&b->nb, outres, out_elems); 900bf215546Sopenharmony_ci} 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_cibool 903bf215546Sopenharmony_civtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode, 904bf215546Sopenharmony_ci const uint32_t *w, unsigned count) 905bf215546Sopenharmony_ci{ 906bf215546Sopenharmony_ci enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode; 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci switch (cl_opcode) { 909bf215546Sopenharmony_ci case OpenCLstd_Fabs: 910bf215546Sopenharmony_ci case OpenCLstd_SAbs: 911bf215546Sopenharmony_ci case OpenCLstd_UAbs: 912bf215546Sopenharmony_ci case OpenCLstd_SAdd_sat: 913bf215546Sopenharmony_ci case OpenCLstd_UAdd_sat: 914bf215546Sopenharmony_ci case OpenCLstd_Ceil: 915bf215546Sopenharmony_ci case OpenCLstd_Floor: 916bf215546Sopenharmony_ci case OpenCLstd_Fmax: 917bf215546Sopenharmony_ci case OpenCLstd_SHadd: 918bf215546Sopenharmony_ci case OpenCLstd_UHadd: 919bf215546Sopenharmony_ci case OpenCLstd_SMax: 920bf215546Sopenharmony_ci case OpenCLstd_UMax: 921bf215546Sopenharmony_ci case OpenCLstd_Fmin: 922bf215546Sopenharmony_ci case OpenCLstd_SMin: 923bf215546Sopenharmony_ci case OpenCLstd_UMin: 924bf215546Sopenharmony_ci case OpenCLstd_Mix: 925bf215546Sopenharmony_ci case OpenCLstd_Native_cos: 926bf215546Sopenharmony_ci case OpenCLstd_Native_divide: 927bf215546Sopenharmony_ci case OpenCLstd_Native_exp2: 928bf215546Sopenharmony_ci case OpenCLstd_Native_log2: 929bf215546Sopenharmony_ci case OpenCLstd_Native_powr: 930bf215546Sopenharmony_ci case OpenCLstd_Native_recip: 931bf215546Sopenharmony_ci case OpenCLstd_Native_rsqrt: 932bf215546Sopenharmony_ci case OpenCLstd_Native_sin: 933bf215546Sopenharmony_ci case OpenCLstd_Native_sqrt: 934bf215546Sopenharmony_ci case OpenCLstd_SMul_hi: 935bf215546Sopenharmony_ci case OpenCLstd_UMul_hi: 936bf215546Sopenharmony_ci case OpenCLstd_Popcount: 937bf215546Sopenharmony_ci case OpenCLstd_SRhadd: 938bf215546Sopenharmony_ci case OpenCLstd_URhadd: 939bf215546Sopenharmony_ci case OpenCLstd_Rsqrt: 940bf215546Sopenharmony_ci case OpenCLstd_Sign: 941bf215546Sopenharmony_ci case OpenCLstd_Sqrt: 942bf215546Sopenharmony_ci case OpenCLstd_SSub_sat: 943bf215546Sopenharmony_ci case OpenCLstd_USub_sat: 944bf215546Sopenharmony_ci case OpenCLstd_Trunc: 945bf215546Sopenharmony_ci case OpenCLstd_Rint: 946bf215546Sopenharmony_ci case OpenCLstd_Half_divide: 947bf215546Sopenharmony_ci case OpenCLstd_Half_recip: 948bf215546Sopenharmony_ci handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu); 949bf215546Sopenharmony_ci return true; 950bf215546Sopenharmony_ci case OpenCLstd_SAbs_diff: 951bf215546Sopenharmony_ci case OpenCLstd_UAbs_diff: 952bf215546Sopenharmony_ci case OpenCLstd_SMad_hi: 953bf215546Sopenharmony_ci case OpenCLstd_UMad_hi: 954bf215546Sopenharmony_ci case OpenCLstd_SMad24: 955bf215546Sopenharmony_ci case OpenCLstd_UMad24: 956bf215546Sopenharmony_ci case OpenCLstd_SMul24: 957bf215546Sopenharmony_ci case OpenCLstd_UMul24: 958bf215546Sopenharmony_ci case OpenCLstd_Bitselect: 959bf215546Sopenharmony_ci case OpenCLstd_FClamp: 960bf215546Sopenharmony_ci case OpenCLstd_SClamp: 961bf215546Sopenharmony_ci case OpenCLstd_UClamp: 962bf215546Sopenharmony_ci case OpenCLstd_Copysign: 963bf215546Sopenharmony_ci case OpenCLstd_Cross: 964bf215546Sopenharmony_ci case OpenCLstd_Degrees: 965bf215546Sopenharmony_ci case OpenCLstd_Fdim: 966bf215546Sopenharmony_ci case OpenCLstd_Fma: 967bf215546Sopenharmony_ci case OpenCLstd_Distance: 968bf215546Sopenharmony_ci case OpenCLstd_Fast_distance: 969bf215546Sopenharmony_ci case OpenCLstd_Fast_length: 970bf215546Sopenharmony_ci case OpenCLstd_Fast_normalize: 971bf215546Sopenharmony_ci case OpenCLstd_Half_rsqrt: 972bf215546Sopenharmony_ci case OpenCLstd_Half_sqrt: 973bf215546Sopenharmony_ci case OpenCLstd_Length: 974bf215546Sopenharmony_ci case OpenCLstd_Mad: 975bf215546Sopenharmony_ci case OpenCLstd_Maxmag: 976bf215546Sopenharmony_ci case OpenCLstd_Minmag: 977bf215546Sopenharmony_ci case OpenCLstd_Nan: 978bf215546Sopenharmony_ci case OpenCLstd_Nextafter: 979bf215546Sopenharmony_ci case OpenCLstd_Normalize: 980bf215546Sopenharmony_ci case OpenCLstd_Radians: 981bf215546Sopenharmony_ci case OpenCLstd_Rotate: 982bf215546Sopenharmony_ci case OpenCLstd_Select: 983bf215546Sopenharmony_ci case OpenCLstd_Step: 984bf215546Sopenharmony_ci case OpenCLstd_Smoothstep: 985bf215546Sopenharmony_ci case OpenCLstd_S_Upsample: 986bf215546Sopenharmony_ci case OpenCLstd_U_Upsample: 987bf215546Sopenharmony_ci case OpenCLstd_Clz: 988bf215546Sopenharmony_ci case OpenCLstd_Ctz: 989bf215546Sopenharmony_ci case OpenCLstd_Native_exp: 990bf215546Sopenharmony_ci case OpenCLstd_Native_exp10: 991bf215546Sopenharmony_ci case OpenCLstd_Native_log: 992bf215546Sopenharmony_ci case OpenCLstd_Native_log10: 993bf215546Sopenharmony_ci case OpenCLstd_Acos: 994bf215546Sopenharmony_ci case OpenCLstd_Acosh: 995bf215546Sopenharmony_ci case OpenCLstd_Acospi: 996bf215546Sopenharmony_ci case OpenCLstd_Asin: 997bf215546Sopenharmony_ci case OpenCLstd_Asinh: 998bf215546Sopenharmony_ci case OpenCLstd_Asinpi: 999bf215546Sopenharmony_ci case OpenCLstd_Atan: 1000bf215546Sopenharmony_ci case OpenCLstd_Atan2: 1001bf215546Sopenharmony_ci case OpenCLstd_Atanh: 1002bf215546Sopenharmony_ci case OpenCLstd_Atanpi: 1003bf215546Sopenharmony_ci case OpenCLstd_Atan2pi: 1004bf215546Sopenharmony_ci case OpenCLstd_Fract: 1005bf215546Sopenharmony_ci case OpenCLstd_Frexp: 1006bf215546Sopenharmony_ci case OpenCLstd_Exp: 1007bf215546Sopenharmony_ci case OpenCLstd_Exp2: 1008bf215546Sopenharmony_ci case OpenCLstd_Expm1: 1009bf215546Sopenharmony_ci case OpenCLstd_Exp10: 1010bf215546Sopenharmony_ci case OpenCLstd_Fmod: 1011bf215546Sopenharmony_ci case OpenCLstd_Ilogb: 1012bf215546Sopenharmony_ci case OpenCLstd_Log: 1013bf215546Sopenharmony_ci case OpenCLstd_Log2: 1014bf215546Sopenharmony_ci case OpenCLstd_Log10: 1015bf215546Sopenharmony_ci case OpenCLstd_Log1p: 1016bf215546Sopenharmony_ci case OpenCLstd_Logb: 1017bf215546Sopenharmony_ci case OpenCLstd_Ldexp: 1018bf215546Sopenharmony_ci case OpenCLstd_Cos: 1019bf215546Sopenharmony_ci case OpenCLstd_Cosh: 1020bf215546Sopenharmony_ci case OpenCLstd_Cospi: 1021bf215546Sopenharmony_ci case OpenCLstd_Sin: 1022bf215546Sopenharmony_ci case OpenCLstd_Sinh: 1023bf215546Sopenharmony_ci case OpenCLstd_Sinpi: 1024bf215546Sopenharmony_ci case OpenCLstd_Tan: 1025bf215546Sopenharmony_ci case OpenCLstd_Tanh: 1026bf215546Sopenharmony_ci case OpenCLstd_Tanpi: 1027bf215546Sopenharmony_ci case OpenCLstd_Cbrt: 1028bf215546Sopenharmony_ci case OpenCLstd_Erfc: 1029bf215546Sopenharmony_ci case OpenCLstd_Erf: 1030bf215546Sopenharmony_ci case OpenCLstd_Lgamma: 1031bf215546Sopenharmony_ci case OpenCLstd_Lgamma_r: 1032bf215546Sopenharmony_ci case OpenCLstd_Tgamma: 1033bf215546Sopenharmony_ci case OpenCLstd_Pow: 1034bf215546Sopenharmony_ci case OpenCLstd_Powr: 1035bf215546Sopenharmony_ci case OpenCLstd_Pown: 1036bf215546Sopenharmony_ci case OpenCLstd_Rootn: 1037bf215546Sopenharmony_ci case OpenCLstd_Remainder: 1038bf215546Sopenharmony_ci case OpenCLstd_Remquo: 1039bf215546Sopenharmony_ci case OpenCLstd_Hypot: 1040bf215546Sopenharmony_ci case OpenCLstd_Sincos: 1041bf215546Sopenharmony_ci case OpenCLstd_Modf: 1042bf215546Sopenharmony_ci case OpenCLstd_UMad_sat: 1043bf215546Sopenharmony_ci case OpenCLstd_SMad_sat: 1044bf215546Sopenharmony_ci case OpenCLstd_Native_tan: 1045bf215546Sopenharmony_ci case OpenCLstd_Half_cos: 1046bf215546Sopenharmony_ci case OpenCLstd_Half_exp: 1047bf215546Sopenharmony_ci case OpenCLstd_Half_exp2: 1048bf215546Sopenharmony_ci case OpenCLstd_Half_exp10: 1049bf215546Sopenharmony_ci case OpenCLstd_Half_log: 1050bf215546Sopenharmony_ci case OpenCLstd_Half_log2: 1051bf215546Sopenharmony_ci case OpenCLstd_Half_log10: 1052bf215546Sopenharmony_ci case OpenCLstd_Half_powr: 1053bf215546Sopenharmony_ci case OpenCLstd_Half_sin: 1054bf215546Sopenharmony_ci case OpenCLstd_Half_tan: 1055bf215546Sopenharmony_ci handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special); 1056bf215546Sopenharmony_ci return true; 1057bf215546Sopenharmony_ci case OpenCLstd_Vloadn: 1058bf215546Sopenharmony_ci case OpenCLstd_Vload_half: 1059bf215546Sopenharmony_ci case OpenCLstd_Vload_halfn: 1060bf215546Sopenharmony_ci case OpenCLstd_Vloada_halfn: 1061bf215546Sopenharmony_ci vtn_handle_opencl_vload(b, cl_opcode, w, count); 1062bf215546Sopenharmony_ci return true; 1063bf215546Sopenharmony_ci case OpenCLstd_Vstoren: 1064bf215546Sopenharmony_ci case OpenCLstd_Vstore_half: 1065bf215546Sopenharmony_ci case OpenCLstd_Vstore_halfn: 1066bf215546Sopenharmony_ci case OpenCLstd_Vstorea_halfn: 1067bf215546Sopenharmony_ci vtn_handle_opencl_vstore(b, cl_opcode, w, count); 1068bf215546Sopenharmony_ci return true; 1069bf215546Sopenharmony_ci case OpenCLstd_Vstore_half_r: 1070bf215546Sopenharmony_ci case OpenCLstd_Vstore_halfn_r: 1071bf215546Sopenharmony_ci case OpenCLstd_Vstorea_halfn_r: 1072bf215546Sopenharmony_ci vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count); 1073bf215546Sopenharmony_ci return true; 1074bf215546Sopenharmony_ci case OpenCLstd_Shuffle: 1075bf215546Sopenharmony_ci handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle); 1076bf215546Sopenharmony_ci return true; 1077bf215546Sopenharmony_ci case OpenCLstd_Shuffle2: 1078bf215546Sopenharmony_ci handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2); 1079bf215546Sopenharmony_ci return true; 1080bf215546Sopenharmony_ci case OpenCLstd_Round: 1081bf215546Sopenharmony_ci handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round); 1082bf215546Sopenharmony_ci return true; 1083bf215546Sopenharmony_ci case OpenCLstd_Printf: 1084bf215546Sopenharmony_ci handle_printf(b, ext_opcode, w + 5, count - 5, w + 1); 1085bf215546Sopenharmony_ci return true; 1086bf215546Sopenharmony_ci case OpenCLstd_Prefetch: 1087bf215546Sopenharmony_ci /* TODO maybe add a nir instruction for this? */ 1088bf215546Sopenharmony_ci return true; 1089bf215546Sopenharmony_ci default: 1090bf215546Sopenharmony_ci vtn_fail("unhandled opencl opc: %u\n", ext_opcode); 1091bf215546Sopenharmony_ci return false; 1092bf215546Sopenharmony_ci } 1093bf215546Sopenharmony_ci} 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_cibool 1096bf215546Sopenharmony_civtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode, 1097bf215546Sopenharmony_ci const uint32_t *w, unsigned count) 1098bf215546Sopenharmony_ci{ 1099bf215546Sopenharmony_ci switch (opcode) { 1100bf215546Sopenharmony_ci case SpvOpGroupAsyncCopy: 1101bf215546Sopenharmony_ci handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core); 1102bf215546Sopenharmony_ci return true; 1103bf215546Sopenharmony_ci case SpvOpGroupWaitEvents: 1104bf215546Sopenharmony_ci handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core); 1105bf215546Sopenharmony_ci return true; 1106bf215546Sopenharmony_ci default: 1107bf215546Sopenharmony_ci return false; 1108bf215546Sopenharmony_ci } 1109bf215546Sopenharmony_ci return true; 1110bf215546Sopenharmony_ci} 1111