1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Red Hat
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Rob Clark (robdclark@gmail.com)
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "math.h"
28bf215546Sopenharmony_ci#include "nir/nir_builtin_builder.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "util/u_printf.h"
31bf215546Sopenharmony_ci#include "vtn_private.h"
32bf215546Sopenharmony_ci#include "OpenCL.std.h"
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_citypedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
35bf215546Sopenharmony_ci                                    uint32_t opcode,
36bf215546Sopenharmony_ci                                    unsigned num_srcs, nir_ssa_def **srcs,
37bf215546Sopenharmony_ci                                    struct vtn_type **src_types,
38bf215546Sopenharmony_ci                                    const struct vtn_type *dest_type);
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic int to_llvm_address_space(SpvStorageClass mode)
41bf215546Sopenharmony_ci{
42bf215546Sopenharmony_ci   switch (mode) {
43bf215546Sopenharmony_ci   case SpvStorageClassPrivate:
44bf215546Sopenharmony_ci   case SpvStorageClassFunction: return 0;
45bf215546Sopenharmony_ci   case SpvStorageClassCrossWorkgroup: return 1;
46bf215546Sopenharmony_ci   case SpvStorageClassUniform:
47bf215546Sopenharmony_ci   case SpvStorageClassUniformConstant: return 2;
48bf215546Sopenharmony_ci   case SpvStorageClassWorkgroup: return 3;
49bf215546Sopenharmony_ci   case SpvStorageClassGeneric: return 4;
50bf215546Sopenharmony_ci   default: return -1;
51bf215546Sopenharmony_ci   }
52bf215546Sopenharmony_ci}
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_cistatic void
56bf215546Sopenharmony_civtn_opencl_mangle(const char *in_name,
57bf215546Sopenharmony_ci                  uint32_t const_mask,
58bf215546Sopenharmony_ci                  int ntypes, struct vtn_type **src_types,
59bf215546Sopenharmony_ci                  char **outstring)
60bf215546Sopenharmony_ci{
61bf215546Sopenharmony_ci   char local_name[256] = "";
62bf215546Sopenharmony_ci   char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   for (unsigned i = 0; i < ntypes; ++i) {
65bf215546Sopenharmony_ci      const struct glsl_type *type = src_types[i]->type;
66bf215546Sopenharmony_ci      enum vtn_base_type base_type = src_types[i]->base_type;
67bf215546Sopenharmony_ci      if (src_types[i]->base_type == vtn_base_type_pointer) {
68bf215546Sopenharmony_ci         *(args_str++) = 'P';
69bf215546Sopenharmony_ci         int address_space = to_llvm_address_space(src_types[i]->storage_class);
70bf215546Sopenharmony_ci         if (address_space > 0)
71bf215546Sopenharmony_ci            args_str += sprintf(args_str, "U3AS%d", address_space);
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci         type = src_types[i]->deref->type;
74bf215546Sopenharmony_ci         base_type = src_types[i]->deref->base_type;
75bf215546Sopenharmony_ci      }
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci      if (const_mask & (1 << i))
78bf215546Sopenharmony_ci         *(args_str++) = 'K';
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci      unsigned num_elements = glsl_get_components(type);
81bf215546Sopenharmony_ci      if (num_elements > 1) {
82bf215546Sopenharmony_ci         /* Vectors are not treated as built-ins for mangling, so check for substitution.
83bf215546Sopenharmony_ci          * In theory, we'd need to know which substitution value this is. In practice,
84bf215546Sopenharmony_ci          * the functions we need from libclc only support 1
85bf215546Sopenharmony_ci          */
86bf215546Sopenharmony_ci         bool substitution = false;
87bf215546Sopenharmony_ci         for (unsigned j = 0; j < i; ++j) {
88bf215546Sopenharmony_ci            const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
89bf215546Sopenharmony_ci               src_types[j]->deref->type : src_types[j]->type;
90bf215546Sopenharmony_ci            if (type == other_type) {
91bf215546Sopenharmony_ci               substitution = true;
92bf215546Sopenharmony_ci               break;
93bf215546Sopenharmony_ci            }
94bf215546Sopenharmony_ci         }
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci         if (substitution) {
97bf215546Sopenharmony_ci            args_str += sprintf(args_str, "S_");
98bf215546Sopenharmony_ci            continue;
99bf215546Sopenharmony_ci         } else
100bf215546Sopenharmony_ci            args_str += sprintf(args_str, "Dv%d_", num_elements);
101bf215546Sopenharmony_ci      }
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci      const char *suffix = NULL;
104bf215546Sopenharmony_ci      switch (base_type) {
105bf215546Sopenharmony_ci      case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
106bf215546Sopenharmony_ci      case vtn_base_type_event: suffix = "9ocl_event"; break;
107bf215546Sopenharmony_ci      default: {
108bf215546Sopenharmony_ci         const char *primitives[] = {
109bf215546Sopenharmony_ci            [GLSL_TYPE_UINT] = "j",
110bf215546Sopenharmony_ci            [GLSL_TYPE_INT] = "i",
111bf215546Sopenharmony_ci            [GLSL_TYPE_FLOAT] = "f",
112bf215546Sopenharmony_ci            [GLSL_TYPE_FLOAT16] = "Dh",
113bf215546Sopenharmony_ci            [GLSL_TYPE_DOUBLE] = "d",
114bf215546Sopenharmony_ci            [GLSL_TYPE_UINT8] = "h",
115bf215546Sopenharmony_ci            [GLSL_TYPE_INT8] = "c",
116bf215546Sopenharmony_ci            [GLSL_TYPE_UINT16] = "t",
117bf215546Sopenharmony_ci            [GLSL_TYPE_INT16] = "s",
118bf215546Sopenharmony_ci            [GLSL_TYPE_UINT64] = "m",
119bf215546Sopenharmony_ci            [GLSL_TYPE_INT64] = "l",
120bf215546Sopenharmony_ci            [GLSL_TYPE_BOOL] = "b",
121bf215546Sopenharmony_ci            [GLSL_TYPE_ERROR] = NULL,
122bf215546Sopenharmony_ci         };
123bf215546Sopenharmony_ci         enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
124bf215546Sopenharmony_ci         assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
125bf215546Sopenharmony_ci         suffix = primitives[glsl_base_type];
126bf215546Sopenharmony_ci         break;
127bf215546Sopenharmony_ci      }
128bf215546Sopenharmony_ci      }
129bf215546Sopenharmony_ci      args_str += sprintf(args_str, "%s", suffix);
130bf215546Sopenharmony_ci   }
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   *outstring = strdup(local_name);
133bf215546Sopenharmony_ci}
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_cistatic nir_function *mangle_and_find(struct vtn_builder *b,
136bf215546Sopenharmony_ci                                     const char *name,
137bf215546Sopenharmony_ci                                     uint32_t const_mask,
138bf215546Sopenharmony_ci                                     uint32_t num_srcs,
139bf215546Sopenharmony_ci                                     struct vtn_type **src_types)
140bf215546Sopenharmony_ci{
141bf215546Sopenharmony_ci   char *mname;
142bf215546Sopenharmony_ci   nir_function *found = NULL;
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci   vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
145bf215546Sopenharmony_ci   /* try and find in current shader first. */
146bf215546Sopenharmony_ci   nir_foreach_function(funcs, b->shader) {
147bf215546Sopenharmony_ci      if (!strcmp(funcs->name, mname)) {
148bf215546Sopenharmony_ci         found = funcs;
149bf215546Sopenharmony_ci         break;
150bf215546Sopenharmony_ci      }
151bf215546Sopenharmony_ci   }
152bf215546Sopenharmony_ci   /* if not found here find in clc shader and create a decl mirroring it */
153bf215546Sopenharmony_ci   if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
154bf215546Sopenharmony_ci      nir_foreach_function(funcs, b->options->clc_shader) {
155bf215546Sopenharmony_ci         if (!strcmp(funcs->name, mname)) {
156bf215546Sopenharmony_ci            found = funcs;
157bf215546Sopenharmony_ci            break;
158bf215546Sopenharmony_ci         }
159bf215546Sopenharmony_ci      }
160bf215546Sopenharmony_ci      if (found) {
161bf215546Sopenharmony_ci         nir_function *decl = nir_function_create(b->shader, mname);
162bf215546Sopenharmony_ci         decl->num_params = found->num_params;
163bf215546Sopenharmony_ci         decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
164bf215546Sopenharmony_ci         for (unsigned i = 0; i < decl->num_params; i++) {
165bf215546Sopenharmony_ci            decl->params[i] = found->params[i];
166bf215546Sopenharmony_ci         }
167bf215546Sopenharmony_ci         found = decl;
168bf215546Sopenharmony_ci      }
169bf215546Sopenharmony_ci   }
170bf215546Sopenharmony_ci   if (!found)
171bf215546Sopenharmony_ci      vtn_fail("Can't find clc function %s\n", mname);
172bf215546Sopenharmony_ci   free(mname);
173bf215546Sopenharmony_ci   return found;
174bf215546Sopenharmony_ci}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_cistatic bool call_mangled_function(struct vtn_builder *b,
177bf215546Sopenharmony_ci                                  const char *name,
178bf215546Sopenharmony_ci                                  uint32_t const_mask,
179bf215546Sopenharmony_ci                                  uint32_t num_srcs,
180bf215546Sopenharmony_ci                                  struct vtn_type **src_types,
181bf215546Sopenharmony_ci                                  const struct vtn_type *dest_type,
182bf215546Sopenharmony_ci                                  nir_ssa_def **srcs,
183bf215546Sopenharmony_ci                                  nir_deref_instr **ret_deref_ptr)
184bf215546Sopenharmony_ci{
185bf215546Sopenharmony_ci   nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
186bf215546Sopenharmony_ci   if (!found)
187bf215546Sopenharmony_ci      return false;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   nir_call_instr *call = nir_call_instr_create(b->shader, found);
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   nir_deref_instr *ret_deref = NULL;
192bf215546Sopenharmony_ci   uint32_t param_idx = 0;
193bf215546Sopenharmony_ci   if (dest_type) {
194bf215546Sopenharmony_ci      nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
195bf215546Sopenharmony_ci                                                        glsl_get_bare_type(dest_type->type),
196bf215546Sopenharmony_ci                                                        "return_tmp");
197bf215546Sopenharmony_ci      ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
198bf215546Sopenharmony_ci      call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
199bf215546Sopenharmony_ci   }
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_srcs; i++)
202bf215546Sopenharmony_ci      call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
203bf215546Sopenharmony_ci   nir_builder_instr_insert(&b->nb, &call->instr);
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   *ret_deref_ptr = ret_deref;
206bf215546Sopenharmony_ci   return true;
207bf215546Sopenharmony_ci}
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_cistatic void
210bf215546Sopenharmony_cihandle_instr(struct vtn_builder *b, uint32_t opcode,
211bf215546Sopenharmony_ci             const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
212bf215546Sopenharmony_ci{
213bf215546Sopenharmony_ci   struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   nir_ssa_def *srcs[5] = { NULL };
216bf215546Sopenharmony_ci   struct vtn_type *src_types[5] = { NULL };
217bf215546Sopenharmony_ci   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
218bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_srcs; i++) {
219bf215546Sopenharmony_ci      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
220bf215546Sopenharmony_ci      struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
221bf215546Sopenharmony_ci      srcs[i] = ssa->def;
222bf215546Sopenharmony_ci      src_types[i] = val->type;
223bf215546Sopenharmony_ci   }
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
226bf215546Sopenharmony_ci   if (result) {
227bf215546Sopenharmony_ci      vtn_push_nir_ssa(b, w_dest[1], result);
228bf215546Sopenharmony_ci   } else {
229bf215546Sopenharmony_ci      vtn_assert(dest_type == NULL);
230bf215546Sopenharmony_ci   }
231bf215546Sopenharmony_ci}
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_cistatic nir_op
234bf215546Sopenharmony_cinir_alu_op_for_opencl_opcode(struct vtn_builder *b,
235bf215546Sopenharmony_ci                             enum OpenCLstd_Entrypoints opcode)
236bf215546Sopenharmony_ci{
237bf215546Sopenharmony_ci   switch (opcode) {
238bf215546Sopenharmony_ci   case OpenCLstd_Fabs: return nir_op_fabs;
239bf215546Sopenharmony_ci   case OpenCLstd_SAbs: return nir_op_iabs;
240bf215546Sopenharmony_ci   case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
241bf215546Sopenharmony_ci   case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
242bf215546Sopenharmony_ci   case OpenCLstd_Ceil: return nir_op_fceil;
243bf215546Sopenharmony_ci   case OpenCLstd_Floor: return nir_op_ffloor;
244bf215546Sopenharmony_ci   case OpenCLstd_SHadd: return nir_op_ihadd;
245bf215546Sopenharmony_ci   case OpenCLstd_UHadd: return nir_op_uhadd;
246bf215546Sopenharmony_ci   case OpenCLstd_Fmax: return nir_op_fmax;
247bf215546Sopenharmony_ci   case OpenCLstd_SMax: return nir_op_imax;
248bf215546Sopenharmony_ci   case OpenCLstd_UMax: return nir_op_umax;
249bf215546Sopenharmony_ci   case OpenCLstd_Fmin: return nir_op_fmin;
250bf215546Sopenharmony_ci   case OpenCLstd_SMin: return nir_op_imin;
251bf215546Sopenharmony_ci   case OpenCLstd_UMin: return nir_op_umin;
252bf215546Sopenharmony_ci   case OpenCLstd_Mix: return nir_op_flrp;
253bf215546Sopenharmony_ci   case OpenCLstd_Native_cos: return nir_op_fcos;
254bf215546Sopenharmony_ci   case OpenCLstd_Native_divide: return nir_op_fdiv;
255bf215546Sopenharmony_ci   case OpenCLstd_Native_exp2: return nir_op_fexp2;
256bf215546Sopenharmony_ci   case OpenCLstd_Native_log2: return nir_op_flog2;
257bf215546Sopenharmony_ci   case OpenCLstd_Native_powr: return nir_op_fpow;
258bf215546Sopenharmony_ci   case OpenCLstd_Native_recip: return nir_op_frcp;
259bf215546Sopenharmony_ci   case OpenCLstd_Native_rsqrt: return nir_op_frsq;
260bf215546Sopenharmony_ci   case OpenCLstd_Native_sin: return nir_op_fsin;
261bf215546Sopenharmony_ci   case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
262bf215546Sopenharmony_ci   case OpenCLstd_SMul_hi: return nir_op_imul_high;
263bf215546Sopenharmony_ci   case OpenCLstd_UMul_hi: return nir_op_umul_high;
264bf215546Sopenharmony_ci   case OpenCLstd_Popcount: return nir_op_bit_count;
265bf215546Sopenharmony_ci   case OpenCLstd_SRhadd: return nir_op_irhadd;
266bf215546Sopenharmony_ci   case OpenCLstd_URhadd: return nir_op_urhadd;
267bf215546Sopenharmony_ci   case OpenCLstd_Rsqrt: return nir_op_frsq;
268bf215546Sopenharmony_ci   case OpenCLstd_Sign: return nir_op_fsign;
269bf215546Sopenharmony_ci   case OpenCLstd_Sqrt: return nir_op_fsqrt;
270bf215546Sopenharmony_ci   case OpenCLstd_SSub_sat: return nir_op_isub_sat;
271bf215546Sopenharmony_ci   case OpenCLstd_USub_sat: return nir_op_usub_sat;
272bf215546Sopenharmony_ci   case OpenCLstd_Trunc: return nir_op_ftrunc;
273bf215546Sopenharmony_ci   case OpenCLstd_Rint: return nir_op_fround_even;
274bf215546Sopenharmony_ci   case OpenCLstd_Half_divide: return nir_op_fdiv;
275bf215546Sopenharmony_ci   case OpenCLstd_Half_recip: return nir_op_frcp;
276bf215546Sopenharmony_ci   /* uhm... */
277bf215546Sopenharmony_ci   case OpenCLstd_UAbs: return nir_op_mov;
278bf215546Sopenharmony_ci   default:
279bf215546Sopenharmony_ci      vtn_fail("No NIR equivalent");
280bf215546Sopenharmony_ci   }
281bf215546Sopenharmony_ci}
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_cistatic nir_ssa_def *
284bf215546Sopenharmony_cihandle_alu(struct vtn_builder *b, uint32_t opcode,
285bf215546Sopenharmony_ci           unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
286bf215546Sopenharmony_ci           const struct vtn_type *dest_type)
287bf215546Sopenharmony_ci{
288bf215546Sopenharmony_ci   nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
289bf215546Sopenharmony_ci                                    srcs[0], srcs[1], srcs[2], NULL);
290bf215546Sopenharmony_ci   if (opcode == OpenCLstd_Popcount)
291bf215546Sopenharmony_ci      ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
292bf215546Sopenharmony_ci   return ret;
293bf215546Sopenharmony_ci}
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci#define REMAP(op, str) [OpenCLstd_##op] = { str }
296bf215546Sopenharmony_cistatic const struct {
297bf215546Sopenharmony_ci   const char *fn;
298bf215546Sopenharmony_ci} remap_table[] = {
299bf215546Sopenharmony_ci   REMAP(Distance, "distance"),
300bf215546Sopenharmony_ci   REMAP(Fast_distance, "fast_distance"),
301bf215546Sopenharmony_ci   REMAP(Fast_length, "fast_length"),
302bf215546Sopenharmony_ci   REMAP(Fast_normalize, "fast_normalize"),
303bf215546Sopenharmony_ci   REMAP(Half_rsqrt, "half_rsqrt"),
304bf215546Sopenharmony_ci   REMAP(Half_sqrt, "half_sqrt"),
305bf215546Sopenharmony_ci   REMAP(Length, "length"),
306bf215546Sopenharmony_ci   REMAP(Normalize, "normalize"),
307bf215546Sopenharmony_ci   REMAP(Degrees, "degrees"),
308bf215546Sopenharmony_ci   REMAP(Radians, "radians"),
309bf215546Sopenharmony_ci   REMAP(Rotate, "rotate"),
310bf215546Sopenharmony_ci   REMAP(Smoothstep, "smoothstep"),
311bf215546Sopenharmony_ci   REMAP(Step, "step"),
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   REMAP(Pow, "pow"),
314bf215546Sopenharmony_ci   REMAP(Pown, "pown"),
315bf215546Sopenharmony_ci   REMAP(Powr, "powr"),
316bf215546Sopenharmony_ci   REMAP(Rootn, "rootn"),
317bf215546Sopenharmony_ci   REMAP(Modf, "modf"),
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   REMAP(Acos, "acos"),
320bf215546Sopenharmony_ci   REMAP(Acosh, "acosh"),
321bf215546Sopenharmony_ci   REMAP(Acospi, "acospi"),
322bf215546Sopenharmony_ci   REMAP(Asin, "asin"),
323bf215546Sopenharmony_ci   REMAP(Asinh, "asinh"),
324bf215546Sopenharmony_ci   REMAP(Asinpi, "asinpi"),
325bf215546Sopenharmony_ci   REMAP(Atan, "atan"),
326bf215546Sopenharmony_ci   REMAP(Atan2, "atan2"),
327bf215546Sopenharmony_ci   REMAP(Atanh, "atanh"),
328bf215546Sopenharmony_ci   REMAP(Atanpi, "atanpi"),
329bf215546Sopenharmony_ci   REMAP(Atan2pi, "atan2pi"),
330bf215546Sopenharmony_ci   REMAP(Cos, "cos"),
331bf215546Sopenharmony_ci   REMAP(Cosh, "cosh"),
332bf215546Sopenharmony_ci   REMAP(Cospi, "cospi"),
333bf215546Sopenharmony_ci   REMAP(Sin, "sin"),
334bf215546Sopenharmony_ci   REMAP(Sinh, "sinh"),
335bf215546Sopenharmony_ci   REMAP(Sinpi, "sinpi"),
336bf215546Sopenharmony_ci   REMAP(Tan, "tan"),
337bf215546Sopenharmony_ci   REMAP(Tanh, "tanh"),
338bf215546Sopenharmony_ci   REMAP(Tanpi, "tanpi"),
339bf215546Sopenharmony_ci   REMAP(Sincos, "sincos"),
340bf215546Sopenharmony_ci   REMAP(Fract, "fract"),
341bf215546Sopenharmony_ci   REMAP(Frexp, "frexp"),
342bf215546Sopenharmony_ci   REMAP(Fma, "fma"),
343bf215546Sopenharmony_ci   REMAP(Fmod, "fmod"),
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   REMAP(Half_cos, "cos"),
346bf215546Sopenharmony_ci   REMAP(Half_exp, "exp"),
347bf215546Sopenharmony_ci   REMAP(Half_exp2, "exp2"),
348bf215546Sopenharmony_ci   REMAP(Half_exp10, "exp10"),
349bf215546Sopenharmony_ci   REMAP(Half_log, "log"),
350bf215546Sopenharmony_ci   REMAP(Half_log2, "log2"),
351bf215546Sopenharmony_ci   REMAP(Half_log10, "log10"),
352bf215546Sopenharmony_ci   REMAP(Half_powr, "powr"),
353bf215546Sopenharmony_ci   REMAP(Half_sin, "sin"),
354bf215546Sopenharmony_ci   REMAP(Half_tan, "tan"),
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   REMAP(Remainder, "remainder"),
357bf215546Sopenharmony_ci   REMAP(Remquo, "remquo"),
358bf215546Sopenharmony_ci   REMAP(Hypot, "hypot"),
359bf215546Sopenharmony_ci   REMAP(Exp, "exp"),
360bf215546Sopenharmony_ci   REMAP(Exp2, "exp2"),
361bf215546Sopenharmony_ci   REMAP(Exp10, "exp10"),
362bf215546Sopenharmony_ci   REMAP(Expm1, "expm1"),
363bf215546Sopenharmony_ci   REMAP(Ldexp, "ldexp"),
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci   REMAP(Ilogb, "ilogb"),
366bf215546Sopenharmony_ci   REMAP(Log, "log"),
367bf215546Sopenharmony_ci   REMAP(Log2, "log2"),
368bf215546Sopenharmony_ci   REMAP(Log10, "log10"),
369bf215546Sopenharmony_ci   REMAP(Log1p, "log1p"),
370bf215546Sopenharmony_ci   REMAP(Logb, "logb"),
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci   REMAP(Cbrt, "cbrt"),
373bf215546Sopenharmony_ci   REMAP(Erfc, "erfc"),
374bf215546Sopenharmony_ci   REMAP(Erf, "erf"),
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   REMAP(Lgamma, "lgamma"),
377bf215546Sopenharmony_ci   REMAP(Lgamma_r, "lgamma_r"),
378bf215546Sopenharmony_ci   REMAP(Tgamma, "tgamma"),
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci   REMAP(UMad_sat, "mad_sat"),
381bf215546Sopenharmony_ci   REMAP(SMad_sat, "mad_sat"),
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   REMAP(Shuffle, "shuffle"),
384bf215546Sopenharmony_ci   REMAP(Shuffle2, "shuffle2"),
385bf215546Sopenharmony_ci};
386bf215546Sopenharmony_ci#undef REMAP
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_cistatic const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
389bf215546Sopenharmony_ci{
390bf215546Sopenharmony_ci   if (opcode >= (sizeof(remap_table) / sizeof(const char *)))
391bf215546Sopenharmony_ci      return NULL;
392bf215546Sopenharmony_ci   return remap_table[opcode].fn;
393bf215546Sopenharmony_ci}
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_cistatic struct vtn_type *
396bf215546Sopenharmony_ciget_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
397bf215546Sopenharmony_ci{
398bf215546Sopenharmony_ci   struct vtn_type *ret = rzalloc(b, struct vtn_type);
399bf215546Sopenharmony_ci   assert(glsl_type_is_vector_or_scalar(type));
400bf215546Sopenharmony_ci   ret->type = type;
401bf215546Sopenharmony_ci   ret->length = glsl_get_vector_elements(type);
402bf215546Sopenharmony_ci   ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
403bf215546Sopenharmony_ci   return ret;
404bf215546Sopenharmony_ci}
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_cistatic struct vtn_type *
407bf215546Sopenharmony_ciget_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
408bf215546Sopenharmony_ci{
409bf215546Sopenharmony_ci   struct vtn_type *ret = rzalloc(b, struct vtn_type);
410bf215546Sopenharmony_ci   ret->type = nir_address_format_to_glsl_type(
411bf215546Sopenharmony_ci            vtn_mode_to_address_format(
412bf215546Sopenharmony_ci               b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
413bf215546Sopenharmony_ci   ret->base_type = vtn_base_type_pointer;
414bf215546Sopenharmony_ci   ret->storage_class = storage_class;
415bf215546Sopenharmony_ci   ret->deref = t;
416bf215546Sopenharmony_ci   return ret;
417bf215546Sopenharmony_ci}
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_cistatic struct vtn_type *
420bf215546Sopenharmony_ciget_signed_type(struct vtn_builder *b, struct vtn_type *t)
421bf215546Sopenharmony_ci{
422bf215546Sopenharmony_ci   if (t->base_type == vtn_base_type_pointer) {
423bf215546Sopenharmony_ci      return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
424bf215546Sopenharmony_ci   }
425bf215546Sopenharmony_ci   return get_vtn_type_for_glsl_type(
426bf215546Sopenharmony_ci      b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
427bf215546Sopenharmony_ci                          glsl_get_vector_elements(t->type)));
428bf215546Sopenharmony_ci}
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_cistatic nir_ssa_def *
431bf215546Sopenharmony_cihandle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
432bf215546Sopenharmony_ci              int num_srcs,
433bf215546Sopenharmony_ci              nir_ssa_def **srcs,
434bf215546Sopenharmony_ci              struct vtn_type **src_types,
435bf215546Sopenharmony_ci              const struct vtn_type *dest_type)
436bf215546Sopenharmony_ci{
437bf215546Sopenharmony_ci   const char *name = remap_clc_opcode(opcode);
438bf215546Sopenharmony_ci   if (!name)
439bf215546Sopenharmony_ci       return NULL;
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   /* Some functions which take params end up with uint (or pointer-to-uint) being passed,
442bf215546Sopenharmony_ci    * which doesn't mangle correctly when the function expects int or pointer-to-int.
443bf215546Sopenharmony_ci    * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
444bf215546Sopenharmony_ci    */
445bf215546Sopenharmony_ci   int signed_param = -1;
446bf215546Sopenharmony_ci   switch (opcode) {
447bf215546Sopenharmony_ci   case OpenCLstd_Frexp:
448bf215546Sopenharmony_ci   case OpenCLstd_Lgamma_r:
449bf215546Sopenharmony_ci   case OpenCLstd_Pown:
450bf215546Sopenharmony_ci   case OpenCLstd_Rootn:
451bf215546Sopenharmony_ci   case OpenCLstd_Ldexp:
452bf215546Sopenharmony_ci      signed_param = 1;
453bf215546Sopenharmony_ci      break;
454bf215546Sopenharmony_ci   case OpenCLstd_Remquo:
455bf215546Sopenharmony_ci      signed_param = 2;
456bf215546Sopenharmony_ci      break;
457bf215546Sopenharmony_ci   case OpenCLstd_SMad_sat: {
458bf215546Sopenharmony_ci      /* All parameters need to be converted to signed */
459bf215546Sopenharmony_ci      src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
460bf215546Sopenharmony_ci      break;
461bf215546Sopenharmony_ci   }
462bf215546Sopenharmony_ci   default: break;
463bf215546Sopenharmony_ci   }
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   if (signed_param >= 0) {
466bf215546Sopenharmony_ci      src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
467bf215546Sopenharmony_ci   }
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   nir_deref_instr *ret_deref = NULL;
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci   if (!call_mangled_function(b, name, 0, num_srcs, src_types,
472bf215546Sopenharmony_ci                              dest_type, srcs, &ret_deref))
473bf215546Sopenharmony_ci      return NULL;
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
476bf215546Sopenharmony_ci}
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_cistatic nir_ssa_def *
479bf215546Sopenharmony_cihandle_special(struct vtn_builder *b, uint32_t opcode,
480bf215546Sopenharmony_ci               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
481bf215546Sopenharmony_ci               const struct vtn_type *dest_type)
482bf215546Sopenharmony_ci{
483bf215546Sopenharmony_ci   nir_builder *nb = &b->nb;
484bf215546Sopenharmony_ci   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci   switch (cl_opcode) {
487bf215546Sopenharmony_ci   case OpenCLstd_SAbs_diff:
488bf215546Sopenharmony_ci     /* these works easier in direct NIR */
489bf215546Sopenharmony_ci      return nir_iabs_diff(nb, srcs[0], srcs[1]);
490bf215546Sopenharmony_ci   case OpenCLstd_UAbs_diff:
491bf215546Sopenharmony_ci      return nir_uabs_diff(nb, srcs[0], srcs[1]);
492bf215546Sopenharmony_ci   case OpenCLstd_Bitselect:
493bf215546Sopenharmony_ci      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
494bf215546Sopenharmony_ci   case OpenCLstd_SMad_hi:
495bf215546Sopenharmony_ci      return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
496bf215546Sopenharmony_ci   case OpenCLstd_UMad_hi:
497bf215546Sopenharmony_ci      return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
498bf215546Sopenharmony_ci   case OpenCLstd_SMul24:
499bf215546Sopenharmony_ci      return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
500bf215546Sopenharmony_ci   case OpenCLstd_UMul24:
501bf215546Sopenharmony_ci      return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
502bf215546Sopenharmony_ci   case OpenCLstd_SMad24:
503bf215546Sopenharmony_ci      return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
504bf215546Sopenharmony_ci   case OpenCLstd_UMad24:
505bf215546Sopenharmony_ci      return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
506bf215546Sopenharmony_ci   case OpenCLstd_FClamp:
507bf215546Sopenharmony_ci      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
508bf215546Sopenharmony_ci   case OpenCLstd_SClamp:
509bf215546Sopenharmony_ci      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
510bf215546Sopenharmony_ci   case OpenCLstd_UClamp:
511bf215546Sopenharmony_ci      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
512bf215546Sopenharmony_ci   case OpenCLstd_Copysign:
513bf215546Sopenharmony_ci      return nir_copysign(nb, srcs[0], srcs[1]);
514bf215546Sopenharmony_ci   case OpenCLstd_Cross:
515bf215546Sopenharmony_ci      if (dest_type->length == 4)
516bf215546Sopenharmony_ci         return nir_cross4(nb, srcs[0], srcs[1]);
517bf215546Sopenharmony_ci      return nir_cross3(nb, srcs[0], srcs[1]);
518bf215546Sopenharmony_ci   case OpenCLstd_Fdim:
519bf215546Sopenharmony_ci      return nir_fdim(nb, srcs[0], srcs[1]);
520bf215546Sopenharmony_ci   case OpenCLstd_Fmod:
521bf215546Sopenharmony_ci      if (nb->shader->options->lower_fmod)
522bf215546Sopenharmony_ci         break;
523bf215546Sopenharmony_ci      return nir_fmod(nb, srcs[0], srcs[1]);
524bf215546Sopenharmony_ci   case OpenCLstd_Mad:
525bf215546Sopenharmony_ci      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
526bf215546Sopenharmony_ci   case OpenCLstd_Maxmag:
527bf215546Sopenharmony_ci      return nir_maxmag(nb, srcs[0], srcs[1]);
528bf215546Sopenharmony_ci   case OpenCLstd_Minmag:
529bf215546Sopenharmony_ci      return nir_minmag(nb, srcs[0], srcs[1]);
530bf215546Sopenharmony_ci   case OpenCLstd_Nan:
531bf215546Sopenharmony_ci      return nir_nan(nb, srcs[0]);
532bf215546Sopenharmony_ci   case OpenCLstd_Nextafter:
533bf215546Sopenharmony_ci      return nir_nextafter(nb, srcs[0], srcs[1]);
534bf215546Sopenharmony_ci   case OpenCLstd_Normalize:
535bf215546Sopenharmony_ci      return nir_normalize(nb, srcs[0]);
536bf215546Sopenharmony_ci   case OpenCLstd_Clz:
537bf215546Sopenharmony_ci      return nir_clz_u(nb, srcs[0]);
538bf215546Sopenharmony_ci   case OpenCLstd_Ctz:
539bf215546Sopenharmony_ci      return nir_ctz_u(nb, srcs[0]);
540bf215546Sopenharmony_ci   case OpenCLstd_Select:
541bf215546Sopenharmony_ci      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
542bf215546Sopenharmony_ci   case OpenCLstd_S_Upsample:
543bf215546Sopenharmony_ci   case OpenCLstd_U_Upsample:
544bf215546Sopenharmony_ci      /* SPIR-V and CL have different defs for upsample, just implement in nir */
545bf215546Sopenharmony_ci      return nir_upsample(nb, srcs[0], srcs[1]);
546bf215546Sopenharmony_ci   case OpenCLstd_Native_exp:
547bf215546Sopenharmony_ci      return nir_fexp(nb, srcs[0]);
548bf215546Sopenharmony_ci   case OpenCLstd_Native_exp10:
549bf215546Sopenharmony_ci      return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
550bf215546Sopenharmony_ci   case OpenCLstd_Native_log:
551bf215546Sopenharmony_ci      return nir_flog(nb, srcs[0]);
552bf215546Sopenharmony_ci   case OpenCLstd_Native_log10:
553bf215546Sopenharmony_ci      return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
554bf215546Sopenharmony_ci   case OpenCLstd_Native_tan:
555bf215546Sopenharmony_ci      return nir_ftan(nb, srcs[0]);
556bf215546Sopenharmony_ci   case OpenCLstd_Ldexp:
557bf215546Sopenharmony_ci      if (nb->shader->options->lower_ldexp)
558bf215546Sopenharmony_ci         break;
559bf215546Sopenharmony_ci      return nir_ldexp(nb, srcs[0], srcs[1]);
560bf215546Sopenharmony_ci   case OpenCLstd_Fma:
561bf215546Sopenharmony_ci      /* FIXME: the software implementation only supports fp32 for now. */
562bf215546Sopenharmony_ci      if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
563bf215546Sopenharmony_ci         break;
564bf215546Sopenharmony_ci      return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
565bf215546Sopenharmony_ci   default:
566bf215546Sopenharmony_ci      break;
567bf215546Sopenharmony_ci   }
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci   nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
570bf215546Sopenharmony_ci   if (!ret)
571bf215546Sopenharmony_ci      vtn_fail("No NIR equivalent");
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci   return ret;
574bf215546Sopenharmony_ci}
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_cistatic nir_ssa_def *
577bf215546Sopenharmony_cihandle_core(struct vtn_builder *b, uint32_t opcode,
578bf215546Sopenharmony_ci            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
579bf215546Sopenharmony_ci            const struct vtn_type *dest_type)
580bf215546Sopenharmony_ci{
581bf215546Sopenharmony_ci   nir_deref_instr *ret_deref = NULL;
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci   switch ((SpvOp)opcode) {
584bf215546Sopenharmony_ci   case SpvOpGroupAsyncCopy: {
585bf215546Sopenharmony_ci      /* Libclc doesn't include 3-component overloads of the async copy functions.
586bf215546Sopenharmony_ci       * However, the CLC spec says:
587bf215546Sopenharmony_ci       * async_work_group_copy and async_work_group_strided_copy for 3-component vector types
588bf215546Sopenharmony_ci       * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component
589bf215546Sopenharmony_ci       * vector types
590bf215546Sopenharmony_ci       */
591bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_srcs; ++i) {
592bf215546Sopenharmony_ci         if (src_types[i]->base_type == vtn_base_type_pointer &&
593bf215546Sopenharmony_ci             src_types[i]->deref->base_type == vtn_base_type_vector &&
594bf215546Sopenharmony_ci             src_types[i]->deref->length == 3) {
595bf215546Sopenharmony_ci            src_types[i] =
596bf215546Sopenharmony_ci               get_pointer_type(b,
597bf215546Sopenharmony_ci                                get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
598bf215546Sopenharmony_ci                                src_types[i]->storage_class);
599bf215546Sopenharmony_ci         }
600bf215546Sopenharmony_ci      }
601bf215546Sopenharmony_ci      if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
602bf215546Sopenharmony_ci         return NULL;
603bf215546Sopenharmony_ci      break;
604bf215546Sopenharmony_ci   }
605bf215546Sopenharmony_ci   case SpvOpGroupWaitEvents: {
606bf215546Sopenharmony_ci      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
607bf215546Sopenharmony_ci      if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
608bf215546Sopenharmony_ci         return NULL;
609bf215546Sopenharmony_ci      break;
610bf215546Sopenharmony_ci   }
611bf215546Sopenharmony_ci   default:
612bf215546Sopenharmony_ci      return NULL;
613bf215546Sopenharmony_ci   }
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_ci   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
616bf215546Sopenharmony_ci}
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_cistatic void
620bf215546Sopenharmony_ci_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
621bf215546Sopenharmony_ci                     const uint32_t *w, unsigned count, bool load,
622bf215546Sopenharmony_ci                     bool vec_aligned, nir_rounding_mode rounding)
623bf215546Sopenharmony_ci{
624bf215546Sopenharmony_ci   struct vtn_type *type;
625bf215546Sopenharmony_ci   if (load)
626bf215546Sopenharmony_ci      type = vtn_get_type(b, w[1]);
627bf215546Sopenharmony_ci   else
628bf215546Sopenharmony_ci      type = vtn_get_value_type(b, w[5]);
629bf215546Sopenharmony_ci   unsigned a = load ? 0 : 1;
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ci   enum glsl_base_type base_type = glsl_get_base_type(type->type);
632bf215546Sopenharmony_ci   unsigned components = glsl_get_vector_elements(type->type);
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
635bf215546Sopenharmony_ci   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_ci   struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
638bf215546Sopenharmony_ci   nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci   nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
641bf215546Sopenharmony_ci      (vec_aligned && components == 3) ? 4 : components);
642bf215546Sopenharmony_ci   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);
643bf215546Sopenharmony_ci
644bf215546Sopenharmony_ci   unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
645bf215546Sopenharmony_ci                                      glsl_get_bit_size(type->type) / 8;
646bf215546Sopenharmony_ci   enum glsl_base_type ptr_base_type =
647bf215546Sopenharmony_ci      glsl_get_base_type(p->pointer->type->type);
648bf215546Sopenharmony_ci   if (base_type != ptr_base_type) {
649bf215546Sopenharmony_ci      vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
650bf215546Sopenharmony_ci                  (base_type != GLSL_TYPE_FLOAT &&
651bf215546Sopenharmony_ci                   base_type != GLSL_TYPE_DOUBLE),
652bf215546Sopenharmony_ci                  "vload/vstore cannot do type conversion. "
653bf215546Sopenharmony_ci                  "vload/vstore_half can only convert from half to other "
654bf215546Sopenharmony_ci                  "floating-point types.");
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci      /* Above-computed alignment was for floats/doubles, not halves */
657bf215546Sopenharmony_ci      alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
658bf215546Sopenharmony_ci   }
659bf215546Sopenharmony_ci
660bf215546Sopenharmony_ci   deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci   for (int i = 0; i < components; i++) {
663bf215546Sopenharmony_ci      nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
664bf215546Sopenharmony_ci      nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci      if (load) {
667bf215546Sopenharmony_ci         comps[i] = vtn_local_load(b, arr_deref, p->type->access);
668bf215546Sopenharmony_ci         ncomps[i] = comps[i]->def;
669bf215546Sopenharmony_ci         if (base_type != ptr_base_type) {
670bf215546Sopenharmony_ci            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
671bf215546Sopenharmony_ci                   (base_type == GLSL_TYPE_FLOAT ||
672bf215546Sopenharmony_ci                    base_type == GLSL_TYPE_DOUBLE));
673bf215546Sopenharmony_ci            ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
674bf215546Sopenharmony_ci                                 glsl_base_type_get_bit_size(base_type));
675bf215546Sopenharmony_ci         }
676bf215546Sopenharmony_ci      } else {
677bf215546Sopenharmony_ci         struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
678bf215546Sopenharmony_ci         struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
679bf215546Sopenharmony_ci         ssa->def = nir_channel(&b->nb, val->def, i);
680bf215546Sopenharmony_ci         if (base_type != ptr_base_type) {
681bf215546Sopenharmony_ci            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
682bf215546Sopenharmony_ci                   (base_type == GLSL_TYPE_FLOAT ||
683bf215546Sopenharmony_ci                    base_type == GLSL_TYPE_DOUBLE));
684bf215546Sopenharmony_ci            if (rounding == nir_rounding_mode_undef) {
685bf215546Sopenharmony_ci               ssa->def = nir_f2f16(&b->nb, ssa->def);
686bf215546Sopenharmony_ci            } else {
687bf215546Sopenharmony_ci               ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
688bf215546Sopenharmony_ci                                                nir_type_float | ssa->def->bit_size,
689bf215546Sopenharmony_ci                                                nir_type_float16,
690bf215546Sopenharmony_ci                                                rounding, false);
691bf215546Sopenharmony_ci            }
692bf215546Sopenharmony_ci         }
693bf215546Sopenharmony_ci         vtn_local_store(b, ssa, arr_deref, p->type->access);
694bf215546Sopenharmony_ci      }
695bf215546Sopenharmony_ci   }
696bf215546Sopenharmony_ci   if (load) {
697bf215546Sopenharmony_ci      vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
698bf215546Sopenharmony_ci   }
699bf215546Sopenharmony_ci}
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_cistatic void
702bf215546Sopenharmony_civtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
703bf215546Sopenharmony_ci                        const uint32_t *w, unsigned count)
704bf215546Sopenharmony_ci{
705bf215546Sopenharmony_ci   _handle_v_load_store(b, opcode, w, count, true,
706bf215546Sopenharmony_ci                        opcode == OpenCLstd_Vloada_halfn,
707bf215546Sopenharmony_ci                        nir_rounding_mode_undef);
708bf215546Sopenharmony_ci}
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_cistatic void
711bf215546Sopenharmony_civtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
712bf215546Sopenharmony_ci                         const uint32_t *w, unsigned count)
713bf215546Sopenharmony_ci{
714bf215546Sopenharmony_ci   _handle_v_load_store(b, opcode, w, count, false,
715bf215546Sopenharmony_ci                        opcode == OpenCLstd_Vstorea_halfn,
716bf215546Sopenharmony_ci                        nir_rounding_mode_undef);
717bf215546Sopenharmony_ci}
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_cistatic void
720bf215546Sopenharmony_civtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
721bf215546Sopenharmony_ci                                const uint32_t *w, unsigned count)
722bf215546Sopenharmony_ci{
723bf215546Sopenharmony_ci   _handle_v_load_store(b, opcode, w, count, false,
724bf215546Sopenharmony_ci                        opcode == OpenCLstd_Vstorea_halfn_r,
725bf215546Sopenharmony_ci                        vtn_rounding_mode_to_nir(b, w[8]));
726bf215546Sopenharmony_ci}
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_cistatic unsigned
729bf215546Sopenharmony_civtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
730bf215546Sopenharmony_ci{
731bf215546Sopenharmony_ci   nir_deref_instr *deref = vtn_nir_deref(b, id);
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci   while (deref && deref->deref_type != nir_deref_type_var)
734bf215546Sopenharmony_ci      deref = nir_deref_instr_parent(deref);
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci   vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
737bf215546Sopenharmony_ci               "Printf string argument must be a pointer to a constant variable");
738bf215546Sopenharmony_ci   vtn_fail_if(deref->var->constant_initializer == NULL,
739bf215546Sopenharmony_ci               "Printf string argument must have an initializer");
740bf215546Sopenharmony_ci   vtn_fail_if(!glsl_type_is_array(deref->var->type),
741bf215546Sopenharmony_ci               "Printf string must be an char array");
742bf215546Sopenharmony_ci   const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
743bf215546Sopenharmony_ci   vtn_fail_if(char_type != glsl_uint8_t_type() &&
744bf215546Sopenharmony_ci               char_type != glsl_int8_t_type(),
745bf215546Sopenharmony_ci               "Printf string must be an char array");
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci   nir_constant *c = deref->var->constant_initializer;
748bf215546Sopenharmony_ci   assert(c->num_elements == glsl_get_length(deref->var->type));
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci   unsigned idx = info->string_size;
751bf215546Sopenharmony_ci   info->strings = reralloc_size(b->shader, info->strings,
752bf215546Sopenharmony_ci                                 idx + c->num_elements);
753bf215546Sopenharmony_ci   info->string_size += c->num_elements;
754bf215546Sopenharmony_ci
755bf215546Sopenharmony_ci   char *str = &info->strings[idx];
756bf215546Sopenharmony_ci   bool found_null = false;
757bf215546Sopenharmony_ci   for (unsigned i = 0; i < c->num_elements; i++) {
758bf215546Sopenharmony_ci      memcpy((char *)str + i, c->elements[i]->values, 1);
759bf215546Sopenharmony_ci      if (str[i] == '\0')
760bf215546Sopenharmony_ci         found_null = true;
761bf215546Sopenharmony_ci   }
762bf215546Sopenharmony_ci   vtn_fail_if(!found_null, "Printf string must be null terminated");
763bf215546Sopenharmony_ci   return idx;
764bf215546Sopenharmony_ci}
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci/* printf is special because there are no limits on args */
767bf215546Sopenharmony_cistatic void
768bf215546Sopenharmony_cihandle_printf(struct vtn_builder *b, uint32_t opcode,
769bf215546Sopenharmony_ci              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
770bf215546Sopenharmony_ci{
771bf215546Sopenharmony_ci   if (!b->options->caps.printf) {
772bf215546Sopenharmony_ci      vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
773bf215546Sopenharmony_ci      return;
774bf215546Sopenharmony_ci   }
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci   /* Step 1. extract the format string */
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   /*
779bf215546Sopenharmony_ci    * info_idx is 1-based to match clover/llvm
780bf215546Sopenharmony_ci    * the backend indexes the info table at info_idx - 1.
781bf215546Sopenharmony_ci    */
782bf215546Sopenharmony_ci   b->shader->printf_info_count++;
783bf215546Sopenharmony_ci   unsigned info_idx = b->shader->printf_info_count;
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci   b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
786bf215546Sopenharmony_ci                                     nir_printf_info, info_idx);
787bf215546Sopenharmony_ci   nir_printf_info *info = &b->shader->printf_info[info_idx - 1];
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci   info->strings = NULL;
790bf215546Sopenharmony_ci   info->string_size = 0;
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci   vtn_add_printf_string(b, w_src[0], info);
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   info->num_args = num_srcs - 1;
795bf215546Sopenharmony_ci   info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_ci   /* Step 2, build an ad-hoc struct type out of the args */
798bf215546Sopenharmony_ci   unsigned field_offset = 0;
799bf215546Sopenharmony_ci   struct glsl_struct_field *fields =
800bf215546Sopenharmony_ci      rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
801bf215546Sopenharmony_ci   for (unsigned i = 1; i < num_srcs; ++i) {
802bf215546Sopenharmony_ci      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
803bf215546Sopenharmony_ci      struct vtn_type *src_type = val->type;
804bf215546Sopenharmony_ci      fields[i - 1].type = src_type->type;
805bf215546Sopenharmony_ci      fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
806bf215546Sopenharmony_ci      field_offset = align(field_offset, 4);
807bf215546Sopenharmony_ci      fields[i - 1].offset = field_offset;
808bf215546Sopenharmony_ci      info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
809bf215546Sopenharmony_ci      field_offset += glsl_get_cl_size(src_type->type);
810bf215546Sopenharmony_ci   }
811bf215546Sopenharmony_ci   const struct glsl_type *struct_type =
812bf215546Sopenharmony_ci      glsl_struct_type(fields, num_srcs - 1, "printf", true);
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci   /* Step 3, create a variable of that type and populate its fields */
815bf215546Sopenharmony_ci   nir_variable *var = nir_local_variable_create(b->nb.impl, struct_type, NULL);
816bf215546Sopenharmony_ci   nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
817bf215546Sopenharmony_ci   size_t fmt_pos = 0;
818bf215546Sopenharmony_ci   for (unsigned i = 1; i < num_srcs; ++i) {
819bf215546Sopenharmony_ci      nir_deref_instr *field_deref =
820bf215546Sopenharmony_ci         nir_build_deref_struct(&b->nb, deref_var, i - 1);
821bf215546Sopenharmony_ci      nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
822bf215546Sopenharmony_ci      /* extract strings */
823bf215546Sopenharmony_ci      fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
824bf215546Sopenharmony_ci      if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
825bf215546Sopenharmony_ci         unsigned idx = vtn_add_printf_string(b, w_src[i], info);
826bf215546Sopenharmony_ci         nir_store_deref(&b->nb, field_deref,
827bf215546Sopenharmony_ci                         nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
828bf215546Sopenharmony_ci                         ~0 /* write_mask */);
829bf215546Sopenharmony_ci      } else
830bf215546Sopenharmony_ci         nir_store_deref(&b->nb, field_deref, field_src, ~0);
831bf215546Sopenharmony_ci   }
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   /* Lastly, the actual intrinsic */
834bf215546Sopenharmony_ci   nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
835bf215546Sopenharmony_ci   nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
836bf215546Sopenharmony_ci   vtn_push_nir_ssa(b, w_dest[1], ret);
837bf215546Sopenharmony_ci}
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_cistatic nir_ssa_def *
840bf215546Sopenharmony_cihandle_round(struct vtn_builder *b, uint32_t opcode,
841bf215546Sopenharmony_ci             unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
842bf215546Sopenharmony_ci             const struct vtn_type *dest_type)
843bf215546Sopenharmony_ci{
844bf215546Sopenharmony_ci   nir_ssa_def *src = srcs[0];
845bf215546Sopenharmony_ci   nir_builder *nb = &b->nb;
846bf215546Sopenharmony_ci   nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
847bf215546Sopenharmony_ci   nir_ssa_def *truncated = nir_ftrunc(nb, src);
848bf215546Sopenharmony_ci   nir_ssa_def *remainder = nir_fsub(nb, src, truncated);
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_ci   return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
851bf215546Sopenharmony_ci                    nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
852bf215546Sopenharmony_ci}
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_cistatic nir_ssa_def *
855bf215546Sopenharmony_cihandle_shuffle(struct vtn_builder *b, uint32_t opcode,
856bf215546Sopenharmony_ci               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
857bf215546Sopenharmony_ci               const struct vtn_type *dest_type)
858bf215546Sopenharmony_ci{
859bf215546Sopenharmony_ci   struct nir_ssa_def *input = srcs[0];
860bf215546Sopenharmony_ci   struct nir_ssa_def *mask = srcs[1];
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   unsigned out_elems = dest_type->length;
863bf215546Sopenharmony_ci   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
864bf215546Sopenharmony_ci   unsigned in_elems = input->num_components;
865bf215546Sopenharmony_ci   if (mask->bit_size != 32)
866bf215546Sopenharmony_ci      mask = nir_u2u32(&b->nb, mask);
867bf215546Sopenharmony_ci   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
868bf215546Sopenharmony_ci   for (unsigned i = 0; i < out_elems; i++)
869bf215546Sopenharmony_ci      outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));
870bf215546Sopenharmony_ci
871bf215546Sopenharmony_ci   return nir_vec(&b->nb, outres, out_elems);
872bf215546Sopenharmony_ci}
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_cistatic nir_ssa_def *
875bf215546Sopenharmony_cihandle_shuffle2(struct vtn_builder *b, uint32_t opcode,
876bf215546Sopenharmony_ci                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
877bf215546Sopenharmony_ci                const struct vtn_type *dest_type)
878bf215546Sopenharmony_ci{
879bf215546Sopenharmony_ci   struct nir_ssa_def *input0 = srcs[0];
880bf215546Sopenharmony_ci   struct nir_ssa_def *input1 = srcs[1];
881bf215546Sopenharmony_ci   struct nir_ssa_def *mask = srcs[2];
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci   unsigned out_elems = dest_type->length;
884bf215546Sopenharmony_ci   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
885bf215546Sopenharmony_ci   unsigned in_elems = input0->num_components;
886bf215546Sopenharmony_ci   unsigned total_mask = 2 * in_elems - 1;
887bf215546Sopenharmony_ci   unsigned half_mask = in_elems - 1;
888bf215546Sopenharmony_ci   if (mask->bit_size != 32)
889bf215546Sopenharmony_ci      mask = nir_u2u32(&b->nb, mask);
890bf215546Sopenharmony_ci   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
891bf215546Sopenharmony_ci   for (unsigned i = 0; i < out_elems; i++) {
892bf215546Sopenharmony_ci      nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
893bf215546Sopenharmony_ci      nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
894bf215546Sopenharmony_ci      nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
895bf215546Sopenharmony_ci      nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
896bf215546Sopenharmony_ci      nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
897bf215546Sopenharmony_ci      outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
898bf215546Sopenharmony_ci   }
899bf215546Sopenharmony_ci   return nir_vec(&b->nb, outres, out_elems);
900bf215546Sopenharmony_ci}
901bf215546Sopenharmony_ci
902bf215546Sopenharmony_cibool
903bf215546Sopenharmony_civtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
904bf215546Sopenharmony_ci                              const uint32_t *w, unsigned count)
905bf215546Sopenharmony_ci{
906bf215546Sopenharmony_ci   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci   switch (cl_opcode) {
909bf215546Sopenharmony_ci   case OpenCLstd_Fabs:
910bf215546Sopenharmony_ci   case OpenCLstd_SAbs:
911bf215546Sopenharmony_ci   case OpenCLstd_UAbs:
912bf215546Sopenharmony_ci   case OpenCLstd_SAdd_sat:
913bf215546Sopenharmony_ci   case OpenCLstd_UAdd_sat:
914bf215546Sopenharmony_ci   case OpenCLstd_Ceil:
915bf215546Sopenharmony_ci   case OpenCLstd_Floor:
916bf215546Sopenharmony_ci   case OpenCLstd_Fmax:
917bf215546Sopenharmony_ci   case OpenCLstd_SHadd:
918bf215546Sopenharmony_ci   case OpenCLstd_UHadd:
919bf215546Sopenharmony_ci   case OpenCLstd_SMax:
920bf215546Sopenharmony_ci   case OpenCLstd_UMax:
921bf215546Sopenharmony_ci   case OpenCLstd_Fmin:
922bf215546Sopenharmony_ci   case OpenCLstd_SMin:
923bf215546Sopenharmony_ci   case OpenCLstd_UMin:
924bf215546Sopenharmony_ci   case OpenCLstd_Mix:
925bf215546Sopenharmony_ci   case OpenCLstd_Native_cos:
926bf215546Sopenharmony_ci   case OpenCLstd_Native_divide:
927bf215546Sopenharmony_ci   case OpenCLstd_Native_exp2:
928bf215546Sopenharmony_ci   case OpenCLstd_Native_log2:
929bf215546Sopenharmony_ci   case OpenCLstd_Native_powr:
930bf215546Sopenharmony_ci   case OpenCLstd_Native_recip:
931bf215546Sopenharmony_ci   case OpenCLstd_Native_rsqrt:
932bf215546Sopenharmony_ci   case OpenCLstd_Native_sin:
933bf215546Sopenharmony_ci   case OpenCLstd_Native_sqrt:
934bf215546Sopenharmony_ci   case OpenCLstd_SMul_hi:
935bf215546Sopenharmony_ci   case OpenCLstd_UMul_hi:
936bf215546Sopenharmony_ci   case OpenCLstd_Popcount:
937bf215546Sopenharmony_ci   case OpenCLstd_SRhadd:
938bf215546Sopenharmony_ci   case OpenCLstd_URhadd:
939bf215546Sopenharmony_ci   case OpenCLstd_Rsqrt:
940bf215546Sopenharmony_ci   case OpenCLstd_Sign:
941bf215546Sopenharmony_ci   case OpenCLstd_Sqrt:
942bf215546Sopenharmony_ci   case OpenCLstd_SSub_sat:
943bf215546Sopenharmony_ci   case OpenCLstd_USub_sat:
944bf215546Sopenharmony_ci   case OpenCLstd_Trunc:
945bf215546Sopenharmony_ci   case OpenCLstd_Rint:
946bf215546Sopenharmony_ci   case OpenCLstd_Half_divide:
947bf215546Sopenharmony_ci   case OpenCLstd_Half_recip:
948bf215546Sopenharmony_ci      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
949bf215546Sopenharmony_ci      return true;
950bf215546Sopenharmony_ci   case OpenCLstd_SAbs_diff:
951bf215546Sopenharmony_ci   case OpenCLstd_UAbs_diff:
952bf215546Sopenharmony_ci   case OpenCLstd_SMad_hi:
953bf215546Sopenharmony_ci   case OpenCLstd_UMad_hi:
954bf215546Sopenharmony_ci   case OpenCLstd_SMad24:
955bf215546Sopenharmony_ci   case OpenCLstd_UMad24:
956bf215546Sopenharmony_ci   case OpenCLstd_SMul24:
957bf215546Sopenharmony_ci   case OpenCLstd_UMul24:
958bf215546Sopenharmony_ci   case OpenCLstd_Bitselect:
959bf215546Sopenharmony_ci   case OpenCLstd_FClamp:
960bf215546Sopenharmony_ci   case OpenCLstd_SClamp:
961bf215546Sopenharmony_ci   case OpenCLstd_UClamp:
962bf215546Sopenharmony_ci   case OpenCLstd_Copysign:
963bf215546Sopenharmony_ci   case OpenCLstd_Cross:
964bf215546Sopenharmony_ci   case OpenCLstd_Degrees:
965bf215546Sopenharmony_ci   case OpenCLstd_Fdim:
966bf215546Sopenharmony_ci   case OpenCLstd_Fma:
967bf215546Sopenharmony_ci   case OpenCLstd_Distance:
968bf215546Sopenharmony_ci   case OpenCLstd_Fast_distance:
969bf215546Sopenharmony_ci   case OpenCLstd_Fast_length:
970bf215546Sopenharmony_ci   case OpenCLstd_Fast_normalize:
971bf215546Sopenharmony_ci   case OpenCLstd_Half_rsqrt:
972bf215546Sopenharmony_ci   case OpenCLstd_Half_sqrt:
973bf215546Sopenharmony_ci   case OpenCLstd_Length:
974bf215546Sopenharmony_ci   case OpenCLstd_Mad:
975bf215546Sopenharmony_ci   case OpenCLstd_Maxmag:
976bf215546Sopenharmony_ci   case OpenCLstd_Minmag:
977bf215546Sopenharmony_ci   case OpenCLstd_Nan:
978bf215546Sopenharmony_ci   case OpenCLstd_Nextafter:
979bf215546Sopenharmony_ci   case OpenCLstd_Normalize:
980bf215546Sopenharmony_ci   case OpenCLstd_Radians:
981bf215546Sopenharmony_ci   case OpenCLstd_Rotate:
982bf215546Sopenharmony_ci   case OpenCLstd_Select:
983bf215546Sopenharmony_ci   case OpenCLstd_Step:
984bf215546Sopenharmony_ci   case OpenCLstd_Smoothstep:
985bf215546Sopenharmony_ci   case OpenCLstd_S_Upsample:
986bf215546Sopenharmony_ci   case OpenCLstd_U_Upsample:
987bf215546Sopenharmony_ci   case OpenCLstd_Clz:
988bf215546Sopenharmony_ci   case OpenCLstd_Ctz:
989bf215546Sopenharmony_ci   case OpenCLstd_Native_exp:
990bf215546Sopenharmony_ci   case OpenCLstd_Native_exp10:
991bf215546Sopenharmony_ci   case OpenCLstd_Native_log:
992bf215546Sopenharmony_ci   case OpenCLstd_Native_log10:
993bf215546Sopenharmony_ci   case OpenCLstd_Acos:
994bf215546Sopenharmony_ci   case OpenCLstd_Acosh:
995bf215546Sopenharmony_ci   case OpenCLstd_Acospi:
996bf215546Sopenharmony_ci   case OpenCLstd_Asin:
997bf215546Sopenharmony_ci   case OpenCLstd_Asinh:
998bf215546Sopenharmony_ci   case OpenCLstd_Asinpi:
999bf215546Sopenharmony_ci   case OpenCLstd_Atan:
1000bf215546Sopenharmony_ci   case OpenCLstd_Atan2:
1001bf215546Sopenharmony_ci   case OpenCLstd_Atanh:
1002bf215546Sopenharmony_ci   case OpenCLstd_Atanpi:
1003bf215546Sopenharmony_ci   case OpenCLstd_Atan2pi:
1004bf215546Sopenharmony_ci   case OpenCLstd_Fract:
1005bf215546Sopenharmony_ci   case OpenCLstd_Frexp:
1006bf215546Sopenharmony_ci   case OpenCLstd_Exp:
1007bf215546Sopenharmony_ci   case OpenCLstd_Exp2:
1008bf215546Sopenharmony_ci   case OpenCLstd_Expm1:
1009bf215546Sopenharmony_ci   case OpenCLstd_Exp10:
1010bf215546Sopenharmony_ci   case OpenCLstd_Fmod:
1011bf215546Sopenharmony_ci   case OpenCLstd_Ilogb:
1012bf215546Sopenharmony_ci   case OpenCLstd_Log:
1013bf215546Sopenharmony_ci   case OpenCLstd_Log2:
1014bf215546Sopenharmony_ci   case OpenCLstd_Log10:
1015bf215546Sopenharmony_ci   case OpenCLstd_Log1p:
1016bf215546Sopenharmony_ci   case OpenCLstd_Logb:
1017bf215546Sopenharmony_ci   case OpenCLstd_Ldexp:
1018bf215546Sopenharmony_ci   case OpenCLstd_Cos:
1019bf215546Sopenharmony_ci   case OpenCLstd_Cosh:
1020bf215546Sopenharmony_ci   case OpenCLstd_Cospi:
1021bf215546Sopenharmony_ci   case OpenCLstd_Sin:
1022bf215546Sopenharmony_ci   case OpenCLstd_Sinh:
1023bf215546Sopenharmony_ci   case OpenCLstd_Sinpi:
1024bf215546Sopenharmony_ci   case OpenCLstd_Tan:
1025bf215546Sopenharmony_ci   case OpenCLstd_Tanh:
1026bf215546Sopenharmony_ci   case OpenCLstd_Tanpi:
1027bf215546Sopenharmony_ci   case OpenCLstd_Cbrt:
1028bf215546Sopenharmony_ci   case OpenCLstd_Erfc:
1029bf215546Sopenharmony_ci   case OpenCLstd_Erf:
1030bf215546Sopenharmony_ci   case OpenCLstd_Lgamma:
1031bf215546Sopenharmony_ci   case OpenCLstd_Lgamma_r:
1032bf215546Sopenharmony_ci   case OpenCLstd_Tgamma:
1033bf215546Sopenharmony_ci   case OpenCLstd_Pow:
1034bf215546Sopenharmony_ci   case OpenCLstd_Powr:
1035bf215546Sopenharmony_ci   case OpenCLstd_Pown:
1036bf215546Sopenharmony_ci   case OpenCLstd_Rootn:
1037bf215546Sopenharmony_ci   case OpenCLstd_Remainder:
1038bf215546Sopenharmony_ci   case OpenCLstd_Remquo:
1039bf215546Sopenharmony_ci   case OpenCLstd_Hypot:
1040bf215546Sopenharmony_ci   case OpenCLstd_Sincos:
1041bf215546Sopenharmony_ci   case OpenCLstd_Modf:
1042bf215546Sopenharmony_ci   case OpenCLstd_UMad_sat:
1043bf215546Sopenharmony_ci   case OpenCLstd_SMad_sat:
1044bf215546Sopenharmony_ci   case OpenCLstd_Native_tan:
1045bf215546Sopenharmony_ci   case OpenCLstd_Half_cos:
1046bf215546Sopenharmony_ci   case OpenCLstd_Half_exp:
1047bf215546Sopenharmony_ci   case OpenCLstd_Half_exp2:
1048bf215546Sopenharmony_ci   case OpenCLstd_Half_exp10:
1049bf215546Sopenharmony_ci   case OpenCLstd_Half_log:
1050bf215546Sopenharmony_ci   case OpenCLstd_Half_log2:
1051bf215546Sopenharmony_ci   case OpenCLstd_Half_log10:
1052bf215546Sopenharmony_ci   case OpenCLstd_Half_powr:
1053bf215546Sopenharmony_ci   case OpenCLstd_Half_sin:
1054bf215546Sopenharmony_ci   case OpenCLstd_Half_tan:
1055bf215546Sopenharmony_ci      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
1056bf215546Sopenharmony_ci      return true;
1057bf215546Sopenharmony_ci   case OpenCLstd_Vloadn:
1058bf215546Sopenharmony_ci   case OpenCLstd_Vload_half:
1059bf215546Sopenharmony_ci   case OpenCLstd_Vload_halfn:
1060bf215546Sopenharmony_ci   case OpenCLstd_Vloada_halfn:
1061bf215546Sopenharmony_ci      vtn_handle_opencl_vload(b, cl_opcode, w, count);
1062bf215546Sopenharmony_ci      return true;
1063bf215546Sopenharmony_ci   case OpenCLstd_Vstoren:
1064bf215546Sopenharmony_ci   case OpenCLstd_Vstore_half:
1065bf215546Sopenharmony_ci   case OpenCLstd_Vstore_halfn:
1066bf215546Sopenharmony_ci   case OpenCLstd_Vstorea_halfn:
1067bf215546Sopenharmony_ci      vtn_handle_opencl_vstore(b, cl_opcode, w, count);
1068bf215546Sopenharmony_ci      return true;
1069bf215546Sopenharmony_ci   case OpenCLstd_Vstore_half_r:
1070bf215546Sopenharmony_ci   case OpenCLstd_Vstore_halfn_r:
1071bf215546Sopenharmony_ci   case OpenCLstd_Vstorea_halfn_r:
1072bf215546Sopenharmony_ci      vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
1073bf215546Sopenharmony_ci      return true;
1074bf215546Sopenharmony_ci   case OpenCLstd_Shuffle:
1075bf215546Sopenharmony_ci      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
1076bf215546Sopenharmony_ci      return true;
1077bf215546Sopenharmony_ci   case OpenCLstd_Shuffle2:
1078bf215546Sopenharmony_ci      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
1079bf215546Sopenharmony_ci      return true;
1080bf215546Sopenharmony_ci   case OpenCLstd_Round:
1081bf215546Sopenharmony_ci      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
1082bf215546Sopenharmony_ci      return true;
1083bf215546Sopenharmony_ci   case OpenCLstd_Printf:
1084bf215546Sopenharmony_ci      handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
1085bf215546Sopenharmony_ci      return true;
1086bf215546Sopenharmony_ci   case OpenCLstd_Prefetch:
1087bf215546Sopenharmony_ci      /* TODO maybe add a nir instruction for this? */
1088bf215546Sopenharmony_ci      return true;
1089bf215546Sopenharmony_ci   default:
1090bf215546Sopenharmony_ci      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
1091bf215546Sopenharmony_ci      return false;
1092bf215546Sopenharmony_ci   }
1093bf215546Sopenharmony_ci}
1094bf215546Sopenharmony_ci
1095bf215546Sopenharmony_cibool
1096bf215546Sopenharmony_civtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
1097bf215546Sopenharmony_ci                                   const uint32_t *w, unsigned count)
1098bf215546Sopenharmony_ci{
1099bf215546Sopenharmony_ci   switch (opcode) {
1100bf215546Sopenharmony_ci   case SpvOpGroupAsyncCopy:
1101bf215546Sopenharmony_ci      handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
1102bf215546Sopenharmony_ci      return true;
1103bf215546Sopenharmony_ci   case SpvOpGroupWaitEvents:
1104bf215546Sopenharmony_ci      handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
1105bf215546Sopenharmony_ci      return true;
1106bf215546Sopenharmony_ci   default:
1107bf215546Sopenharmony_ci      return false;
1108bf215546Sopenharmony_ci   }
1109bf215546Sopenharmony_ci   return true;
1110bf215546Sopenharmony_ci}
1111