1/*
2 * Copyright © 2018 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark (robdclark@gmail.com)
25 */
26
27#include "math.h"
28#include "nir/nir_builtin_builder.h"
29
30#include "util/u_printf.h"
31#include "vtn_private.h"
32#include "OpenCL.std.h"
33
34typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
35                                    uint32_t opcode,
36                                    unsigned num_srcs, nir_ssa_def **srcs,
37                                    struct vtn_type **src_types,
38                                    const struct vtn_type *dest_type);
39
40static int to_llvm_address_space(SpvStorageClass mode)
41{
42   switch (mode) {
43   case SpvStorageClassPrivate:
44   case SpvStorageClassFunction: return 0;
45   case SpvStorageClassCrossWorkgroup: return 1;
46   case SpvStorageClassUniform:
47   case SpvStorageClassUniformConstant: return 2;
48   case SpvStorageClassWorkgroup: return 3;
49   case SpvStorageClassGeneric: return 4;
50   default: return -1;
51   }
52}
53
54
/* Build the Itanium-C++-ABI-style mangled name libclc uses for an OpenCL
 * builtin, e.g. "_Z<len><name>" followed by one encoded entry per argument
 * type ('P' for pointer, "U3AS<n>" for a non-default address space, 'K' for
 * const, "Dv<n>_" for vectors, then a one-letter scalar code).
 *
 * in_name:    unmangled builtin name
 * const_mask: bit i set mangles source i with a 'K' (const) qualifier
 * ntypes:     number of entries in src_types
 * outstring:  receives a strdup()ed mangled name; the caller must free() it
 *
 * NOTE(review): the sprintf() calls below are unbounded into a 256-byte
 * buffer — assumes every name we generate fits; TODO confirm.
 */
static void
vtn_opencl_mangle(const char *in_name,
                  uint32_t const_mask,
                  int ntypes, struct vtn_type **src_types,
                  char **outstring)
{
   char local_name[256] = "";
   char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);

   for (unsigned i = 0; i < ntypes; ++i) {
      const struct glsl_type *type = src_types[i]->type;
      enum vtn_base_type base_type = src_types[i]->base_type;
      if (src_types[i]->base_type == vtn_base_type_pointer) {
         *(args_str++) = 'P';
         /* Address space 0 (private/function) is the default and is not
          * spelled out in the mangling.
          */
         int address_space = to_llvm_address_space(src_types[i]->storage_class);
         if (address_space > 0)
            args_str += sprintf(args_str, "U3AS%d", address_space);

         /* From here on, mangle the pointee type. */
         type = src_types[i]->deref->type;
         base_type = src_types[i]->deref->base_type;
      }

      if (const_mask & (1 << i))
         *(args_str++) = 'K';

      unsigned num_elements = glsl_get_components(type);
      if (num_elements > 1) {
         /* Vectors are not treated as built-ins for mangling, so check for substitution.
          * In theory, we'd need to know which substitution value this is. In practice,
          * the functions we need from libclc only support 1
          */
         bool substitution = false;
         for (unsigned j = 0; j < i; ++j) {
            const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
               src_types[j]->deref->type : src_types[j]->type;
            if (type == other_type) {
               substitution = true;
               break;
            }
         }

         if (substitution) {
            /* "S_" refers back to the first previously-seen identical type. */
            args_str += sprintf(args_str, "S_");
            continue;
         } else
            args_str += sprintf(args_str, "Dv%d_", num_elements);
      }

      const char *suffix = NULL;
      switch (base_type) {
      case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
      case vtn_base_type_event: suffix = "9ocl_event"; break;
      default: {
         /* Itanium ABI one-letter codes for the scalar element type. */
         const char *primitives[] = {
            [GLSL_TYPE_UINT] = "j",
            [GLSL_TYPE_INT] = "i",
            [GLSL_TYPE_FLOAT] = "f",
            [GLSL_TYPE_FLOAT16] = "Dh",
            [GLSL_TYPE_DOUBLE] = "d",
            [GLSL_TYPE_UINT8] = "h",
            [GLSL_TYPE_INT8] = "c",
            [GLSL_TYPE_UINT16] = "t",
            [GLSL_TYPE_INT16] = "s",
            [GLSL_TYPE_UINT64] = "m",
            [GLSL_TYPE_INT64] = "l",
            [GLSL_TYPE_BOOL] = "b",
            [GLSL_TYPE_ERROR] = NULL,
         };
         enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
         assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
         suffix = primitives[glsl_base_type];
         break;
      }
      }
      args_str += sprintf(args_str, "%s", suffix);
   }

   *outstring = strdup(local_name);
}
134
/* Mangle `name` for the given argument types and look the function up, first
 * in the shader being built, then in the linked libclc shader.  When found
 * only in the libclc shader, a parameter-compatible declaration is created in
 * the current shader so it can be called (the body is linked in later).
 * Fails translation if no match exists anywhere.
 */
static nir_function *mangle_and_find(struct vtn_builder *b,
                                     const char *name,
                                     uint32_t const_mask,
                                     uint32_t num_srcs,
                                     struct vtn_type **src_types)
{
   char *mname;
   nir_function *found = NULL;

   vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
   /* try and find in current shader first. */
   nir_foreach_function(funcs, b->shader) {
      if (!strcmp(funcs->name, mname)) {
         found = funcs;
         break;
      }
   }
   /* if not found here find in clc shader and create a decl mirroring it */
   if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
      nir_foreach_function(funcs, b->options->clc_shader) {
         if (!strcmp(funcs->name, mname)) {
            found = funcs;
            break;
         }
      }
      if (found) {
         /* Declaration only: copy the signature, leave the body empty. */
         nir_function *decl = nir_function_create(b->shader, mname);
         decl->num_params = found->num_params;
         decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
         for (unsigned i = 0; i < decl->num_params; i++) {
            decl->params[i] = found->params[i];
         }
         found = decl;
      }
   }
   if (!found)
      /* NOTE(review): vtn_fail presumably does not return normally, so mname
       * is not freed on this path — confirm against vtn_private.h.
       */
      vtn_fail("Can't find clc function %s\n", mname);
   free(mname);
   return found;
}
175
/* Emit a call to the mangled libclc function `name`.
 *
 * If dest_type is non-NULL, a local temporary is created and a deref to it is
 * passed as the (implicit) first parameter for the return value; that deref
 * is returned through *ret_deref_ptr (NULL for void functions).  The num_srcs
 * sources follow as the remaining call parameters.
 *
 * Returns false only if the function could not be found.
 */
static bool call_mangled_function(struct vtn_builder *b,
                                  const char *name,
                                  uint32_t const_mask,
                                  uint32_t num_srcs,
                                  struct vtn_type **src_types,
                                  const struct vtn_type *dest_type,
                                  nir_ssa_def **srcs,
                                  nir_deref_instr **ret_deref_ptr)
{
   nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
   if (!found)
      return false;

   nir_call_instr *call = nir_call_instr_create(b->shader, found);

   nir_deref_instr *ret_deref = NULL;
   uint32_t param_idx = 0;
   if (dest_type) {
      /* Return value comes back through a pointer-to-temporary param. */
      nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
                                                        glsl_get_bare_type(dest_type->type),
                                                        "return_tmp");
      ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
      call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
   }

   for (unsigned i = 0; i < num_srcs; i++)
      call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
   nir_builder_instr_insert(&b->nb, &call->instr);

   *ret_deref_ptr = ret_deref;
   return true;
}
208
209static void
210handle_instr(struct vtn_builder *b, uint32_t opcode,
211             const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
212{
213   struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;
214
215   nir_ssa_def *srcs[5] = { NULL };
216   struct vtn_type *src_types[5] = { NULL };
217   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
218   for (unsigned i = 0; i < num_srcs; i++) {
219      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
220      struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
221      srcs[i] = ssa->def;
222      src_types[i] = val->type;
223   }
224
225   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
226   if (result) {
227      vtn_push_nir_ssa(b, w_dest[1], result);
228   } else {
229      vtn_assert(dest_type == NULL);
230   }
231}
232
/* 1:1 mapping from OpenCL.std opcodes to NIR ALU ops, for the builtins that
 * need no extra lowering.  Fails translation for anything unmapped.
 */
static nir_op
nir_alu_op_for_opencl_opcode(struct vtn_builder *b,
                             enum OpenCLstd_Entrypoints opcode)
{
   switch (opcode) {
   case OpenCLstd_Fabs: return nir_op_fabs;
   case OpenCLstd_SAbs: return nir_op_iabs;
   case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
   case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
   case OpenCLstd_Ceil: return nir_op_fceil;
   case OpenCLstd_Floor: return nir_op_ffloor;
   case OpenCLstd_SHadd: return nir_op_ihadd;
   case OpenCLstd_UHadd: return nir_op_uhadd;
   case OpenCLstd_Fmax: return nir_op_fmax;
   case OpenCLstd_SMax: return nir_op_imax;
   case OpenCLstd_UMax: return nir_op_umax;
   case OpenCLstd_Fmin: return nir_op_fmin;
   case OpenCLstd_SMin: return nir_op_imin;
   case OpenCLstd_UMin: return nir_op_umin;
   case OpenCLstd_Mix: return nir_op_flrp;
   case OpenCLstd_Native_cos: return nir_op_fcos;
   case OpenCLstd_Native_divide: return nir_op_fdiv;
   case OpenCLstd_Native_exp2: return nir_op_fexp2;
   case OpenCLstd_Native_log2: return nir_op_flog2;
   case OpenCLstd_Native_powr: return nir_op_fpow;
   case OpenCLstd_Native_recip: return nir_op_frcp;
   case OpenCLstd_Native_rsqrt: return nir_op_frsq;
   case OpenCLstd_Native_sin: return nir_op_fsin;
   case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
   case OpenCLstd_SMul_hi: return nir_op_imul_high;
   case OpenCLstd_UMul_hi: return nir_op_umul_high;
   case OpenCLstd_Popcount: return nir_op_bit_count;
   case OpenCLstd_SRhadd: return nir_op_irhadd;
   case OpenCLstd_URhadd: return nir_op_urhadd;
   case OpenCLstd_Rsqrt: return nir_op_frsq;
   case OpenCLstd_Sign: return nir_op_fsign;
   case OpenCLstd_Sqrt: return nir_op_fsqrt;
   case OpenCLstd_SSub_sat: return nir_op_isub_sat;
   case OpenCLstd_USub_sat: return nir_op_usub_sat;
   case OpenCLstd_Trunc: return nir_op_ftrunc;
   case OpenCLstd_Rint: return nir_op_fround_even;
   case OpenCLstd_Half_divide: return nir_op_fdiv;
   case OpenCLstd_Half_recip: return nir_op_frcp;
   /* uhm... (abs of an unsigned value is the identity) */
   case OpenCLstd_UAbs: return nir_op_mov;
   default:
      vtn_fail("No NIR equivalent");
   }
}
282
283static nir_ssa_def *
284handle_alu(struct vtn_builder *b, uint32_t opcode,
285           unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
286           const struct vtn_type *dest_type)
287{
288   nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
289                                    srcs[0], srcs[1], srcs[2], NULL);
290   if (opcode == OpenCLstd_Popcount)
291      ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
292   return ret;
293}
294
/* Table of OpenCL.std opcodes lowered by calling the identically-behaving
 * libclc function; indexed by opcode, entries left NULL are not remapped.
 */
#define REMAP(op, str) [OpenCLstd_##op] = { str }
static const struct {
   const char *fn;
} remap_table[] = {
   REMAP(Distance, "distance"),
   REMAP(Fast_distance, "fast_distance"),
   REMAP(Fast_length, "fast_length"),
   REMAP(Fast_normalize, "fast_normalize"),
   REMAP(Half_rsqrt, "half_rsqrt"),
   REMAP(Half_sqrt, "half_sqrt"),
   REMAP(Length, "length"),
   REMAP(Normalize, "normalize"),
   REMAP(Degrees, "degrees"),
   REMAP(Radians, "radians"),
   REMAP(Rotate, "rotate"),
   REMAP(Smoothstep, "smoothstep"),
   REMAP(Step, "step"),

   REMAP(Pow, "pow"),
   REMAP(Pown, "pown"),
   REMAP(Powr, "powr"),
   REMAP(Rootn, "rootn"),
   REMAP(Modf, "modf"),

   REMAP(Acos, "acos"),
   REMAP(Acosh, "acosh"),
   REMAP(Acospi, "acospi"),
   REMAP(Asin, "asin"),
   REMAP(Asinh, "asinh"),
   REMAP(Asinpi, "asinpi"),
   REMAP(Atan, "atan"),
   REMAP(Atan2, "atan2"),
   REMAP(Atanh, "atanh"),
   REMAP(Atanpi, "atanpi"),
   REMAP(Atan2pi, "atan2pi"),
   REMAP(Cos, "cos"),
   REMAP(Cosh, "cosh"),
   REMAP(Cospi, "cospi"),
   REMAP(Sin, "sin"),
   REMAP(Sinh, "sinh"),
   REMAP(Sinpi, "sinpi"),
   REMAP(Tan, "tan"),
   REMAP(Tanh, "tanh"),
   REMAP(Tanpi, "tanpi"),
   REMAP(Sincos, "sincos"),
   REMAP(Fract, "fract"),
   REMAP(Frexp, "frexp"),
   REMAP(Fma, "fma"),
   REMAP(Fmod, "fmod"),

   /* The "half_" precision variants fall back to the full-precision
    * implementations.
    */
   REMAP(Half_cos, "cos"),
   REMAP(Half_exp, "exp"),
   REMAP(Half_exp2, "exp2"),
   REMAP(Half_exp10, "exp10"),
   REMAP(Half_log, "log"),
   REMAP(Half_log2, "log2"),
   REMAP(Half_log10, "log10"),
   REMAP(Half_powr, "powr"),
   REMAP(Half_sin, "sin"),
   REMAP(Half_tan, "tan"),

   REMAP(Remainder, "remainder"),
   REMAP(Remquo, "remquo"),
   REMAP(Hypot, "hypot"),
   REMAP(Exp, "exp"),
   REMAP(Exp2, "exp2"),
   REMAP(Exp10, "exp10"),
   REMAP(Expm1, "expm1"),
   REMAP(Ldexp, "ldexp"),

   REMAP(Ilogb, "ilogb"),
   REMAP(Log, "log"),
   REMAP(Log2, "log2"),
   REMAP(Log10, "log10"),
   REMAP(Log1p, "log1p"),
   REMAP(Logb, "logb"),

   REMAP(Cbrt, "cbrt"),
   REMAP(Erfc, "erfc"),
   REMAP(Erf, "erf"),

   REMAP(Lgamma, "lgamma"),
   REMAP(Lgamma_r, "lgamma_r"),
   REMAP(Tgamma, "tgamma"),

   /* Signedness is handled by mangling (see handle_clc_fn). */
   REMAP(UMad_sat, "mad_sat"),
   REMAP(SMad_sat, "mad_sat"),

   REMAP(Shuffle, "shuffle"),
   REMAP(Shuffle2, "shuffle2"),
};
#undef REMAP
387
388static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
389{
390   if (opcode >= (sizeof(remap_table) / sizeof(const char *)))
391      return NULL;
392   return remap_table[opcode].fn;
393}
394
395static struct vtn_type *
396get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
397{
398   struct vtn_type *ret = rzalloc(b, struct vtn_type);
399   assert(glsl_type_is_vector_or_scalar(type));
400   ret->type = type;
401   ret->length = glsl_get_vector_elements(type);
402   ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
403   return ret;
404}
405
406static struct vtn_type *
407get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
408{
409   struct vtn_type *ret = rzalloc(b, struct vtn_type);
410   ret->type = nir_address_format_to_glsl_type(
411            vtn_mode_to_address_format(
412               b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
413   ret->base_type = vtn_base_type_pointer;
414   ret->storage_class = storage_class;
415   ret->deref = t;
416   return ret;
417}
418
419static struct vtn_type *
420get_signed_type(struct vtn_builder *b, struct vtn_type *t)
421{
422   if (t->base_type == vtn_base_type_pointer) {
423      return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
424   }
425   return get_vtn_type_for_glsl_type(
426      b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
427                          glsl_get_vector_elements(t->type)));
428}
429
430static nir_ssa_def *
431handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
432              int num_srcs,
433              nir_ssa_def **srcs,
434              struct vtn_type **src_types,
435              const struct vtn_type *dest_type)
436{
437   const char *name = remap_clc_opcode(opcode);
438   if (!name)
439       return NULL;
440
441   /* Some functions which take params end up with uint (or pointer-to-uint) being passed,
442    * which doesn't mangle correctly when the function expects int or pointer-to-int.
443    * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
444    */
445   int signed_param = -1;
446   switch (opcode) {
447   case OpenCLstd_Frexp:
448   case OpenCLstd_Lgamma_r:
449   case OpenCLstd_Pown:
450   case OpenCLstd_Rootn:
451   case OpenCLstd_Ldexp:
452      signed_param = 1;
453      break;
454   case OpenCLstd_Remquo:
455      signed_param = 2;
456      break;
457   case OpenCLstd_SMad_sat: {
458      /* All parameters need to be converted to signed */
459      src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
460      break;
461   }
462   default: break;
463   }
464
465   if (signed_param >= 0) {
466      src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
467   }
468
469   nir_deref_instr *ret_deref = NULL;
470
471   if (!call_mangled_function(b, name, 0, num_srcs, src_types,
472                              dest_type, srcs, &ret_deref))
473      return NULL;
474
475   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
476}
477
/* Handler for opcodes that need more than one NIR ALU op.  Ops implemented
 * directly with NIR builders are handled in the switch; anything that falls
 * through (including ops gated off by shader options) is lowered via a
 * libclc call in handle_clc_fn().
 */
static nir_ssa_def *
handle_special(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   nir_builder *nb = &b->nb;
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;

   switch (cl_opcode) {
   case OpenCLstd_SAbs_diff:
     /* these works easier in direct NIR */
      return nir_iabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_UAbs_diff:
      return nir_uabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_Bitselect:
      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMad_hi:
      return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UMad_hi:
      return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMul24:
      return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_UMul24:
      return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_SMad24:
      return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
   case OpenCLstd_UMad24:
      return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_FClamp:
      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SClamp:
      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UClamp:
      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Copysign:
      return nir_copysign(nb, srcs[0], srcs[1]);
   case OpenCLstd_Cross:
      if (dest_type->length == 4)
         return nir_cross4(nb, srcs[0], srcs[1]);
      return nir_cross3(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fdim:
      return nir_fdim(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fmod:
      /* Fall through to the libclc implementation when the backend wants
       * fmod lowered.
       */
      if (nb->shader->options->lower_fmod)
         break;
      return nir_fmod(nb, srcs[0], srcs[1]);
   case OpenCLstd_Mad:
      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Maxmag:
      return nir_maxmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Minmag:
      return nir_minmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Nan:
      return nir_nan(nb, srcs[0]);
   case OpenCLstd_Nextafter:
      return nir_nextafter(nb, srcs[0], srcs[1]);
   case OpenCLstd_Normalize:
      return nir_normalize(nb, srcs[0]);
   case OpenCLstd_Clz:
      return nir_clz_u(nb, srcs[0]);
   case OpenCLstd_Ctz:
      return nir_ctz_u(nb, srcs[0]);
   case OpenCLstd_Select:
      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
      /* SPIR-V and CL have different defs for upsample, just implement in nir */
      return nir_upsample(nb, srcs[0], srcs[1]);
   case OpenCLstd_Native_exp:
      return nir_fexp(nb, srcs[0]);
   case OpenCLstd_Native_exp10:
      /* exp10(x) = exp2(x * log2(10)) */
      return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
   case OpenCLstd_Native_log:
      return nir_flog(nb, srcs[0]);
   case OpenCLstd_Native_log10:
      /* log10(x) = log2(x) / log2(10) */
      return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
   case OpenCLstd_Native_tan:
      return nir_ftan(nb, srcs[0]);
   case OpenCLstd_Ldexp:
      if (nb->shader->options->lower_ldexp)
         break;
      return nir_ldexp(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fma:
      /* FIXME: the software implementation only supports fp32 for now. */
      if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
         break;
      return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
   default:
      break;
   }

   nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (!ret)
      vtn_fail("No NIR equivalent");

   return ret;
}
575
/* Handler for core SPIR-V opcodes (not OpenCL.std) that are lowered to
 * libclc calls: async group copies and event waits.
 */
static nir_ssa_def *
handle_core(struct vtn_builder *b, uint32_t opcode,
            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
            const struct vtn_type *dest_type)
{
   nir_deref_instr *ret_deref = NULL;

   switch ((SpvOp)opcode) {
   case SpvOpGroupAsyncCopy: {
      /* Libclc doesn't include 3-component overloads of the async copy functions.
       * However, the CLC spec says:
       * async_work_group_copy and async_work_group_strided_copy for 3-component vector types
       * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component
       * vector types
       */
      for (unsigned i = 0; i < num_srcs; ++i) {
         if (src_types[i]->base_type == vtn_base_type_pointer &&
             src_types[i]->deref->base_type == vtn_base_type_vector &&
             src_types[i]->deref->length == 3) {
            /* Mangle vec3 pointer args as vec4 pointers. */
            src_types[i] =
               get_pointer_type(b,
                                get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
                                src_types[i]->storage_class);
         }
      }
      /* const_mask (1 << 1): source 1 (the copy source pointer) is const. */
      if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   case SpvOpGroupWaitEvents: {
      /* libclc's wait_group_events takes a plain int event count. */
      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
      if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   default:
      return NULL;
   }

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}
617
618
/* Shared implementation of the vloadn/vstoren family.
 *
 * load:        true for vload*, false for vstore*
 * vec_aligned: true for the "a" (aligned) variants — the offset then counts
 *              whole vectors, with 3-component vectors padded to 4 elements
 * rounding:    rounding mode for float-to-half stores
 *              (nir_rounding_mode_undef for the unsuffixed variants)
 *
 * Word layout (from the uses below): for loads, w[1] is the result type and
 * w[2] the result id; for stores, w[5] is the data operand.  The element
 * offset and pointer follow at w[5]/w[6] (loads) or w[6]/w[7] (stores).
 * The half variants convert between the pointee's fp16 and the value's
 * float/double type per component.
 */
static void
_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                     const uint32_t *w, unsigned count, bool load,
                     bool vec_aligned, nir_rounding_mode rounding)
{
   struct vtn_type *type;
   if (load)
      type = vtn_get_type(b, w[1]);
   else
      type = vtn_get_value_type(b, w[5]);
   unsigned a = load ? 0 : 1;

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   unsigned components = glsl_get_vector_elements(type->type);

   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);

   struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];

   /* Scale the vector offset to a scalar element offset; the aligned
    * variants round vec3 up to a vec4 stride.
    */
   nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
      (vec_aligned && components == 3) ? 4 : components);
   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);

   unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
                                      glsl_get_bit_size(type->type) / 8;
   enum glsl_base_type ptr_base_type =
      glsl_get_base_type(p->pointer->type->type);
   if (base_type != ptr_base_type) {
      /* Only the half variants may convert, and only fp16 <-> fp32/fp64. */
      vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
                  (base_type != GLSL_TYPE_FLOAT &&
                   base_type != GLSL_TYPE_DOUBLE),
                  "vload/vstore cannot do type conversion. "
                  "vload/vstore_half can only convert from half to other "
                  "floating-point types.");

      /* Above-computed alignment was for floats/doubles, not halves */
      alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
   }

   deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);

   /* Load/store one scalar component at a time. */
   for (int i = 0; i < components; i++) {
      nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
      nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);

      if (load) {
         comps[i] = vtn_local_load(b, arr_deref, p->type->access);
         ncomps[i] = comps[i]->def;
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
                                 glsl_base_type_get_bit_size(base_type));
         }
      } else {
         struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
         struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
         ssa->def = nir_channel(&b->nb, val->def, i);
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            if (rounding == nir_rounding_mode_undef) {
               ssa->def = nir_f2f16(&b->nb, ssa->def);
            } else {
               /* Explicit-rounding variants need the full conversion op. */
               ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
                                                nir_type_float | ssa->def->bit_size,
                                                nir_type_float16,
                                                rounding, false);
            }
         }
         vtn_local_store(b, ssa, arr_deref, p->type->access);
      }
   }
   if (load) {
      vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
   }
}
700
701static void
702vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
703                        const uint32_t *w, unsigned count)
704{
705   _handle_v_load_store(b, opcode, w, count, true,
706                        opcode == OpenCLstd_Vloada_halfn,
707                        nir_rounding_mode_undef);
708}
709
710static void
711vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
712                         const uint32_t *w, unsigned count)
713{
714   _handle_v_load_store(b, opcode, w, count, false,
715                        opcode == OpenCLstd_Vstorea_halfn,
716                        nir_rounding_mode_undef);
717}
718
719static void
720vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
721                                const uint32_t *w, unsigned count)
722{
723   _handle_v_load_store(b, opcode, w, count, false,
724                        opcode == OpenCLstd_Vstorea_halfn_r,
725                        vtn_rounding_mode_to_nir(b, w[8]));
726}
727
/* Append the constant string pointed to by SPIR-V value `id` to the printf
 * info's string pool and return its starting offset within the pool.
 * The string must be a NUL-terminated char array in constant memory.
 */
static unsigned
vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
{
   nir_deref_instr *deref = vtn_nir_deref(b, id);

   /* Walk up to the variable the deref chain is rooted at. */
   while (deref && deref->deref_type != nir_deref_type_var)
      deref = nir_deref_instr_parent(deref);

   vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
               "Printf string argument must be a pointer to a constant variable");
   vtn_fail_if(deref->var->constant_initializer == NULL,
               "Printf string argument must have an initializer");
   vtn_fail_if(!glsl_type_is_array(deref->var->type),
               "Printf string must be an char array");
   const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
   vtn_fail_if(char_type != glsl_uint8_t_type() &&
               char_type != glsl_int8_t_type(),
               "Printf string must be an char array");

   nir_constant *c = deref->var->constant_initializer;
   assert(c->num_elements == glsl_get_length(deref->var->type));

   /* Grow the pool and copy the string's bytes (one constant per char). */
   unsigned idx = info->string_size;
   info->strings = reralloc_size(b->shader, info->strings,
                                 idx + c->num_elements);
   info->string_size += c->num_elements;

   char *str = &info->strings[idx];
   bool found_null = false;
   for (unsigned i = 0; i < c->num_elements; i++) {
      memcpy((char *)str + i, c->elements[i]->values, 1);
      if (str[i] == '\0')
         found_null = true;
   }
   vtn_fail_if(!found_null, "Printf string must be null terminated");
   return idx;
}
765
766/* printf is special because there are no limits on args */
767static void
768handle_printf(struct vtn_builder *b, uint32_t opcode,
769              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
770{
771   if (!b->options->caps.printf) {
772      vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
773      return;
774   }
775
776   /* Step 1. extract the format string */
777
778   /*
779    * info_idx is 1-based to match clover/llvm
780    * the backend indexes the info table at info_idx - 1.
781    */
782   b->shader->printf_info_count++;
783   unsigned info_idx = b->shader->printf_info_count;
784
785   b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
786                                     nir_printf_info, info_idx);
787   nir_printf_info *info = &b->shader->printf_info[info_idx - 1];
788
789   info->strings = NULL;
790   info->string_size = 0;
791
792   vtn_add_printf_string(b, w_src[0], info);
793
794   info->num_args = num_srcs - 1;
795   info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);
796
797   /* Step 2, build an ad-hoc struct type out of the args */
798   unsigned field_offset = 0;
799   struct glsl_struct_field *fields =
800      rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
801   for (unsigned i = 1; i < num_srcs; ++i) {
802      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
803      struct vtn_type *src_type = val->type;
804      fields[i - 1].type = src_type->type;
805      fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
806      field_offset = align(field_offset, 4);
807      fields[i - 1].offset = field_offset;
808      info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
809      field_offset += glsl_get_cl_size(src_type->type);
810   }
811   const struct glsl_type *struct_type =
812      glsl_struct_type(fields, num_srcs - 1, "printf", true);
813
814   /* Step 3, create a variable of that type and populate its fields */
815   nir_variable *var = nir_local_variable_create(b->nb.impl, struct_type, NULL);
816   nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
817   size_t fmt_pos = 0;
818   for (unsigned i = 1; i < num_srcs; ++i) {
819      nir_deref_instr *field_deref =
820         nir_build_deref_struct(&b->nb, deref_var, i - 1);
821      nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
822      /* extract strings */
823      fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
824      if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
825         unsigned idx = vtn_add_printf_string(b, w_src[i], info);
826         nir_store_deref(&b->nb, field_deref,
827                         nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
828                         ~0 /* write_mask */);
829      } else
830         nir_store_deref(&b->nb, field_deref, field_src, ~0);
831   }
832
833   /* Lastly, the actual intrinsic */
834   nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
835   nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
836   vtn_push_nir_ssa(b, w_dest[1], ret);
837}
838
839static nir_ssa_def *
840handle_round(struct vtn_builder *b, uint32_t opcode,
841             unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
842             const struct vtn_type *dest_type)
843{
844   nir_ssa_def *src = srcs[0];
845   nir_builder *nb = &b->nb;
846   nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
847   nir_ssa_def *truncated = nir_ftrunc(nb, src);
848   nir_ssa_def *remainder = nir_fsub(nb, src, truncated);
849
850   return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
851                    nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
852}
853
854static nir_ssa_def *
855handle_shuffle(struct vtn_builder *b, uint32_t opcode,
856               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
857               const struct vtn_type *dest_type)
858{
859   struct nir_ssa_def *input = srcs[0];
860   struct nir_ssa_def *mask = srcs[1];
861
862   unsigned out_elems = dest_type->length;
863   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
864   unsigned in_elems = input->num_components;
865   if (mask->bit_size != 32)
866      mask = nir_u2u32(&b->nb, mask);
867   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
868   for (unsigned i = 0; i < out_elems; i++)
869      outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));
870
871   return nir_vec(&b->nb, outres, out_elems);
872}
873
874static nir_ssa_def *
875handle_shuffle2(struct vtn_builder *b, uint32_t opcode,
876                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
877                const struct vtn_type *dest_type)
878{
879   struct nir_ssa_def *input0 = srcs[0];
880   struct nir_ssa_def *input1 = srcs[1];
881   struct nir_ssa_def *mask = srcs[2];
882
883   unsigned out_elems = dest_type->length;
884   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
885   unsigned in_elems = input0->num_components;
886   unsigned total_mask = 2 * in_elems - 1;
887   unsigned half_mask = in_elems - 1;
888   if (mask->bit_size != 32)
889      mask = nir_u2u32(&b->nb, mask);
890   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
891   for (unsigned i = 0; i < out_elems; i++) {
892      nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
893      nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
894      nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
895      nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
896      nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
897      outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
898   }
899   return nir_vec(&b->nb, outres, out_elems);
900}
901
/* Dispatch one OpenCL.std extended instruction to the appropriate
 * handler.
 *
 * The word layout follows SPIR-V OpExtInst: w + 1 points at the result
 * type/result-id pair and w + 5 at the first operand, hence the
 * "w + 5, count - 5, w + 1" slicing below.  Returns true if the opcode
 * was handled; vtn_fail() is raised on unknown opcodes.
 */
bool
vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                              const uint32_t *w, unsigned count)
{
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;

   switch (cl_opcode) {
   /* Ops with a direct NIR ALU mapping. */
   case OpenCLstd_Fabs:
   case OpenCLstd_SAbs:
   case OpenCLstd_UAbs:
   case OpenCLstd_SAdd_sat:
   case OpenCLstd_UAdd_sat:
   case OpenCLstd_Ceil:
   case OpenCLstd_Floor:
   case OpenCLstd_Fmax:
   case OpenCLstd_SHadd:
   case OpenCLstd_UHadd:
   case OpenCLstd_SMax:
   case OpenCLstd_UMax:
   case OpenCLstd_Fmin:
   case OpenCLstd_SMin:
   case OpenCLstd_UMin:
   case OpenCLstd_Mix:
   case OpenCLstd_Native_cos:
   case OpenCLstd_Native_divide:
   case OpenCLstd_Native_exp2:
   case OpenCLstd_Native_log2:
   case OpenCLstd_Native_powr:
   case OpenCLstd_Native_recip:
   case OpenCLstd_Native_rsqrt:
   case OpenCLstd_Native_sin:
   case OpenCLstd_Native_sqrt:
   case OpenCLstd_SMul_hi:
   case OpenCLstd_UMul_hi:
   case OpenCLstd_Popcount:
   case OpenCLstd_SRhadd:
   case OpenCLstd_URhadd:
   case OpenCLstd_Rsqrt:
   case OpenCLstd_Sign:
   case OpenCLstd_Sqrt:
   case OpenCLstd_SSub_sat:
   case OpenCLstd_USub_sat:
   case OpenCLstd_Trunc:
   case OpenCLstd_Rint:
   case OpenCLstd_Half_divide:
   case OpenCLstd_Half_recip:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
      return true;
   /* Ops that need to be open-coded from multiple NIR instructions
    * (or nir_builtin_builder helpers).
    */
   case OpenCLstd_SAbs_diff:
   case OpenCLstd_UAbs_diff:
   case OpenCLstd_SMad_hi:
   case OpenCLstd_UMad_hi:
   case OpenCLstd_SMad24:
   case OpenCLstd_UMad24:
   case OpenCLstd_SMul24:
   case OpenCLstd_UMul24:
   case OpenCLstd_Bitselect:
   case OpenCLstd_FClamp:
   case OpenCLstd_SClamp:
   case OpenCLstd_UClamp:
   case OpenCLstd_Copysign:
   case OpenCLstd_Cross:
   case OpenCLstd_Degrees:
   case OpenCLstd_Fdim:
   case OpenCLstd_Fma:
   case OpenCLstd_Distance:
   case OpenCLstd_Fast_distance:
   case OpenCLstd_Fast_length:
   case OpenCLstd_Fast_normalize:
   case OpenCLstd_Half_rsqrt:
   case OpenCLstd_Half_sqrt:
   case OpenCLstd_Length:
   case OpenCLstd_Mad:
   case OpenCLstd_Maxmag:
   case OpenCLstd_Minmag:
   case OpenCLstd_Nan:
   case OpenCLstd_Nextafter:
   case OpenCLstd_Normalize:
   case OpenCLstd_Radians:
   case OpenCLstd_Rotate:
   case OpenCLstd_Select:
   case OpenCLstd_Step:
   case OpenCLstd_Smoothstep:
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
   case OpenCLstd_Clz:
   case OpenCLstd_Ctz:
   case OpenCLstd_Native_exp:
   case OpenCLstd_Native_exp10:
   case OpenCLstd_Native_log:
   case OpenCLstd_Native_log10:
   case OpenCLstd_Acos:
   case OpenCLstd_Acosh:
   case OpenCLstd_Acospi:
   case OpenCLstd_Asin:
   case OpenCLstd_Asinh:
   case OpenCLstd_Asinpi:
   case OpenCLstd_Atan:
   case OpenCLstd_Atan2:
   case OpenCLstd_Atanh:
   case OpenCLstd_Atanpi:
   case OpenCLstd_Atan2pi:
   case OpenCLstd_Fract:
   case OpenCLstd_Frexp:
   case OpenCLstd_Exp:
   case OpenCLstd_Exp2:
   case OpenCLstd_Expm1:
   case OpenCLstd_Exp10:
   case OpenCLstd_Fmod:
   case OpenCLstd_Ilogb:
   case OpenCLstd_Log:
   case OpenCLstd_Log2:
   case OpenCLstd_Log10:
   case OpenCLstd_Log1p:
   case OpenCLstd_Logb:
   case OpenCLstd_Ldexp:
   case OpenCLstd_Cos:
   case OpenCLstd_Cosh:
   case OpenCLstd_Cospi:
   case OpenCLstd_Sin:
   case OpenCLstd_Sinh:
   case OpenCLstd_Sinpi:
   case OpenCLstd_Tan:
   case OpenCLstd_Tanh:
   case OpenCLstd_Tanpi:
   case OpenCLstd_Cbrt:
   case OpenCLstd_Erfc:
   case OpenCLstd_Erf:
   case OpenCLstd_Lgamma:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Tgamma:
   case OpenCLstd_Pow:
   case OpenCLstd_Powr:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Remainder:
   case OpenCLstd_Remquo:
   case OpenCLstd_Hypot:
   case OpenCLstd_Sincos:
   case OpenCLstd_Modf:
   case OpenCLstd_UMad_sat:
   case OpenCLstd_SMad_sat:
   case OpenCLstd_Native_tan:
   case OpenCLstd_Half_cos:
   case OpenCLstd_Half_exp:
   case OpenCLstd_Half_exp2:
   case OpenCLstd_Half_exp10:
   case OpenCLstd_Half_log:
   case OpenCLstd_Half_log2:
   case OpenCLstd_Half_log10:
   case OpenCLstd_Half_powr:
   case OpenCLstd_Half_sin:
   case OpenCLstd_Half_tan:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
      return true;
   /* Vector load/store builtins, including the half-precision variants. */
   case OpenCLstd_Vloadn:
   case OpenCLstd_Vload_half:
   case OpenCLstd_Vload_halfn:
   case OpenCLstd_Vloada_halfn:
      vtn_handle_opencl_vload(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstoren:
   case OpenCLstd_Vstore_half:
   case OpenCLstd_Vstore_halfn:
   case OpenCLstd_Vstorea_halfn:
      vtn_handle_opencl_vstore(b, cl_opcode, w, count);
      return true;
   /* Explicit-rounding-mode half stores need a separate path. */
   case OpenCLstd_Vstore_half_r:
   case OpenCLstd_Vstore_halfn_r:
   case OpenCLstd_Vstorea_halfn_r:
      vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Shuffle:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
      return true;
   case OpenCLstd_Shuffle2:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
      return true;
   case OpenCLstd_Round:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
      return true;
   case OpenCLstd_Printf:
      handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
      return true;
   case OpenCLstd_Prefetch:
      /* TODO maybe add a nir instruction for this? */
      return true;
   default:
      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
      return false;
   }
}
1094
1095bool
1096vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
1097                                   const uint32_t *w, unsigned count)
1098{
1099   switch (opcode) {
1100   case SpvOpGroupAsyncCopy:
1101      handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
1102      return true;
1103   case SpvOpGroupWaitEvents:
1104      handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
1105      return true;
1106   default:
1107      return false;
1108   }
1109   return true;
1110}
1111