1/*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_builder.h"
26#include "nir_deref.h"
27#include "util/hash_table.h"
28
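/* A deref_cast is "trivial" when it changes nothing observable about its
 * parent deref: same modes, same type, and the same SSA shape (number of
 * components and bit size).  Such casts carry no information and can be
 * skipped when walking deref chains.
 */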
29static bool
30is_trivial_deref_cast(nir_deref_instr *cast)
31{
32   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
33   if (!parent)
34      return false;
35
36   return cast->modes == parent->modes &&
37          cast->type == parent->type &&
38          cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
39          cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
40}
41
42void
43nir_deref_path_init(nir_deref_path *path,
44                    nir_deref_instr *deref, void *mem_ctx)
45{
46   assert(deref != NULL);
47
48   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
49    * room for the NULL terminator.
50    */
51   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
52
53   int count = 0;
54
55   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
56   nir_deref_instr **head = tail;
57
58   *tail = NULL;
59   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
60      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
61         continue;
62      count++;
63      if (count <= max_short_path_len)
64         *(--head) = d;
65   }
66
67   if (count <= max_short_path_len) {
      /* If the path fits in the short path array, just use it. */
69      path->path = head;
70      goto done;
71   }
72
73#ifndef NDEBUG
   /* Just in case someone uses _short_path by accident */
75   for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
76      path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
77#endif
78
79   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
80   head = tail = path->path + count;
81   *tail = NULL;
82   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
83      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
84         continue;
85      *(--head) = d;
86   }
87
88done:
89   assert(head == path->path);
90   assert(tail == head + count);
91   assert(*tail == NULL);
92}
93
94void
95nir_deref_path_finish(nir_deref_path *path)
96{
97   if (path->path < &path->_short_path[0] ||
98       path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
99      ralloc_free(path->path);
100}
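
/* A minimal usage sketch (illustrative only, not lifted from a particular
 * caller): walk the chain from the root (a variable or cast deref) to the
 * final deref.
 *
 *    nir_deref_path path;
 *    nir_deref_path_init(&path, deref, mem_ctx);
 *    for (nir_deref_instr **p = path.path; *p; p++) {
 *       ... inspect (*p)->deref_type ...
 *    }
 *    nir_deref_path_finish(&path);
 */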
101
102/**
103 * Recursively removes unused deref instructions
104 */
105bool
106nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
107{
108   bool progress = false;
109
110   for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
111      /* If anyone is using this deref, leave it alone */
112      assert(d->dest.is_ssa);
113      if (!nir_ssa_def_is_unused(&d->dest.ssa))
114         break;
115
116      nir_instr_remove(&d->instr);
117      progress = true;
118   }
119
120   return progress;
121}
122
123bool
124nir_deref_instr_has_indirect(nir_deref_instr *instr)
125{
126   while (instr->deref_type != nir_deref_type_var) {
127      /* Consider casts to be indirects */
128      if (instr->deref_type == nir_deref_type_cast)
129         return true;
130
131      if ((instr->deref_type == nir_deref_type_array ||
132           instr->deref_type == nir_deref_type_ptr_as_array) &&
133          !nir_src_is_const(instr->arr.index))
134         return true;
135
136      instr = nir_deref_instr_parent(instr);
137   }
138
139   return false;
140}
141
142bool
143nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
144{
145   for (; instr; instr = nir_deref_instr_parent(instr)) {
146      if (instr->deref_type == nir_deref_type_array &&
147          nir_src_is_const(instr->arr.index) &&
148           nir_src_as_uint(instr->arr.index) >=
149           glsl_get_length(nir_deref_instr_parent(instr)->type))
150         return true;
151   }
152
153   return false;
154}
155
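/**
 * Returns true if the deref (or any deref chained on top of it) has a use we
 * can't easily reason about: a use in an if-condition or an array index, a
 * child deref other than a plain struct/array/array_wildcard deref, or an
 * intrinsic other than load_deref, copy_deref, the destination of
 * store_deref, or (when allowed by opts) the source/destination of
 * memcpy_deref.
 */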
156bool
157nir_deref_instr_has_complex_use(nir_deref_instr *deref,
158                                nir_deref_instr_has_complex_use_options opts)
159{
160   nir_foreach_use(use_src, &deref->dest.ssa) {
161      nir_instr *use_instr = use_src->parent_instr;
162
163      switch (use_instr->type) {
164      case nir_instr_type_deref: {
165         nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);
166
167         /* A var deref has no sources */
168         assert(use_deref->deref_type != nir_deref_type_var);
169
170         /* If a deref shows up in an array index or something like that, it's
171          * a complex use.
172          */
173         if (use_src != &use_deref->parent)
174            return true;
175
176         /* Anything that isn't a basic struct or array deref is considered to
177          * be a "complex" use.  In particular, we don't allow ptr_as_array
178          * because we assume that opt_deref will turn any non-complex
179          * ptr_as_array derefs into regular array derefs eventually so passes
180          * which only want to handle simple derefs will pick them up in a
181          * later pass.
182          */
183         if (use_deref->deref_type != nir_deref_type_struct &&
184             use_deref->deref_type != nir_deref_type_array_wildcard &&
185             use_deref->deref_type != nir_deref_type_array)
186            return true;
187
188         if (nir_deref_instr_has_complex_use(use_deref, opts))
189            return true;
190
191         continue;
192      }
193
194      case nir_instr_type_intrinsic: {
195         nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
196         switch (use_intrin->intrinsic) {
197         case nir_intrinsic_load_deref:
198            assert(use_src == &use_intrin->src[0]);
199            continue;
200
201         case nir_intrinsic_copy_deref:
202            assert(use_src == &use_intrin->src[0] ||
203                   use_src == &use_intrin->src[1]);
204            continue;
205
206         case nir_intrinsic_store_deref:
207            /* A use in src[1] of a store means we're taking that pointer and
208             * writing it to a variable.  Because we have no idea who will
209             * read that variable and what they will do with the pointer, it's
210             * considered a "complex" use.  A use in src[0], on the other
211             * hand, is a simple use because we're just going to dereference
212             * it and write a value there.
213             */
214            if (use_src == &use_intrin->src[0])
215               continue;
216            return true;
217
218         case nir_intrinsic_memcpy_deref:
219            if (use_src == &use_intrin->src[0] &&
220                (opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
221               continue;
222            if (use_src == &use_intrin->src[1] &&
223                (opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
224               continue;
225            return true;
226
227         default:
228            return true;
229         }
230         unreachable("Switch default failed");
231      }
232
233      default:
234         return true;
235      }
236   }
237
238   nir_foreach_if_use(use, &deref->dest.ssa)
239      return true;
240
241   return false;
242}
243
244static unsigned
245type_scalar_size_bytes(const struct glsl_type *type)
246{
247   assert(glsl_type_is_vector_or_scalar(type) ||
248          glsl_type_is_matrix(type));
249   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
250}
251
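/**
 * Returns the array stride, in bytes, implied by an array-like deref: for
 * array and array_wildcard derefs, the parent type's explicit stride (with
 * row-major matrices and tightly packed vectors using the scalar size
 * instead); for ptr_as_array derefs, the parent's stride; for casts,
 * cast.ptr_stride.  Returns 0 for anything else.
 */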
252unsigned
253nir_deref_instr_array_stride(nir_deref_instr *deref)
254{
255   switch (deref->deref_type) {
256   case nir_deref_type_array:
257   case nir_deref_type_array_wildcard: {
258      const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
259      unsigned stride = glsl_get_explicit_stride(arr_type);
260
261      if ((glsl_type_is_matrix(arr_type) &&
262           glsl_matrix_type_is_row_major(arr_type)) ||
263          (glsl_type_is_vector(arr_type) && stride == 0))
264         stride = type_scalar_size_bytes(arr_type);
265
266      return stride;
267   }
268   case nir_deref_type_ptr_as_array:
269      return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
270   case nir_deref_type_cast:
271      return deref->cast.ptr_stride;
272   default:
273      return 0;
274   }
275}
276
277static unsigned
278type_get_array_stride(const struct glsl_type *elem_type,
279                      glsl_type_size_align_func size_align)
280{
281   unsigned elem_size, elem_align;
282   size_align(elem_type, &elem_size, &elem_align);
283   return ALIGN_POT(elem_size, elem_align);
284}
285
286static unsigned
287struct_type_get_field_offset(const struct glsl_type *struct_type,
288                             glsl_type_size_align_func size_align,
289                             unsigned field_idx)
290{
291   assert(glsl_type_is_struct_or_ifc(struct_type));
292   unsigned offset = 0;
293   for (unsigned i = 0; i <= field_idx; i++) {
294      unsigned elem_size, elem_align;
295      size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
296      offset = ALIGN_POT(offset, elem_align);
297      if (i < field_idx)
298         offset += elem_size;
299   }
300   return offset;
301}
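
/* Worked example (with a hypothetical size/align callback): for
 * struct { float f; vec4 v; }, a callback reporting float as 4 bytes with
 * 4-byte alignment and vec4 as 16 bytes with 16-byte alignment puts field 0
 * at offset 0 and field 1 at ALIGN_POT(4, 16) = 16.
 */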
302
303unsigned
304nir_deref_instr_get_const_offset(nir_deref_instr *deref,
305                                 glsl_type_size_align_func size_align)
306{
307   nir_deref_path path;
308   nir_deref_path_init(&path, deref, NULL);
309
310   unsigned offset = 0;
311   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
312      switch ((*p)->deref_type) {
313      case nir_deref_type_array:
314         offset += nir_src_as_uint((*p)->arr.index) *
315                   type_get_array_stride((*p)->type, size_align);
         break;
317      case nir_deref_type_struct: {
318         /* p starts at path[1], so this is safe */
319         nir_deref_instr *parent = *(p - 1);
320         offset += struct_type_get_field_offset(parent->type, size_align,
321                                                (*p)->strct.index);
         break;
323      }
324      case nir_deref_type_cast:
325         /* A cast doesn't contribute to the offset */
326         break;
327      default:
328         unreachable("Unsupported deref type");
329      }
330   }
331
332   nir_deref_path_finish(&path);
333
334   return offset;
335}
336
337nir_ssa_def *
338nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
339                       glsl_type_size_align_func size_align)
340{
341   nir_deref_path path;
342   nir_deref_path_init(&path, deref, NULL);
343
344   nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
345   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
346      switch ((*p)->deref_type) {
347      case nir_deref_type_array:
348      case nir_deref_type_ptr_as_array: {
349         nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
350         int stride = type_get_array_stride((*p)->type, size_align);
351         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
352         break;
353      }
354      case nir_deref_type_struct: {
355         /* p starts at path[1], so this is safe */
356         nir_deref_instr *parent = *(p - 1);
357         unsigned field_offset =
358            struct_type_get_field_offset(parent->type, size_align,
359                                         (*p)->strct.index);
360         offset = nir_iadd_imm(b, offset, field_offset);
361         break;
362      }
363      case nir_deref_type_cast:
364         /* A cast doesn't contribute to the offset */
365         break;
366      default:
367         unreachable("Unsupported deref type");
368      }
369   }
370
371   nir_deref_path_finish(&path);
372
373   return offset;
374}
375
376bool
377nir_remove_dead_derefs_impl(nir_function_impl *impl)
378{
379   bool progress = false;
380
381   nir_foreach_block(block, impl) {
382      nir_foreach_instr_safe(instr, block) {
383         if (instr->type == nir_instr_type_deref &&
384             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
385            progress = true;
386      }
387   }
388
389   if (progress) {
390      nir_metadata_preserve(impl, nir_metadata_block_index |
391                                  nir_metadata_dominance);
392   } else {
393      nir_metadata_preserve(impl, nir_metadata_all);
394   }
395
396   return progress;
397}
398
399bool
400nir_remove_dead_derefs(nir_shader *shader)
401{
402   bool progress = false;
403   nir_foreach_function(function, shader) {
404      if (function->impl && nir_remove_dead_derefs_impl(function->impl))
405         progress = true;
406   }
407
408   return progress;
409}
410
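/**
 * Re-derives the modes of every non-cast deref from its parent (or, for
 * deref_var, from its variable) so that mode information stays consistent,
 * typically after a pass has changed a variable's mode.
 */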
411void
412nir_fixup_deref_modes(nir_shader *shader)
413{
414   nir_foreach_function(function, shader) {
415      if (!function->impl)
416         continue;
417
418      nir_foreach_block(block, function->impl) {
419         nir_foreach_instr(instr, block) {
420            if (instr->type != nir_instr_type_deref)
421               continue;
422
423            nir_deref_instr *deref = nir_instr_as_deref(instr);
424            if (deref->deref_type == nir_deref_type_cast)
425               continue;
426
427            nir_variable_mode parent_modes;
428            if (deref->deref_type == nir_deref_type_var) {
429               parent_modes = deref->var->data.mode;
430            } else {
431               assert(deref->parent.is_ssa);
432               nir_deref_instr *parent =
433                  nir_instr_as_deref(deref->parent.ssa->parent_instr);
434               parent_modes = parent->modes;
435            }
436
437            deref->modes = parent_modes;
438         }
439      }
440   }
441}
442
443static bool
444modes_may_alias(nir_variable_mode a, nir_variable_mode b)
445{
   /* Generic (global) pointers can alias with SSBOs and with each other */
447   if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
448       (b & (nir_var_mem_ssbo | nir_var_mem_global)))
449      return true;
450
451   /* Pointers can only alias if they share a mode. */
452   return a & b;
453}
454
455ALWAYS_INLINE static nir_deref_compare_result
456compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
457                    unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
458{
459   /* Start off assuming they fully compare.  We ignore equality for now.  In
460    * the end, we'll determine that by containment.
461    */
462   nir_deref_compare_result result = nir_derefs_may_alias_bit |
463                                     nir_derefs_a_contains_b_bit |
464                                     nir_derefs_b_contains_a_bit;
465
466   nir_deref_instr **a = a_path->path;
467   nir_deref_instr **b = b_path->path;
468
469   for (; a[*i] != NULL; (*i)++) {
470      if (a[*i] != b[*i])
471         break;
472
473      if (stop_fn && stop_fn(a[*i]))
474         break;
475   }
476
477   /* We're at either the tail or the divergence point between the two deref
    * paths.  Look to see if either contains a cast or ptr_as_array deref.  If
479    * it does we don't know how to safely make any inferences.  Hopefully,
480    * nir_opt_deref will clean most of these up and we can start inferring
481    * things again.
482    *
483    * In theory, we could do a bit better.  For instance, we could detect the
484    * case where we have exactly one ptr_as_array deref in the chain after the
485    * divergence point and it's matched in both chains and the two chains have
486    * different constant indices.
487    */
488   for (unsigned j = *i; a[j] != NULL; j++) {
489      if (stop_fn && stop_fn(a[j]))
490         break;
491
492      if (a[j]->deref_type == nir_deref_type_cast ||
493          a[j]->deref_type == nir_deref_type_ptr_as_array)
494         return nir_derefs_may_alias_bit;
495   }
496   for (unsigned j = *i; b[j] != NULL; j++) {
497      if (stop_fn && stop_fn(b[j]))
498         break;
499
500      if (b[j]->deref_type == nir_deref_type_cast ||
501          b[j]->deref_type == nir_deref_type_ptr_as_array)
502         return nir_derefs_may_alias_bit;
503   }
504
505   for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
506      if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
507         break;
508
509      switch (a[*i]->deref_type) {
510      case nir_deref_type_array:
511      case nir_deref_type_array_wildcard: {
512         assert(b[*i]->deref_type == nir_deref_type_array ||
513                b[*i]->deref_type == nir_deref_type_array_wildcard);
514
515         if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
516            if (b[*i]->deref_type != nir_deref_type_array_wildcard)
517               result &= ~nir_derefs_b_contains_a_bit;
518         } else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
519            if (a[*i]->deref_type != nir_deref_type_array_wildcard)
520               result &= ~nir_derefs_a_contains_b_bit;
521         } else {
522            assert(a[*i]->deref_type == nir_deref_type_array &&
523                   b[*i]->deref_type == nir_deref_type_array);
524            assert(a[*i]->arr.index.is_ssa && b[*i]->arr.index.is_ssa);
525
526            if (nir_src_is_const(a[*i]->arr.index) &&
527                nir_src_is_const(b[*i]->arr.index)) {
528               /* If they're both direct and have different offsets, they
529                * don't even alias much less anything else.
530                */
531               if (nir_src_as_uint(a[*i]->arr.index) !=
532                   nir_src_as_uint(b[*i]->arr.index))
533                  return nir_derefs_do_not_alias;
534            } else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
535               /* They're the same indirect, continue on */
536            } else {
537               /* They're not the same index so we can't prove anything about
538                * containment.
539                */
540               result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
541            }
542         }
543         break;
544      }
545
546      case nir_deref_type_struct: {
547         /* If they're different struct members, they don't even alias */
548         if (a[*i]->strct.index != b[*i]->strct.index)
549            return nir_derefs_do_not_alias;
550         break;
551      }
552
553      default:
554         unreachable("Invalid deref type");
555      }
556   }
557
   /* If a is longer than b, then it can't contain b.  If neither a[*i] nor
    * b[*i] is NULL, then we aren't at the end of the chain and we know
    * nothing about containment.
    */
562   if (a[*i] != NULL)
563      result &= ~nir_derefs_a_contains_b_bit;
564   if (b[*i] != NULL)
565      result &= ~nir_derefs_b_contains_a_bit;
566
567   /* If a contains b and b contains a they must be equal. */
568   if ((result & nir_derefs_a_contains_b_bit) &&
569       (result & nir_derefs_b_contains_a_bit))
570      result |= nir_derefs_equal_bit;
571
572   return result;
573}
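
/* A few illustrative results (made-up variable derefs):
 *
 *    a = &var->arr[*], b = &var->arr[3]  -> may alias, a contains b
 *    a = &var->arr[1], b = &var->arr[3]  -> do not alias
 *    a = &var->f0,     b = &var->f1      -> do not alias (different fields)
 *    a = &var->arr[i], b = &var->arr[j]  -> may alias, containment unknown
 */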
574
575static bool
576is_interface_struct_deref(const nir_deref_instr *deref)
577{
578   if (deref->deref_type == nir_deref_type_struct) {
579      assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
580      return true;
581   } else {
582      return false;
583   }
584}
585
586nir_deref_compare_result
587nir_compare_deref_paths(nir_deref_path *a_path,
588                        nir_deref_path *b_path)
589{
590   if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
591      return nir_derefs_do_not_alias;
592
593   if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
594      return nir_derefs_may_alias_bit;
595
596   unsigned path_idx = 1;
597   if (a_path->path[0]->deref_type == nir_deref_type_var) {
598      const nir_variable *a_var = a_path->path[0]->var;
599      const nir_variable *b_var = b_path->path[0]->var;
600
601      /* If we got here, the two variables must have the same mode.  The
602       * only way modes_may_alias() can return true for two different modes
603       * is if one is global and the other ssbo.  However, Global variables
604       * only exist in OpenCL and SSBOs don't exist there.  No API allows
605       * both for variables.
606       */
607      assert(a_var->data.mode == b_var->data.mode);
608
609      switch (a_var->data.mode) {
610      case nir_var_mem_ssbo: {
611         nir_deref_compare_result binding_compare;
612         if (a_var == b_var) {
613            binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
614                                                  is_interface_struct_deref);
615         } else {
616            binding_compare = nir_derefs_do_not_alias;
617         }
618
619         if (binding_compare & nir_derefs_equal_bit)
620            break;
621
622         /* If the binding derefs can't alias and at least one is RESTRICT,
623          * then we know they can't alias.
624          */
625         if (!(binding_compare & nir_derefs_may_alias_bit) &&
626             ((a_var->data.access & ACCESS_RESTRICT) ||
627              (b_var->data.access & ACCESS_RESTRICT)))
628            return nir_derefs_do_not_alias;
629
630         return nir_derefs_may_alias_bit;
631      }
632
633      case nir_var_mem_shared:
634         if (a_var == b_var)
635            break;
636
637         /* Per SPV_KHR_workgroup_memory_explicit_layout and
638          * GL_EXT_shared_memory_block, shared blocks alias each other.
639          * We will have either all blocks or all non-blocks.
640          */
641         if (glsl_type_is_interface(a_var->type) ||
642             glsl_type_is_interface(b_var->type)) {
643            assert(glsl_type_is_interface(a_var->type) &&
644                   glsl_type_is_interface(b_var->type));
645            return nir_derefs_may_alias_bit;
646         }
647
648         /* Otherwise, distinct shared vars don't alias */
649         return nir_derefs_do_not_alias;
650
651      default:
652         /* For any other variable types, if we can chase them back to the
653          * variable, and the variables are different, they don't alias.
654          */
655         if (a_var == b_var)
656            break;
657
658         return nir_derefs_do_not_alias;
659      }
660   } else {
661      assert(a_path->path[0]->deref_type == nir_deref_type_cast);
      /* If they're not exactly the same cast, it's hard to compare them so we
       * just assume they alias.  Comparing casts is tricky as there are lots
       * of things, such as modes and types, that all have to work out; for
       * now, we just assume nir_opt_deref will combine identical casts and
       * rely on comparing the deref instructions themselves.
667       *
668       * TODO: At some point in the future, we could be clever and understand
669       * that a float[] and int[] have the same layout and aliasing structure
670       * but double[] and vec3[] do not and we could potentially be a bit
671       * smarter here.
672       */
673      if (a_path->path[0] != b_path->path[0])
674         return nir_derefs_may_alias_bit;
675   }
676
677   return compare_deref_paths(a_path, b_path, &path_idx, NULL);
678}
679
680nir_deref_compare_result
681nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
682{
683   if (a == b) {
684      return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
685             nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
686   }
687
688   nir_deref_path a_path, b_path;
689   nir_deref_path_init(&a_path, a, NULL);
690   nir_deref_path_init(&b_path, b, NULL);
691   assert(a_path.path[0]->deref_type == nir_deref_type_var ||
692          a_path.path[0]->deref_type == nir_deref_type_cast);
693   assert(b_path.path[0]->deref_type == nir_deref_type_var ||
694          b_path.path[0]->deref_type == nir_deref_type_cast);
695
696   nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
697
698   nir_deref_path_finish(&a_path);
699   nir_deref_path_finish(&b_path);
700
701   return result;
702}
703
704nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
705{
706   if (!deref->_path) {
707      deref->_path = ralloc(mem_ctx, nir_deref_path);
708      nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
709   }
710   return deref->_path;
711}
712
713nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
714                                                      nir_deref_and_path *a,
715                                                      nir_deref_and_path *b)
716{
717   if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
718      return nir_compare_derefs(a->instr, b->instr);
719
720   return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
721                                  nir_get_deref_path(mem_ctx, b));
722}
723
724struct rematerialize_deref_state {
725   bool progress;
726   nir_builder builder;
727   nir_block *block;
728   struct hash_table *cache;
729};
730
731static nir_deref_instr *
732rematerialize_deref_in_block(nir_deref_instr *deref,
733                             struct rematerialize_deref_state *state)
734{
735   if (deref->instr.block == state->block)
736      return deref;
737
738   if (!state->cache) {
739      state->cache = _mesa_pointer_hash_table_create(NULL);
740   }
741
742   struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
743   if (cached)
744      return cached->data;
745
746   nir_builder *b = &state->builder;
747   nir_deref_instr *new_deref =
748      nir_deref_instr_create(b->shader, deref->deref_type);
749   new_deref->modes = deref->modes;
750   new_deref->type = deref->type;
751
752   if (deref->deref_type == nir_deref_type_var) {
753      new_deref->var = deref->var;
754   } else {
755      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
756      if (parent) {
757         parent = rematerialize_deref_in_block(parent, state);
758         new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
759      } else {
760         nir_src_copy(&new_deref->parent, &deref->parent);
761      }
762   }
763
764   switch (deref->deref_type) {
765   case nir_deref_type_var:
766   case nir_deref_type_array_wildcard:
767      /* Nothing more to do */
768      break;
769
770   case nir_deref_type_cast:
771      new_deref->cast.ptr_stride = deref->cast.ptr_stride;
772      break;
773
774   case nir_deref_type_array:
775   case nir_deref_type_ptr_as_array:
776      assert(!nir_src_as_deref(deref->arr.index));
777      nir_src_copy(&new_deref->arr.index, &deref->arr.index);
778      break;
779
780   case nir_deref_type_struct:
781      new_deref->strct.index = deref->strct.index;
782      break;
783
784   default:
785      unreachable("Invalid deref instruction type");
786   }
787
788   nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
789                     deref->dest.ssa.num_components,
790                     deref->dest.ssa.bit_size,
791                     NULL);
792   nir_builder_instr_insert(b, &new_deref->instr);
793
794   return new_deref;
795}
796
797static bool
798rematerialize_deref_src(nir_src *src, void *_state)
799{
800   struct rematerialize_deref_state *state = _state;
801
802   nir_deref_instr *deref = nir_src_as_deref(*src);
803   if (!deref)
804      return true;
805
806   nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
807   if (block_deref != deref) {
808      nir_instr_rewrite_src(src->parent_instr, src,
809                            nir_src_for_ssa(&block_deref->dest.ssa));
810      nir_deref_instr_remove_if_unused(deref);
811      state->progress = true;
812   }
813
814   return true;
815}
816
817/** Re-materialize derefs in every block
818 *
 * This pass re-materializes deref instructions in every block in which they
 * are used.  After this pass has been run, every use of a deref will be of a
 * deref in the same block as the use.  Also, all unused derefs will be
822 * deleted as a side-effect.
823 *
824 * Derefs used as sources of phi instructions are not rematerialized.
825 */
826bool
827nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
828{
829   struct rematerialize_deref_state state = { 0 };
830   nir_builder_init(&state.builder, impl);
831
832   nir_foreach_block_unstructured(block, impl) {
833      state.block = block;
834
835      /* Start each block with a fresh cache */
836      if (state.cache)
837         _mesa_hash_table_clear(state.cache, NULL);
838
839      nir_foreach_instr_safe(instr, block) {
840         if (instr->type == nir_instr_type_deref &&
841             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
842            continue;
843
844         /* If a deref is used in a phi, we can't rematerialize it, as the new
845          * derefs would appear before the phi, which is not valid.
846          */
847         if (instr->type == nir_instr_type_phi)
848            continue;
849
850         state.builder.cursor = nir_before_instr(instr);
851         nir_foreach_src(instr, rematerialize_deref_src, &state);
852      }
853
854#ifndef NDEBUG
855      nir_if *following_if = nir_block_get_following_if(block);
856      if (following_if)
857         assert(!nir_src_as_deref(following_if->condition));
858#endif
859   }
860
861   _mesa_hash_table_destroy(state.cache, NULL);
862
863   return state.progress;
864}
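
/* Illustrative before/after sketch (hypothetical SSA names).  Before:
 *
 *    block_0:  ssa_1 = deref_var &v
 *    block_1:  ... = intrinsic load_deref (ssa_1)
 *
 * After (the block_0 deref is deleted if it ends up unused):
 *
 *    block_0:
 *    block_1:  ssa_2 = deref_var &v
 *              ... = intrinsic load_deref (ssa_2)
 */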
865
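/* Re-derives the type of every deref built on top of 'parent' from the
 * parent's (possibly just-changed) type, recursing down the deref tree and
 * stopping at casts, which carry their own type.
 */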
866static void
867nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
868{
869   nir_foreach_use(use, &parent->dest.ssa) {
870      if (use->parent_instr->type != nir_instr_type_deref)
871         continue;
872
873      nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
874      switch (child->deref_type) {
875      case nir_deref_type_var:
876         unreachable("nir_deref_type_var cannot be a child");
877
878      case nir_deref_type_array:
879      case nir_deref_type_array_wildcard:
880         child->type = glsl_get_array_element(parent->type);
881         break;
882
883      case nir_deref_type_ptr_as_array:
884         child->type = parent->type;
885         break;
886
887      case nir_deref_type_struct:
888         child->type = glsl_get_struct_field(parent->type,
889                                             child->strct.index);
890         break;
891
892      case nir_deref_type_cast:
893         /* We stop the recursion here */
894         continue;
895      }
896
897      /* Recurse into children */
898      nir_deref_instr_fixup_child_types(child);
899   }
900}
901
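/* An ALU instruction consuming a deref_cast only sees the raw pointer value,
 * and a cast doesn't change that value, so feed the ALU the cast's source
 * instead.
 */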
902static bool
903opt_alu_of_cast(nir_alu_instr *alu)
904{
905   bool progress = false;
906
907   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
908      assert(alu->src[i].src.is_ssa);
909      nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
910      if (src_instr->type != nir_instr_type_deref)
911         continue;
912
913      nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
914      if (src_deref->deref_type != nir_deref_type_cast)
915         continue;
916
917      assert(src_deref->parent.is_ssa);
918      nir_instr_rewrite_src_ssa(&alu->instr, &alu->src[i].src,
919                                src_deref->parent.ssa);
920      progress = true;
921   }
922
923   return progress;
924}
925
926static bool
927is_trivial_array_deref_cast(nir_deref_instr *cast)
928{
929   assert(is_trivial_deref_cast(cast));
930
931   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
932
933   if (parent->deref_type == nir_deref_type_array) {
934      return cast->cast.ptr_stride ==
935             glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
936   } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
937      return cast->cast.ptr_stride ==
938             nir_deref_instr_array_stride(parent);
939   } else {
940      return false;
941   }
942}
943
944static bool
945is_deref_ptr_as_array(nir_instr *instr)
946{
947   return instr->type == nir_instr_type_deref &&
948          nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
949}
950
951static bool
952opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
953{
954   assert(cast->deref_type == nir_deref_type_cast);
955   if (cast->cast.align_mul == 0)
956      return false;
957
958   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
959   if (parent == NULL)
960      return false;
961
962   /* Don't use any default alignment for this check.  We don't want to fall
963    * back to type alignment too early in case we find out later that we're
964    * somehow a child of a packed struct.
965    */
966   uint32_t parent_mul, parent_offset;
967   if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
968                                     &parent_mul, &parent_offset))
969      return false;
970
971   /* If this cast increases the alignment, we want to keep it.
972    *
973    * There is a possibility that the larger alignment provided by this cast
974    * somehow disagrees with the smaller alignment further up the deref chain.
975    * In that case, we choose to favor the alignment closer to the actual
976    * memory operation which, in this case, is the cast and not its parent so
977    * keeping the cast alignment is the right thing to do.
978    */
979   if (parent_mul < cast->cast.align_mul)
980      return false;
981
982   /* If we've gotten here, we have a parent deref with an align_mul at least
983    * as large as ours so we can potentially throw away the alignment
984    * information on this deref.  There are two cases to consider here:
985    *
986    *  1. We can chase the deref all the way back to the variable.  In this
987    *     case, we have "perfect" knowledge, modulo indirect array derefs.
988    *     Unless we've done something wrong in our indirect/wildcard stride
989    *     calculations, our knowledge from the deref walk is better than the
990    *     client's.
991    *
992    *  2. We can't chase it all the way back to the variable.  In this case,
    *     because our call to nir_get_explicit_deref_align(parent, ...) above
    *     passes default_to_type_align=false, the only way we can even
995    *     get here is if something further up the deref chain has a cast with
996    *     an alignment which can only happen if we get an alignment from the
997    *     client (most likely a decoration in the SPIR-V).  If the client has
998    *     provided us with two conflicting alignments in the deref chain,
999    *     that's their fault and we can do whatever we want.
1000    *
    * In either case, we should be within our rights, at this point, to throw
1002    * away the alignment information on this deref.  However, to be "nice" to
1003    * weird clients, we do one more check.  It really shouldn't happen but
1004    * it's possible that the parent's alignment offset disagrees with the
1005    * cast's alignment offset.  In this case, we consider the cast as
1006    * providing more information (or at least more valid information) and keep
1007    * it even if the align_mul from the parent is larger.
1008    */
1009   assert(cast->cast.align_mul <= parent_mul);
1010   if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
1011      return false;
1012
1013   /* If we got here, the parent has better alignment information than the
1014    * child and we can get rid of the child alignment information.
1015    */
1016   cast->cast.align_mul = 0;
1017   cast->cast.align_offset = 0;
1018   return true;
1019}
1020
1021/**
1022 * Remove casts that just wrap other casts.
1023 */
1024static bool
1025opt_remove_cast_cast(nir_deref_instr *cast)
1026{
1027   nir_deref_instr *first_cast = cast;
1028
1029   while (true) {
1030      nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
1031      if (parent == NULL || parent->deref_type != nir_deref_type_cast)
1032         break;
1033      first_cast = parent;
1034   }
1035   if (cast == first_cast)
1036      return false;
1037
1038   nir_instr_rewrite_src(&cast->instr, &cast->parent,
1039                         nir_src_for_ssa(first_cast->parent.ssa));
1040   return true;
1041}
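
/* For example (illustrative chain), in
 *
 *    ssa_2 = deref_cast (T2 *)ssa_1
 *    ssa_3 = deref_cast (T3 *)ssa_2
 *
 * the outer cast is re-pointed at ssa_1, so the intermediate cast becomes
 * dead and can be removed by nir_deref_instr_remove_if_unused().
 */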
1042
1043/* Restrict variable modes in casts.
1044 *
1045 * If we know from something higher up the deref chain that the deref has a
1046 * specific mode, we can cast to more general and back but we can never cast
1047 * across modes.  For non-cast derefs, we should only ever do anything here if
1048 * the parent eventually comes from a cast that we restricted earlier.
1049 */
1050static bool
1051opt_restrict_deref_modes(nir_deref_instr *deref)
1052{
1053   if (deref->deref_type == nir_deref_type_var) {
1054      assert(deref->modes == deref->var->data.mode);
1055      return false;
1056   }
1057
1058   nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1059   if (parent == NULL || parent->modes == deref->modes)
1060      return false;
1061
1062   assert(parent->modes & deref->modes);
1063   deref->modes &= parent->modes;
1064   return true;
1065}
1066
1067static bool
1068opt_remove_sampler_cast(nir_deref_instr *cast)
1069{
1070   assert(cast->deref_type == nir_deref_type_cast);
1071   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1072   if (parent == NULL)
1073      return false;
1074
1075   /* Strip both types down to their non-array type and bail if there are any
1076    * discrepancies in array lengths.
1077    */
1078   const struct glsl_type *parent_type = parent->type;
1079   const struct glsl_type *cast_type = cast->type;
1080   while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
1081      if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
1082         return false;
1083      parent_type = glsl_get_array_element(parent_type);
1084      cast_type = glsl_get_array_element(cast_type);
1085   }
1086
1087   if (!glsl_type_is_sampler(parent_type))
1088      return false;
1089
1090   if (cast_type != glsl_bare_sampler_type() &&
1091       (glsl_type_is_bare_sampler(parent_type) ||
1092        cast_type != glsl_sampler_type_to_texture(parent_type)))
1093      return false;
1094
1095   /* We're a cast from a more detailed sampler type to a bare sampler or a
1096    * texture type with the same dimensionality.
1097    */
1098   nir_ssa_def_rewrite_uses(&cast->dest.ssa,
1099                            &parent->dest.ssa);
1100   nir_instr_remove(&cast->instr);
1101
1102   /* Recursively crawl the deref tree and clean up types */
1103   nir_deref_instr_fixup_child_types(parent);
1104
1105   return true;
1106}
1107
1108/**
 * Is this casting a struct to a contained struct?
 * struct a { struct b field0 };
 * ssa_5 is a struct a;
1112 * deref_cast (structb *)ssa_5 (function_temp structb);
1113 * converts to
1114 * deref_struct &ssa_5->field0 (function_temp structb);
1115 * This allows subsequent copy propagation to work.
1116 */
1117static bool
1118opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
1119{
1120   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1121   if (!parent)
1122      return false;
1123
1124   if (cast->cast.align_mul > 0)
1125      return false;
1126
1127   if (!glsl_type_is_struct(parent->type))
1128      return false;
1129
1130   /* Empty struct */
1131   if (glsl_get_length(parent->type) < 1)
1132      return false;
1133
1134   if (glsl_get_struct_field_offset(parent->type, 0) != 0)
1135      return false;
1136
1137   if (cast->type != glsl_get_struct_field(parent->type, 0))
1138      return false;
1139
1140   nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
1141   nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
1142   nir_deref_instr_remove_if_unused(cast);
1143   return true;
1144}
1145
1146static bool
1147opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
1148{
1149   bool progress = false;
1150
1151   progress |= opt_remove_restricting_cast_alignments(cast);
1152
1153   if (opt_replace_struct_wrapper_cast(b, cast))
1154      return true;
1155
1156   if (opt_remove_sampler_cast(cast))
1157      return true;
1158
1159   progress |= opt_remove_cast_cast(cast);
1160   if (!is_trivial_deref_cast(cast))
1161      return progress;
1162
1163   /* If this deref still contains useful alignment information, we don't want
1164    * to delete it.
1165    */
1166   if (cast->cast.align_mul > 0)
1167      return progress;
1168
1169   bool trivial_array_cast = is_trivial_array_deref_cast(cast);
1170
1171   assert(cast->dest.is_ssa);
1172   assert(cast->parent.is_ssa);
1173
1174   nir_foreach_use_safe(use_src, &cast->dest.ssa) {
1175      /* If this isn't a trivial array cast, we can't propagate into
1176       * ptr_as_array derefs.
1177       */
1178      if (is_deref_ptr_as_array(use_src->parent_instr) &&
1179          !trivial_array_cast)
1180         continue;
1181
1182      nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
1183      progress = true;
1184   }
1185
   /* if_uses on a deref cast would be a bit crazy */
1187   assert(list_is_empty(&cast->dest.ssa.if_uses));
1188
1189   if (nir_deref_instr_remove_if_unused(cast))
1190      progress = true;
1191
1192   return progress;
1193}
1194
1195static bool
1196opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
1197{
1198   assert(deref->deref_type == nir_deref_type_ptr_as_array);
1199
1200   nir_deref_instr *parent = nir_deref_instr_parent(deref);
1201
1202   if (nir_src_is_const(deref->arr.index) &&
1203       nir_src_as_int(deref->arr.index) == 0) {
1204      /* If it's a ptr_as_array deref with an index of 0, it does nothing
1205       * and we can just replace its uses with its parent, unless it has
1206       * alignment information.
1207       *
1208       * The source of a ptr_as_array deref always has a deref_type of
1209       * nir_deref_type_array or nir_deref_type_cast.  If it's a cast, it
1210       * may be trivial and we may be able to get rid of that too.  Any
1211       * trivial cast of trivial cast cases should be handled already by
1212       * opt_deref_cast() above.
1213       */
1214      if (parent->deref_type == nir_deref_type_cast &&
1215          parent->cast.align_mul == 0 &&
1216          is_trivial_deref_cast(parent))
1217         parent = nir_deref_instr_parent(parent);
1218      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
1219                               &parent->dest.ssa);
1220      nir_instr_remove(&deref->instr);
1221      return true;
1222   }
1223
1224   if (parent->deref_type != nir_deref_type_array &&
1225       parent->deref_type != nir_deref_type_ptr_as_array)
1226      return false;
1227
1228   assert(parent->parent.is_ssa);
1229   assert(parent->arr.index.is_ssa);
1230   assert(deref->arr.index.is_ssa);
1231
1232   deref->arr.in_bounds &= parent->arr.in_bounds;
1233
1234   nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
1235                                      deref->arr.index.ssa);
1236
1237   deref->deref_type = parent->deref_type;
1238   nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
1239   nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
1240                         nir_src_for_ssa(new_idx));
1241   return true;
1242}
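
/* For example (illustrative), a chain like
 *
 *    ssa_2 = deref_array &ssa_1[i]
 *    ssa_3 = deref_ptr_as_array &ssa_2[j]
 *
 * becomes a single deref_array &ssa_1[i + j], with the in_bounds flags ANDed
 * together.
 */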
1243
1244static bool
1245is_vector_bitcast_deref(nir_deref_instr *cast,
1246                        nir_component_mask_t mask,
1247                        bool is_write)
1248{
1249   if (cast->deref_type != nir_deref_type_cast)
1250      return false;
1251
1252   /* Don't throw away useful alignment information */
1253   if (cast->cast.align_mul > 0)
1254      return false;
1255
1256   /* It has to be a cast of another deref */
1257   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
1258   if (parent == NULL)
1259      return false;
1260
1261   /* The parent has to be a vector or scalar */
1262   if (!glsl_type_is_vector_or_scalar(parent->type))
1263      return false;
1264
1265   /* Don't bother with 1-bit types */
1266   unsigned cast_bit_size = glsl_get_bit_size(cast->type);
1267   unsigned parent_bit_size = glsl_get_bit_size(parent->type);
1268   if (cast_bit_size == 1 || parent_bit_size == 1)
1269      return false;
1270
1271   /* A strided vector type means it's not tightly packed */
1272   if (glsl_get_explicit_stride(cast->type) ||
1273       glsl_get_explicit_stride(parent->type))
1274      return false;
1275
1276   assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
1277   assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
1278   unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
1279   unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
1280                           (parent_bit_size / 8);
1281   if (bytes_used > parent_bytes)
1282      return false;
1283
1284   if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
1285                                                       parent_bit_size))
1286      return false;
1287
1288   return true;
1289}
1290
1291static nir_ssa_def *
1292resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
1293{
1294   if (num_components == data->num_components)
1295      return data;
1296
1297   unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
1298   for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
1299      swiz[i] = i;
1300
1301   return nir_swizzle(b, data, swiz, num_components);
1302}
1303
1304static bool
1305opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
1306{
1307   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1308   nir_component_mask_t read_mask =
1309      nir_ssa_def_components_read(&load->dest.ssa);
1310
   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
1312    * vec4-aligned and so it can just read/write them as vec4s.  This
1313    * results in a LOT of vec4->vec3 casts on loads and stores.
1314    */
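   /* Illustrative example (hypothetical SSA names): a vec3 load through a
    * vec4-to-vec3 cast,
    *
    *    ssa_2 = deref_cast (float3 *)ssa_1   // ssa_1 is a float4 deref
    *    ssa_3 = intrinsic load_deref (ssa_2)
    *
    * becomes a vec4 load straight from ssa_1 followed by a swizzle back down
    * to the three components the original load produced.
    */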
1315   if (is_vector_bitcast_deref(deref, read_mask, false)) {
1316      const unsigned old_num_comps = load->dest.ssa.num_components;
1317      const unsigned old_bit_size = load->dest.ssa.bit_size;
1318
1319      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1320      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1321      const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1322
1323      /* Stomp it to reference the parent */
1324      nir_instr_rewrite_src(&load->instr, &load->src[0],
1325                            nir_src_for_ssa(&parent->dest.ssa));
1326      assert(load->dest.is_ssa);
1327      load->dest.ssa.bit_size = new_bit_size;
1328      load->dest.ssa.num_components = new_num_comps;
1329      load->num_components = new_num_comps;
1330
1331      b->cursor = nir_after_instr(&load->instr);
1332      nir_ssa_def *data = &load->dest.ssa;
1333      if (old_bit_size != new_bit_size)
1334         data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
1335      data = resize_vector(b, data, old_num_comps);
1336
1337      nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
1338                                     data->parent_instr);
1339      return true;
1340   }
1341
1342   return false;
1343}
1344
1345static bool
1346opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
1347{
1348   nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
1349   nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
1350
   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
1352    * vec4-aligned and so it can just read/write them as vec4s.  This
1353    * results in a LOT of vec4->vec3 casts on loads and stores.
1354    */
1355   if (is_vector_bitcast_deref(deref, write_mask, true)) {
1356      assert(store->src[1].is_ssa);
1357      nir_ssa_def *data = store->src[1].ssa;
1358
1359      const unsigned old_bit_size = data->bit_size;
1360
1361      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
1362      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
1363      const unsigned new_bit_size = glsl_get_bit_size(parent->type);
1364
1365      nir_instr_rewrite_src(&store->instr, &store->src[0],
1366                            nir_src_for_ssa(&parent->dest.ssa));
1367
1368      /* Restrict things down as needed so the bitcast doesn't fail */
1369      data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
1370      if (old_bit_size != new_bit_size)
1371         data = nir_bitcast_vector(b, data, new_bit_size);
1372      data = resize_vector(b, data, new_num_comps);
1373      nir_instr_rewrite_src(&store->instr, &store->src[1],
1374                            nir_src_for_ssa(data));
1375      store->num_components = new_num_comps;
1376
1377      /* Adjust the write mask */
1378      write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
1379                                                  new_bit_size);
1380      nir_intrinsic_set_write_mask(store, write_mask);
1381      return true;
1382   }
1383
1384   return false;
1385}
1386
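/* nir_intrinsic_deref_mode_is asks, at run time, whether a deref points into
 * one of a set of memory modes.  When the deref's possible modes answer the
 * question statically (it must be, or cannot be, one of those modes), fold
 * the intrinsic to an immediate true/false.
 */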
1387static bool
1388opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
1389{
1390   nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
1391   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1392   if (deref == NULL)
1393      return false;
1394
1395   nir_ssa_def *deref_is = NULL;
1396
1397   if (nir_deref_mode_must_be(deref, modes))
1398      deref_is = nir_imm_true(b);
1399
1400   if (!nir_deref_mode_may_be(deref, modes))
1401      deref_is = nir_imm_false(b);
1402
1403   if (deref_is == NULL)
1404      return false;
1405
1406   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
1407   nir_instr_remove(&intrin->instr);
1408   return true;
1409}
1410
1411bool
1412nir_opt_deref_impl(nir_function_impl *impl)
1413{
1414   bool progress = false;
1415
1416   nir_builder b;
1417   nir_builder_init(&b, impl);
1418
1419   nir_foreach_block(block, impl) {
1420      nir_foreach_instr_safe(instr, block) {
1421         b.cursor = nir_before_instr(instr);
1422
1423         switch (instr->type) {
1424         case nir_instr_type_alu: {
1425            nir_alu_instr *alu = nir_instr_as_alu(instr);
1426            if (opt_alu_of_cast(alu))
1427               progress = true;
1428            break;
1429         }
1430
1431         case nir_instr_type_deref: {
1432            nir_deref_instr *deref = nir_instr_as_deref(instr);
1433
1434            if (opt_restrict_deref_modes(deref))
1435               progress = true;
1436
1437            switch (deref->deref_type) {
1438            case nir_deref_type_ptr_as_array:
1439               if (opt_deref_ptr_as_array(&b, deref))
1440                  progress = true;
1441               break;
1442
1443            case nir_deref_type_cast:
1444               if (opt_deref_cast(&b, deref))
1445                  progress = true;
1446               break;
1447
1448            default:
1449               /* Do nothing */
1450               break;
1451            }
1452            break;
1453         }
1454
1455         case nir_instr_type_intrinsic: {
1456            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1457            switch (intrin->intrinsic) {
1458            case nir_intrinsic_load_deref:
1459               if (opt_load_vec_deref(&b, intrin))
1460                  progress = true;
1461               break;
1462
1463            case nir_intrinsic_store_deref:
1464               if (opt_store_vec_deref(&b, intrin))
1465                  progress = true;
1466               break;
1467
1468            case nir_intrinsic_deref_mode_is:
1469               if (opt_known_deref_mode_is(&b, intrin))
1470                  progress = true;
1471               break;
1472
1473            default:
1474               /* Do nothing */
1475               break;
1476            }
1477            break;
1478         }
1479
1480         default:
1481            /* Do nothing */
1482            break;
1483         }
1484      }
1485   }
1486
1487   if (progress) {
1488      nir_metadata_preserve(impl, nir_metadata_block_index |
1489                                  nir_metadata_dominance);
1490   } else {
1491      nir_metadata_preserve(impl, nir_metadata_all);
1492   }
1493
1494   return progress;
1495}
1496
1497bool
1498nir_opt_deref(nir_shader *shader)
1499{
1500   bool progress = false;
1501
1502   nir_foreach_function(func, shader) {
1503      if (func->impl && nir_opt_deref_impl(func->impl))
1504         progress = true;
1505   }
1506
1507   return progress;
1508}
1509