1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28 
29 #include "util/set.h"
30 #include "util/u_math.h"
31 
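/* Walks the whole shader and collects every variable that is referenced by a
 * deref with a complex use (roughly, anything other than ordinary
 * load/store/copy access through derefs).  Variables in this set cannot be
 * split or shrunk by the passes below.
 */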
32 static struct set *
33 get_complex_used_vars(nir_shader *shader, void *mem_ctx)
34 {
35    struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);
36 
37    nir_foreach_function(function, shader) {
38       if (!function->impl)
39          continue;
40 
41       nir_foreach_block(block, function->impl) {
42          nir_foreach_instr(instr, block) {
43             if (instr->type != nir_instr_type_deref)
44                continue;
45 
46             nir_deref_instr *deref = nir_instr_as_deref(instr);
47 
48             /* We only need to consider var derefs because
49              * nir_deref_instr_has_complex_use is recursive.
50              */
51             if (deref->deref_type == nir_deref_type_var &&
52                 nir_deref_instr_has_complex_use(deref, 0))
53                _mesa_set_add(complex_vars, deref->var);
54          }
55       }
56    }
57 
58    return complex_vars;
59 }
60 
61 struct split_var_state {
62    void *mem_ctx;
63 
64    nir_shader *shader;
65    nir_function_impl *impl;
66 
67    nir_variable *base_var;
68 };
69 
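/* One node in the tree of fields built for a struct variable being split.
 * Interior nodes correspond to (possibly array-of-) struct types; leaf nodes
 * hold the new variable created for that (non-struct) member.
 */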
70 struct field {
71    struct field *parent;
72 
73    const struct glsl_type *type;
74 
75    unsigned num_fields;
76    struct field *fields;
77 
78    nir_variable *var;
79 };
80 
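/* Returns how many array (or matrix) levels wrap a vector or scalar type, or
 * -1 if the innermost element type is not a vector or scalar (e.g. a struct).
 */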
81 static int
82 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
83 {
84    int num_levels = 0;
85    while (true) {
86       if (glsl_type_is_array_or_matrix(type)) {
87          num_levels++;
88          type = glsl_get_array_element(type);
89       } else if (glsl_type_is_vector_or_scalar(type)) {
90          return num_levels;
91       } else {
92          /* Not an array of vectors */
93          return -1;
94       }
95    }
96 }
97 
98 static void
99 init_field_for_type(struct field *field, struct field *parent,
100                     const struct glsl_type *type,
101                     const char *name,
102                     struct split_var_state *state)
103 {
104    *field = (struct field) {
105       .parent = parent,
106       .type = type,
107    };
108 
109    const struct glsl_type *struct_type = glsl_without_array(type);
110    if (glsl_type_is_struct_or_ifc(struct_type)) {
111       field->num_fields = glsl_get_length(struct_type);
112       field->fields = ralloc_array(state->mem_ctx, struct field,
113                                    field->num_fields);
114       for (unsigned i = 0; i < field->num_fields; i++) {
115          char *field_name = NULL;
116          if (name) {
117             field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
118                                          glsl_get_struct_elem_name(struct_type, i));
119          } else {
120             field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
121                                          glsl_get_type_name(struct_type),
122                                          glsl_get_struct_elem_name(struct_type, i));
123          }
124          init_field_for_type(&field->fields[i], field,
125                              glsl_get_struct_field(struct_type, i),
126                              field_name, state);
127       }
128    } else {
129       const struct glsl_type *var_type = type;
130       for (struct field *f = field->parent; f; f = f->parent)
131          var_type = glsl_type_wrap_in_arrays(var_type, f->type);
132 
133       nir_variable_mode mode = state->base_var->data.mode;
134       if (mode == nir_var_function_temp) {
135          field->var = nir_local_variable_create(state->impl, var_type, name);
136       } else {
137          field->var = nir_variable_create(state->shader, mode, var_type, name);
138       }
139       field->var->data.ray_query = state->base_var->data.ray_query;
140    }
141 }
142 
143 static bool
144 split_var_list_structs(nir_shader *shader,
145                        nir_function_impl *impl,
146                        struct exec_list *vars,
147                        nir_variable_mode mode,
148                        struct hash_table *var_field_map,
149                        struct set **complex_vars,
150                        void *mem_ctx)
151 {
152    struct split_var_state state = {
153       .mem_ctx = mem_ctx,
154       .shader = shader,
155       .impl = impl,
156    };
157 
158    struct exec_list split_vars;
159    exec_list_make_empty(&split_vars);
160 
161    /* To avoid list confusion (we'll be adding things as we split variables),
162     * pull all of the variables we plan to split off of the list.
163     */
164    nir_foreach_variable_in_list_safe(var, vars) {
165       if (var->data.mode != mode)
166          continue;
167 
168       if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
169          continue;
170 
171       if (*complex_vars == NULL)
172          *complex_vars = get_complex_used_vars(shader, mem_ctx);
173 
174       /* We can't split a variable that's referenced by a deref with any
175        * sort of complex usage.
176        */
177       if (_mesa_set_search(*complex_vars, var))
178          continue;
179 
180       exec_node_remove(&var->node);
181       exec_list_push_tail(&split_vars, &var->node);
182    }
183 
184    nir_foreach_variable_in_list(var, &split_vars) {
185       state.base_var = var;
186 
187       struct field *root_field = ralloc(mem_ctx, struct field);
188       init_field_for_type(root_field, NULL, var->type, var->name, &state);
189       _mesa_hash_table_insert(var_field_map, var, root_field);
190    }
191 
192    return !exec_list_is_empty(&split_vars);
193 }
194 
195 static void
196 split_struct_derefs_impl(nir_function_impl *impl,
197                          struct hash_table *var_field_map,
198                          nir_variable_mode modes,
199                          void *mem_ctx)
200 {
201    nir_builder b;
202    nir_builder_init(&b, impl);
203 
204    nir_foreach_block(block, impl) {
205       nir_foreach_instr_safe(instr, block) {
206          if (instr->type != nir_instr_type_deref)
207             continue;
208 
209          nir_deref_instr *deref = nir_instr_as_deref(instr);
210          if (!nir_deref_mode_may_be(deref, modes))
211             continue;
212 
213          /* Clean up any dead derefs we find lying around.  They may refer to
214           * variables we're planning to split.
215           */
216          if (nir_deref_instr_remove_if_unused(deref))
217             continue;
218 
219          if (!glsl_type_is_vector_or_scalar(deref->type))
220             continue;
221 
222          nir_variable *base_var = nir_deref_instr_get_variable(deref);
223          /* If we can't chase back to the variable, then we're a complex use.
224           * This should have been detected by get_complex_used_vars() and the
225           * variable should not have been split.  However, we have no way of
226           * knowing that here, so we just have to trust it.
227           */
228          if (base_var == NULL)
229             continue;
230 
231          struct hash_entry *entry =
232             _mesa_hash_table_search(var_field_map, base_var);
233          if (!entry)
234             continue;
235 
236          struct field *root_field = entry->data;
237 
238          nir_deref_path path;
239          nir_deref_path_init(&path, deref, mem_ctx);
240 
241          struct field *tail_field = root_field;
242          for (unsigned i = 0; path.path[i]; i++) {
243             if (path.path[i]->deref_type != nir_deref_type_struct)
244                continue;
245 
246             assert(i > 0);
247             assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
248             assert(path.path[i - 1]->type ==
249                    glsl_without_array(tail_field->type));
250 
251             tail_field = &tail_field->fields[path.path[i]->strct.index];
252          }
253          nir_variable *split_var = tail_field->var;
254 
255          nir_deref_instr *new_deref = NULL;
256          for (unsigned i = 0; path.path[i]; i++) {
257             nir_deref_instr *p = path.path[i];
258             b.cursor = nir_after_instr(&p->instr);
259 
260             switch (p->deref_type) {
261             case nir_deref_type_var:
262                assert(new_deref == NULL);
263                new_deref = nir_build_deref_var(&b, split_var);
264                break;
265 
266             case nir_deref_type_array:
267             case nir_deref_type_array_wildcard:
268                new_deref = nir_build_deref_follower(&b, new_deref, p);
269                break;
270 
271             case nir_deref_type_struct:
272                /* Nothing to do; we're splitting structs */
273                break;
274 
275             default:
276                unreachable("Invalid deref type in path");
277             }
278          }
279 
280          assert(new_deref->type == deref->type);
281          nir_ssa_def_rewrite_uses(&deref->dest.ssa,
282                                   &new_deref->dest.ssa);
283          nir_deref_instr_remove_if_unused(deref);
284       }
285    }
286 }
287 
288 /** A pass for splitting structs into multiple variables
289  *
290  * This pass splits arrays of structs into multiple variables, one for each
291  * (possibly nested) structure member.  After this pass completes, no
292  * variables of the given mode will contain a struct type.
293  */
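/* As a rough illustration (the variable names here are made up): a variable
 * declared as
 *
 *    struct { vec3 pos; float w; } s[4];
 *
 * is replaced by two variables roughly equivalent to
 *
 *    vec3 s_pos[4];
 *    float s_w[4];
 *
 * and struct derefs on the original variable are rewritten to point at the
 * matching split variable.
 */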
294 bool
295 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
296 {
297    void *mem_ctx = ralloc_context(NULL);
298    struct hash_table *var_field_map =
299       _mesa_pointer_hash_table_create(mem_ctx);
300    struct set *complex_vars = NULL;
301 
302    assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
303 
304    bool has_global_splits = false;
305    if (modes & nir_var_shader_temp) {
306       has_global_splits = split_var_list_structs(shader, NULL,
307                                                  &shader->variables,
308                                                  nir_var_shader_temp,
309                                                  var_field_map,
310                                                  &complex_vars,
311                                                  mem_ctx);
312    }
313 
314    bool progress = false;
315    nir_foreach_function(function, shader) {
316       if (!function->impl)
317          continue;
318 
319       bool has_local_splits = false;
320       if (modes & nir_var_function_temp) {
321          has_local_splits = split_var_list_structs(shader, function->impl,
322                                                    &function->impl->locals,
323                                                    nir_var_function_temp,
324                                                    var_field_map,
325                                                    &complex_vars,
326                                                    mem_ctx);
327       }
328 
329       if (has_global_splits || has_local_splits) {
330          split_struct_derefs_impl(function->impl, var_field_map,
331                                   modes, mem_ctx);
332 
333          nir_metadata_preserve(function->impl, nir_metadata_block_index |
334                                                nir_metadata_dominance);
335          progress = true;
336       } else {
337          nir_metadata_preserve(function->impl, nir_metadata_all);
338       }
339    }
340 
341    ralloc_free(mem_ctx);
342 
343    return progress;
344 }
345 
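/* Per-array-level information gathered for the array-splitting pass: the
 * level's length and whether that level can be split (i.e. it is never
 * indexed with an indirect).
 */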
346 struct array_level_info {
347    unsigned array_len;
348    bool split;
349 };
350 
351 struct array_split {
352    /* Only set if this is the tail end of the splitting */
353    nir_variable *var;
354 
355    unsigned num_splits;
356    struct array_split *splits;
357 };
358 
359 struct array_var_info {
360    nir_variable *base_var;
361 
362    const struct glsl_type *split_var_type;
363 
364    bool split_var;
365    struct array_split root_split;
366 
367    unsigned num_levels;
368    struct array_level_info levels[0];
369 };
370 
371 static bool
372 init_var_list_array_infos(nir_shader *shader,
373                           struct exec_list *vars,
374                           nir_variable_mode mode,
375                           struct hash_table *var_info_map,
376                           struct set **complex_vars,
377                           void *mem_ctx)
378 {
379    bool has_array = false;
380 
381    nir_foreach_variable_in_list(var, vars) {
382       if (var->data.mode != mode)
383          continue;
384 
385       int num_levels = num_array_levels_in_array_of_vector_type(var->type);
386       if (num_levels <= 0)
387          continue;
388 
389       if (*complex_vars == NULL)
390          *complex_vars = get_complex_used_vars(shader, mem_ctx);
391 
392       /* We can't split a variable that's referenced by a deref with any
393        * sort of complex usage.
394        */
395       if (_mesa_set_search(*complex_vars, var))
396          continue;
397 
398       struct array_var_info *info =
399          rzalloc_size(mem_ctx, sizeof(*info) +
400                                num_levels * sizeof(info->levels[0]));
401 
402       info->base_var = var;
403       info->num_levels = num_levels;
404 
405       const struct glsl_type *type = var->type;
406       for (int i = 0; i < num_levels; i++) {
407          info->levels[i].array_len = glsl_get_length(type);
408          type = glsl_get_array_element(type);
409 
410          /* All levels start out initially as split */
411          info->levels[i].split = true;
412       }
413 
414       _mesa_hash_table_insert(var_info_map, var, info);
415       has_array = true;
416    }
417 
418    return has_array;
419 }
420 
421 static struct array_var_info *
422 get_array_var_info(nir_variable *var,
423                    struct hash_table *var_info_map)
424 {
425    struct hash_entry *entry =
426       _mesa_hash_table_search(var_info_map, var);
427    return entry ? entry->data : NULL;
428 }
429 
430 static struct array_var_info *
431 get_array_deref_info(nir_deref_instr *deref,
432                      struct hash_table *var_info_map,
433                      nir_variable_mode modes)
434 {
435    if (!nir_deref_mode_may_be(deref, modes))
436       return NULL;
437 
438    nir_variable *var = nir_deref_instr_get_variable(deref);
439    if (var == NULL)
440       return NULL;
441 
442    return get_array_var_info(var, var_info_map);
443 }
444 
445 static void
446 mark_array_deref_used(nir_deref_instr *deref,
447                       struct hash_table *var_info_map,
448                       nir_variable_mode modes,
449                       void *mem_ctx)
450 {
451    struct array_var_info *info =
452       get_array_deref_info(deref, var_info_map, modes);
453    if (!info)
454       return;
455 
456    nir_deref_path path;
457    nir_deref_path_init(&path, deref, mem_ctx);
458 
459    /* Walk the path and look for indirects.  If we have an array deref with an
460     * indirect, mark the given level as not being split.
461     */
462    for (unsigned i = 0; i < info->num_levels; i++) {
463       nir_deref_instr *p = path.path[i + 1];
464       if (p->deref_type == nir_deref_type_array &&
465           !nir_src_is_const(p->arr.index))
466          info->levels[i].split = false;
467    }
468 }
469 
470 static void
471 mark_array_usage_impl(nir_function_impl *impl,
472                       struct hash_table *var_info_map,
473                       nir_variable_mode modes,
474                       void *mem_ctx)
475 {
476    nir_foreach_block(block, impl) {
477       nir_foreach_instr(instr, block) {
478          if (instr->type != nir_instr_type_intrinsic)
479             continue;
480 
481          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
482          switch (intrin->intrinsic) {
483          case nir_intrinsic_copy_deref:
484             mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
485                                   var_info_map, modes, mem_ctx);
486             FALLTHROUGH;
487 
488          case nir_intrinsic_load_deref:
489          case nir_intrinsic_store_deref:
490             mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
491                                   var_info_map, modes, mem_ctx);
492             break;
493 
494          default:
495             break;
496          }
497       }
498    }
499 }
500 
501 static void
502 create_split_array_vars(struct array_var_info *var_info,
503                         unsigned level,
504                         struct array_split *split,
505                         const char *name,
506                         nir_shader *shader,
507                         nir_function_impl *impl,
508                         void *mem_ctx)
509 {
510    while (level < var_info->num_levels && !var_info->levels[level].split) {
511       name = ralloc_asprintf(mem_ctx, "%s[*]", name);
512       level++;
513    }
514 
515    if (level == var_info->num_levels) {
516       /* We add parens to the variable name so it looks like "(foo[2][*])" so
517        * that further derefs will look like "(foo[2][*])[ssa_6]"
518        */
519       name = ralloc_asprintf(mem_ctx, "(%s)", name);
520 
521       nir_variable_mode mode = var_info->base_var->data.mode;
522       if (mode == nir_var_function_temp) {
523          split->var = nir_local_variable_create(impl,
524                                                 var_info->split_var_type, name);
525       } else {
526          split->var = nir_variable_create(shader, mode,
527                                           var_info->split_var_type, name);
528       }
529       split->var->data.ray_query = var_info->base_var->data.ray_query;
530    } else {
531       assert(var_info->levels[level].split);
532       split->num_splits = var_info->levels[level].array_len;
533       split->splits = rzalloc_array(mem_ctx, struct array_split,
534                                     split->num_splits);
535       for (unsigned i = 0; i < split->num_splits; i++) {
536          create_split_array_vars(var_info, level + 1, &split->splits[i],
537                                  ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
538                                  shader, impl, mem_ctx);
539       }
540    }
541 }
542 
543 static bool
544 split_var_list_arrays(nir_shader *shader,
545                       nir_function_impl *impl,
546                       struct exec_list *vars,
547                       nir_variable_mode mode,
548                       struct hash_table *var_info_map,
549                       void *mem_ctx)
550 {
551    struct exec_list split_vars;
552    exec_list_make_empty(&split_vars);
553 
554    nir_foreach_variable_in_list_safe(var, vars) {
555       if (var->data.mode != mode)
556          continue;
557 
558       struct array_var_info *info = get_array_var_info(var, var_info_map);
559       if (!info)
560          continue;
561 
562       bool has_split = false;
563       const struct glsl_type *split_type =
564          glsl_without_array_or_matrix(var->type);
565       for (int i = info->num_levels - 1; i >= 0; i--) {
566          if (info->levels[i].split) {
567             has_split = true;
568             continue;
569          }
570 
571          /* If the original type was a matrix type, we'd like to keep that so
572           * we don't convert matrices into arrays.
573           */
574          if (i == info->num_levels - 1 &&
575              glsl_type_is_matrix(glsl_without_array(var->type))) {
576             split_type = glsl_matrix_type(glsl_get_base_type(split_type),
577                                           glsl_get_components(split_type),
578                                           info->levels[i].array_len);
579          } else {
580             split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
581          }
582       }
583 
584       if (has_split) {
585          info->split_var_type = split_type;
586          /* To avoid list confusion (we'll be adding things as we split
587           * variables), pull all of the variables we plan to split off of the
588           * main variable list.
589           */
590          exec_node_remove(&var->node);
591          exec_list_push_tail(&split_vars, &var->node);
592       } else {
593          assert(split_type == glsl_get_bare_type(var->type));
594          /* If we're not modifying this variable, delete the info so we skip
595           * it faster in later passes.
596           */
597          _mesa_hash_table_remove_key(var_info_map, var);
598       }
599    }
600 
601    nir_foreach_variable_in_list(var, &split_vars) {
602       struct array_var_info *info = get_array_var_info(var, var_info_map);
603       create_split_array_vars(info, 0, &info->root_split, var->name,
604                               shader, impl, mem_ctx);
605    }
606 
607    return !exec_list_is_empty(&split_vars);
608 }
609 
610 static bool
611 deref_has_split_wildcard(nir_deref_path *path,
612                          struct array_var_info *info)
613 {
614    if (info == NULL)
615       return false;
616 
617    assert(path->path[0]->var == info->base_var);
618    for (unsigned i = 0; i < info->num_levels; i++) {
619       if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
620           info->levels[i].split)
621          return true;
622    }
623 
624    return false;
625 }
626 
627 static bool
628 array_path_is_out_of_bounds(nir_deref_path *path,
629                             struct array_var_info *info)
630 {
631    if (info == NULL)
632       return false;
633 
634    assert(path->path[0]->var == info->base_var);
635    for (unsigned i = 0; i < info->num_levels; i++) {
636       nir_deref_instr *p = path->path[i + 1];
637       if (p->deref_type == nir_deref_type_array_wildcard)
638          continue;
639 
640       if (nir_src_is_const(p->arr.index) &&
641           nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
642          return true;
643    }
644 
645    return false;
646 }
647 
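/* Emits the copies needed to replace a copy_deref whose source or destination
 * involves split arrays.  Non-wildcard levels of the two deref paths are
 * simply re-emitted on the new derefs; at each wildcard level, if either side
 * was split, the copy is unrolled into one copy per array element, otherwise
 * the wildcard is kept and we recurse to the next level.
 */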
648 static void
649 emit_split_copies(nir_builder *b,
650                   struct array_var_info *dst_info, nir_deref_path *dst_path,
651                   unsigned dst_level, nir_deref_instr *dst,
652                   struct array_var_info *src_info, nir_deref_path *src_path,
653                   unsigned src_level, nir_deref_instr *src)
654 {
655    nir_deref_instr *dst_p, *src_p;
656 
657    while ((dst_p = dst_path->path[dst_level + 1])) {
658       if (dst_p->deref_type == nir_deref_type_array_wildcard)
659          break;
660 
661       dst = nir_build_deref_follower(b, dst, dst_p);
662       dst_level++;
663    }
664 
665    while ((src_p = src_path->path[src_level + 1])) {
666       if (src_p->deref_type == nir_deref_type_array_wildcard)
667          break;
668 
669       src = nir_build_deref_follower(b, src, src_p);
670       src_level++;
671    }
672 
673    if (src_p == NULL || dst_p == NULL) {
674       assert(src_p == NULL && dst_p == NULL);
675       nir_copy_deref(b, dst, src);
676    } else {
677       assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
678              src_p->deref_type == nir_deref_type_array_wildcard);
679 
680       if ((dst_info && dst_info->levels[dst_level].split) ||
681           (src_info && src_info->levels[src_level].split)) {
682          /* At least one of the source or destination has no indirect at
683           * this level, so we lower the wildcard copy to per-element copies.
684           */
685          assert(glsl_get_length(dst_path->path[dst_level]->type) ==
686                 glsl_get_length(src_path->path[src_level]->type));
687          unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
688          for (unsigned i = 0; i < len; i++) {
689             emit_split_copies(b, dst_info, dst_path, dst_level + 1,
690                               nir_build_deref_array_imm(b, dst, i),
691                               src_info, src_path, src_level + 1,
692                               nir_build_deref_array_imm(b, src, i));
693          }
694       } else {
695          /* Neither side is being split so we just keep going */
696          emit_split_copies(b, dst_info, dst_path, dst_level + 1,
697                            nir_build_deref_array_wildcard(b, dst),
698                            src_info, src_path, src_level + 1,
699                            nir_build_deref_array_wildcard(b, src));
700       }
701    }
702 }
703 
704 static void
705 split_array_copies_impl(nir_function_impl *impl,
706                         struct hash_table *var_info_map,
707                         nir_variable_mode modes,
708                         void *mem_ctx)
709 {
710    nir_builder b;
711    nir_builder_init(&b, impl);
712 
713    nir_foreach_block(block, impl) {
714       nir_foreach_instr_safe(instr, block) {
715          if (instr->type != nir_instr_type_intrinsic)
716             continue;
717 
718          nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
719          if (copy->intrinsic != nir_intrinsic_copy_deref)
720             continue;
721 
722          nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
723          nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
724 
725          struct array_var_info *dst_info =
726             get_array_deref_info(dst_deref, var_info_map, modes);
727          struct array_var_info *src_info =
728             get_array_deref_info(src_deref, var_info_map, modes);
729 
730          if (!src_info && !dst_info)
731             continue;
732 
733          nir_deref_path dst_path, src_path;
734          nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
735          nir_deref_path_init(&src_path, src_deref, mem_ctx);
736 
737          if (!deref_has_split_wildcard(&dst_path, dst_info) &&
738              !deref_has_split_wildcard(&src_path, src_info))
739             continue;
740 
741          b.cursor = nir_instr_remove(&copy->instr);
742 
743          emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
744                                src_info, &src_path, 0, src_path.path[0]);
745       }
746    }
747 }
748 
749 static void
750 split_array_access_impl(nir_function_impl *impl,
751                         struct hash_table *var_info_map,
752                         nir_variable_mode modes,
753                         void *mem_ctx)
754 {
755    nir_builder b;
756    nir_builder_init(&b, impl);
757 
758    nir_foreach_block(block, impl) {
759       nir_foreach_instr_safe(instr, block) {
760          if (instr->type == nir_instr_type_deref) {
761             /* Clean up any dead derefs we find lying around.  They may refer
762              * to variables we're planning to split.
763              */
764             nir_deref_instr *deref = nir_instr_as_deref(instr);
765             if (nir_deref_mode_may_be(deref, modes))
766                nir_deref_instr_remove_if_unused(deref);
767             continue;
768          }
769 
770          if (instr->type != nir_instr_type_intrinsic)
771             continue;
772 
773          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
774          if (intrin->intrinsic != nir_intrinsic_load_deref &&
775              intrin->intrinsic != nir_intrinsic_store_deref &&
776              intrin->intrinsic != nir_intrinsic_copy_deref)
777             continue;
778 
779          const unsigned num_derefs =
780             intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
781 
782          for (unsigned d = 0; d < num_derefs; d++) {
783             nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
784 
785             struct array_var_info *info =
786                get_array_deref_info(deref, var_info_map, modes);
787             if (!info)
788                continue;
789 
790             nir_deref_path path;
791             nir_deref_path_init(&path, deref, mem_ctx);
792 
793             b.cursor = nir_before_instr(&intrin->instr);
794 
795             if (array_path_is_out_of_bounds(&path, info)) {
796                /* If one of the derefs is out-of-bounds, we just delete the
797                 * instruction.  If a destination is out of bounds, then it may
798                 * have been in-bounds prior to shrinking so we don't want to
799                 * accidentally stomp something.  However, we've already proven
800                 * that it will never be read so it's safe to delete.  If a
801                 * source is out of bounds then it is loading random garbage.
802                 * For loads, we replace their uses with an undef instruction
803                 * and for copies we just delete the copy since it was writing
804                 * undefined garbage anyway and we may as well leave the random
805                 * garbage in the destination alone.
806                 */
807                if (intrin->intrinsic == nir_intrinsic_load_deref) {
808                   nir_ssa_def *u =
809                      nir_ssa_undef(&b, intrin->dest.ssa.num_components,
810                                        intrin->dest.ssa.bit_size);
811                   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
812                                            u);
813                }
814                nir_instr_remove(&intrin->instr);
815                for (unsigned i = 0; i < num_derefs; i++)
816                   nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
817                break;
818             }
819 
820             struct array_split *split = &info->root_split;
821             for (unsigned i = 0; i < info->num_levels; i++) {
822                if (info->levels[i].split) {
823                   nir_deref_instr *p = path.path[i + 1];
824                   unsigned index = nir_src_as_uint(p->arr.index);
825                   assert(index < info->levels[i].array_len);
826                   split = &split->splits[index];
827                }
828             }
829             assert(!split->splits && split->var);
830 
831             nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
832             for (unsigned i = 0; i < info->num_levels; i++) {
833                if (!info->levels[i].split) {
834                   new_deref = nir_build_deref_follower(&b, new_deref,
835                                                        path.path[i + 1]);
836                }
837             }
838             assert(new_deref->type == deref->type);
839 
840             /* Rewrite the deref source to point to the split one */
841             nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
842                                   nir_src_for_ssa(&new_deref->dest.ssa));
843             nir_deref_instr_remove_if_unused(deref);
844          }
845       }
846    }
847 }
848 
849 /** A pass for splitting arrays of vectors into multiple variables
850  *
851  * This pass looks at arrays (possibly multiple levels) of vectors (not
852  * structures or other types) and tries to split them into piles of variables,
853  * one for each array element.  The heuristic used is simple: If a given array
854  * level is never used with an indirect, that array level will get split.
855  *
856  * This pass probably could handle structures easily enough, but making a pass
857  * that could see through an array of structures of arrays would be difficult,
858  * so it's best to just run nir_split_struct_vars first.
859  */
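/* As a rough illustration (names made up): given
 *
 *    vec4 arr[3];
 *
 * where arr is only ever indexed with constant indices, the pass replaces it
 * with three vec4 variables named roughly "(arr[0])", "(arr[1])", "(arr[2])".
 * A level that is indexed indirectly stays an array; e.g. for vec4 arr[3][2]
 * with an indirect only on the inner level, the result is three variables
 * "(arr[0][*])" .. "(arr[2][*])", each of type vec4[2].
 */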
860 bool
861 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
862 {
863    void *mem_ctx = ralloc_context(NULL);
864    struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
865    struct set *complex_vars = NULL;
866 
867    assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
868 
869    bool has_global_array = false;
870    if (modes & nir_var_shader_temp) {
871       has_global_array = init_var_list_array_infos(shader,
872                                                    &shader->variables,
873                                                    nir_var_shader_temp,
874                                                    var_info_map,
875                                                    &complex_vars,
876                                                    mem_ctx);
877    }
878 
879    bool has_any_array = false;
880    nir_foreach_function(function, shader) {
881       if (!function->impl)
882          continue;
883 
884       bool has_local_array = false;
885       if (modes & nir_var_function_temp) {
886          has_local_array = init_var_list_array_infos(shader,
887                                                      &function->impl->locals,
888                                                      nir_var_function_temp,
889                                                      var_info_map,
890                                                      &complex_vars,
891                                                      mem_ctx);
892       }
893 
894       if (has_global_array || has_local_array) {
895          has_any_array = true;
896          mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
897       }
898    }
899 
900    /* If we failed to find any arrays of vectors, bail early. */
901    if (!has_any_array) {
902       ralloc_free(mem_ctx);
903       nir_shader_preserve_all_metadata(shader);
904       return false;
905    }
906 
907    bool has_global_splits = false;
908    if (modes & nir_var_shader_temp) {
909       has_global_splits = split_var_list_arrays(shader, NULL,
910                                                 &shader->variables,
911                                                 nir_var_shader_temp,
912                                                 var_info_map, mem_ctx);
913    }
914 
915    bool progress = false;
916    nir_foreach_function(function, shader) {
917       if (!function->impl)
918          continue;
919 
920       bool has_local_splits = false;
921       if (modes & nir_var_function_temp) {
922          has_local_splits = split_var_list_arrays(shader, function->impl,
923                                                   &function->impl->locals,
924                                                   nir_var_function_temp,
925                                                   var_info_map, mem_ctx);
926       }
927 
928       if (has_global_splits || has_local_splits) {
929          split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
930          split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
931 
932          nir_metadata_preserve(function->impl, nir_metadata_block_index |
933                                                nir_metadata_dominance);
934          progress = true;
935       } else {
936          nir_metadata_preserve(function->impl, nir_metadata_all);
937       }
938    }
939 
940    ralloc_free(mem_ctx);
941 
942    return progress;
943 }
944 
945 struct array_level_usage {
946    unsigned array_len;
947 
948    /* The value UINT_MAX will be used to indicate an indirect */
949    unsigned max_read;
950    unsigned max_written;
951 
952    /* True if there is a copy that isn't to/from a shrinkable array */
953    bool has_external_copy;
954    struct set *levels_copied;
955 };
956 
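/* Per-variable usage information for the vector/array shrinking pass: which
 * vector components are actually read and written, and, for each array level,
 * how much of the array is actually used.
 */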
957 struct vec_var_usage {
958    /* Convenience set of all components this variable has */
959    nir_component_mask_t all_comps;
960 
961    nir_component_mask_t comps_read;
962    nir_component_mask_t comps_written;
963 
964    nir_component_mask_t comps_kept;
965 
966    /* True if there is a copy that isn't to/from a shrinkable vector */
967    bool has_external_copy;
968    bool has_complex_use;
969    struct set *vars_copied;
970 
971    unsigned num_levels;
972    struct array_level_usage levels[0];
973 };
974 
975 static struct vec_var_usage *
976 get_vec_var_usage(nir_variable *var,
977                   struct hash_table *var_usage_map,
978                   bool add_usage_entry, void *mem_ctx)
979 {
980    struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
981    if (entry)
982       return entry->data;
983 
984    if (!add_usage_entry)
985       return NULL;
986 
987    /* Check to make sure that we are working with an array of vectors.  We
988     * don't bother to shrink single vectors because we figure that we can
989     * clean it up better with SSA than by inserting piles of vecN instructions
990     * to compact results.
991     */
992    int num_levels = num_array_levels_in_array_of_vector_type(var->type);
993    if (num_levels < 1)
994       return NULL; /* Not an array of vectors */
995 
996    struct vec_var_usage *usage =
997       rzalloc_size(mem_ctx, sizeof(*usage) +
998                             num_levels * sizeof(usage->levels[0]));
999 
1000    usage->num_levels = num_levels;
1001    const struct glsl_type *type = var->type;
1002    for (unsigned i = 0; i < num_levels; i++) {
1003       usage->levels[i].array_len = glsl_get_length(type);
1004       type = glsl_get_array_element(type);
1005    }
1006    assert(glsl_type_is_vector_or_scalar(type));
1007 
1008    usage->all_comps = (1 << glsl_get_components(type)) - 1;
1009 
1010    _mesa_hash_table_insert(var_usage_map, var, usage);
1011 
1012    return usage;
1013 }
1014 
1015 static struct vec_var_usage *
1016 get_vec_deref_usage(nir_deref_instr *deref,
1017                     struct hash_table *var_usage_map,
1018                     nir_variable_mode modes,
1019                     bool add_usage_entry, void *mem_ctx)
1020 {
1021    if (!nir_deref_mode_may_be(deref, modes))
1022       return NULL;
1023 
1024    nir_variable *var = nir_deref_instr_get_variable(deref);
1025    if (var == NULL)
1026       return NULL;
1027 
1028    return get_vec_var_usage(nir_deref_instr_get_variable(deref),
1029                             var_usage_map, add_usage_entry, mem_ctx);
1030 }
1031 
1032 static void
1033 mark_deref_if_complex(nir_deref_instr *deref,
1034                       struct hash_table *var_usage_map,
1035                       nir_variable_mode modes,
1036                       void *mem_ctx)
1037 {
1038    /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1039     * recursive.
1040     */
1041    if (deref->deref_type != nir_deref_type_var)
1042       return;
1043 
1044    if (!(deref->var->data.mode & modes))
1045       return;
1046 
1047    if (!nir_deref_instr_has_complex_use(deref, 0))
1048       return;
1049 
1050    struct vec_var_usage *usage =
1051       get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
1052    if (!usage)
1053       return;
1054 
1055    usage->has_complex_use = true;
1056 }
1057 
1058 static void
1059 mark_deref_used(nir_deref_instr *deref,
1060                 nir_component_mask_t comps_read,
1061                 nir_component_mask_t comps_written,
1062                 nir_deref_instr *copy_deref,
1063                 struct hash_table *var_usage_map,
1064                 nir_variable_mode modes,
1065                 void *mem_ctx)
1066 {
1067    if (!nir_deref_mode_may_be(deref, modes))
1068       return;
1069 
1070    nir_variable *var = nir_deref_instr_get_variable(deref);
1071    if (var == NULL)
1072       return;
1073 
1074    struct vec_var_usage *usage =
1075       get_vec_var_usage(var, var_usage_map, true, mem_ctx);
1076    if (!usage)
1077       return;
1078 
1079    usage->comps_read |= comps_read & usage->all_comps;
1080    usage->comps_written |= comps_written & usage->all_comps;
1081 
1082    struct vec_var_usage *copy_usage = NULL;
1083    if (copy_deref) {
1084       copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
1085                                        true, mem_ctx);
1086       if (copy_usage) {
1087          if (usage->vars_copied == NULL) {
1088             usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
1089          }
1090          _mesa_set_add(usage->vars_copied, copy_usage);
1091       } else {
1092          usage->has_external_copy = true;
1093       }
1094    }
1095 
1096    nir_deref_path path;
1097    nir_deref_path_init(&path, deref, mem_ctx);
1098 
1099    nir_deref_path copy_path;
1100    if (copy_usage)
1101       nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
1102 
1103    unsigned copy_i = 0;
1104    for (unsigned i = 0; i < usage->num_levels; i++) {
1105       struct array_level_usage *level = &usage->levels[i];
1106       nir_deref_instr *deref = path.path[i + 1];
1107       assert(deref->deref_type == nir_deref_type_array ||
1108              deref->deref_type == nir_deref_type_array_wildcard);
1109 
1110       unsigned max_used;
1111       if (deref->deref_type == nir_deref_type_array) {
1112          max_used = nir_src_is_const(deref->arr.index) ?
1113                     nir_src_as_uint(deref->arr.index) : UINT_MAX;
1114       } else {
1115          /* For wildcards, we read or wrote the whole thing. */
1116          assert(deref->deref_type == nir_deref_type_array_wildcard);
1117          max_used = level->array_len - 1;
1118 
1119          if (copy_usage) {
1120             /* Match each wildcard level with the level on copy_usage */
1121             for (; copy_path.path[copy_i + 1]; copy_i++) {
1122                if (copy_path.path[copy_i + 1]->deref_type ==
1123                    nir_deref_type_array_wildcard)
1124                   break;
1125             }
1126             struct array_level_usage *copy_level =
1127                &copy_usage->levels[copy_i++];
1128 
1129             if (level->levels_copied == NULL) {
1130                level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1131             }
1132             _mesa_set_add(level->levels_copied, copy_level);
1133          } else {
1134             /* We have a wildcard and it comes from a variable we aren't
1135              * tracking; flag it and we'll know to not shorten this array.
1136              */
1137             level->has_external_copy = true;
1138          }
1139       }
1140 
1141       if (comps_written)
1142          level->max_written = MAX2(level->max_written, max_used);
1143       if (comps_read)
1144          level->max_read = MAX2(level->max_read, max_used);
1145    }
1146 }
1147 
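/* Returns true if src is the result of a load_deref from the same deref that
 * deref_src points at.
 */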
1148 static bool
1149 src_is_load_deref(nir_src src, nir_src deref_src)
1150 {
1151    nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1152    if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1153       return false;
1154 
1155    assert(load->src[0].is_ssa);
1156 
1157    return load->src[0].ssa == deref_src.ssa;
1158 }
1159 
1160 /* Returns all non-self-referential components of a store instruction.  A
1161  * component is self-referential if it comes from the same component of a load
1162  * instruction on the same deref.  If the only data in a particular component
1163  * of a variable came directly from that component then it's undefined.  The
1164  * only way to get defined data into a component of a variable is for it to
1165  * get written there by something outside or from a different component.
1166  *
1167  * This is a fairly common pattern in shaders that come from either GLSL IR or
1168  * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1169  * load-vec-store.
1170  */
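/* For example (illustrative NIR-like pseudocode), a masked write of only the
 * x component is often emitted as:
 *
 *    vec4 tmp = load_deref(var);
 *    store_deref(var, vec4(new_x, tmp.y, tmp.z, tmp.w), writemask = 0xf);
 *
 * Only the x component carries data from outside the variable, so this
 * function would return a mask containing just that component.
 */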
1171 static nir_component_mask_t
1172 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1173 {
1174    nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1175 
1176    assert(store->src[1].is_ssa);
1177    nir_instr *src_instr = store->src[1].ssa->parent_instr;
1178    if (src_instr->type != nir_instr_type_alu)
1179       return comps;
1180 
1181    nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1182 
1183    if (src_alu->op == nir_op_mov) {
1184       /* If it's just a swizzle of a load from the same deref, discount any
1185        * channels that don't move in the swizzle.
1186        */
1187       if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1188          for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1189             if (src_alu->src[0].swizzle[i] == i)
1190                comps &= ~(1u << i);
1191          }
1192       }
1193    } else if (nir_op_is_vec(src_alu->op)) {
1194       /* If it's a vec, discount any channels that are just loads from the
1195        * same deref put in the same spot.
1196        */
1197       for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1198          if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1199              src_alu->src[i].swizzle[0] == i)
1200             comps &= ~(1u << i);
1201       }
1202    }
1203 
1204    return comps;
1205 }
1206 
1207 static void
1208 find_used_components_impl(nir_function_impl *impl,
1209                           struct hash_table *var_usage_map,
1210                           nir_variable_mode modes,
1211                           void *mem_ctx)
1212 {
1213    nir_foreach_block(block, impl) {
1214       nir_foreach_instr(instr, block) {
1215          if (instr->type == nir_instr_type_deref) {
1216             mark_deref_if_complex(nir_instr_as_deref(instr),
1217                                   var_usage_map, modes, mem_ctx);
1218          }
1219 
1220          if (instr->type != nir_instr_type_intrinsic)
1221             continue;
1222 
1223          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1224          switch (intrin->intrinsic) {
1225          case nir_intrinsic_load_deref:
1226             mark_deref_used(nir_src_as_deref(intrin->src[0]),
1227                             nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1228                             NULL, var_usage_map, modes, mem_ctx);
1229             break;
1230 
1231          case nir_intrinsic_store_deref:
1232             mark_deref_used(nir_src_as_deref(intrin->src[0]),
1233                             0, get_non_self_referential_store_comps(intrin),
1234                             NULL, var_usage_map, modes, mem_ctx);
1235             break;
1236 
1237          case nir_intrinsic_copy_deref: {
1238             /* Just mark everything used for copies. */
1239             nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1240             nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1241             mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1242             mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1243             break;
1244          }
1245 
1246          default:
1247             break;
1248          }
1249       }
1250    }
1251 }
1252 
1253 static bool
1254 shrink_vec_var_list(struct exec_list *vars,
1255                     nir_variable_mode mode,
1256                     struct hash_table *var_usage_map)
1257 {
1258    /* Initialize the components kept field of each variable.  This is the
1259     * AND of the components written and components read.  If a component is
1260     * written but never read, it's dead.  If it is read but never written,
1261     * then all values read are undefined garbage and we may as well not read
1262     * them.
1263     *
1264     * The same logic applies to the array length.  We make the array length
1265     * the minimum required length between read and write and plan to
1266     * discard any OOB access.  The one exception here is indirect writes
1267     * because we don't know where they will land and we can't shrink an array
1268     * with indirect writes because previously in-bounds writes may become
1269     * out-of-bounds and have undefined behavior.
1270     *
1271     * Also, if we have a copy to/from something we can't shrink, we need
1272     * to leave components and array_len of any wildcards alone.
1273     */
1274    nir_foreach_variable_in_list(var, vars) {
1275       if (var->data.mode != mode)
1276          continue;
1277 
1278       struct vec_var_usage *usage =
1279          get_vec_var_usage(var, var_usage_map, false, NULL);
1280       if (!usage)
1281          continue;
1282 
1283       assert(usage->comps_kept == 0);
1284       if (usage->has_external_copy || usage->has_complex_use)
1285          usage->comps_kept = usage->all_comps;
1286       else
1287          usage->comps_kept = usage->comps_read & usage->comps_written;
1288 
1289       for (unsigned i = 0; i < usage->num_levels; i++) {
1290          struct array_level_usage *level = &usage->levels[i];
1291          assert(level->array_len > 0);
1292 
1293          if (level->max_written == UINT_MAX || level->has_external_copy ||
1294              usage->has_complex_use)
1295             continue; /* Can't shrink */
1296 
1297          unsigned max_used = MIN2(level->max_read, level->max_written);
1298          level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1299       }
1300    }
1301 
1302    /* In order for variable copies to work, we have to have the same data type
1303     * on the source and the destination.  In order to satisfy this, we run a
1304     * little fixed-point algorithm to transitively ensure that we get enough
1305     * components and array elements for this to hold for all copies.
1306     */
1307    bool fp_progress;
1308    do {
1309       fp_progress = false;
1310       nir_foreach_variable_in_list(var, vars) {
1311          if (var->data.mode != mode)
1312             continue;
1313 
1314          struct vec_var_usage *var_usage =
1315             get_vec_var_usage(var, var_usage_map, false, NULL);
1316          if (!var_usage || !var_usage->vars_copied)
1317             continue;
1318 
1319          set_foreach(var_usage->vars_copied, copy_entry) {
1320             struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1321             if (copy_usage->comps_kept != var_usage->comps_kept) {
1322                nir_component_mask_t comps_kept =
1323                   (var_usage->comps_kept | copy_usage->comps_kept);
1324                var_usage->comps_kept = comps_kept;
1325                copy_usage->comps_kept = comps_kept;
1326                fp_progress = true;
1327             }
1328          }
1329 
1330          for (unsigned i = 0; i < var_usage->num_levels; i++) {
1331             struct array_level_usage *var_level = &var_usage->levels[i];
1332             if (!var_level->levels_copied)
1333                continue;
1334 
1335             set_foreach(var_level->levels_copied, copy_entry) {
1336                struct array_level_usage *copy_level = (void *)copy_entry->key;
1337                if (var_level->array_len != copy_level->array_len) {
1338                   unsigned array_len =
1339                      MAX2(var_level->array_len, copy_level->array_len);
1340                   var_level->array_len = array_len;
1341                   copy_level->array_len = array_len;
1342                   fp_progress = true;
1343                }
1344             }
1345          }
1346       }
1347    } while (fp_progress);
1348 
1349    bool vars_shrunk = false;
1350    nir_foreach_variable_in_list_safe(var, vars) {
1351       if (var->data.mode != mode)
1352          continue;
1353 
1354       struct vec_var_usage *usage =
1355          get_vec_var_usage(var, var_usage_map, false, NULL);
1356       if (!usage)
1357          continue;
1358 
1359       bool shrunk = false;
1360       const struct glsl_type *vec_type = var->type;
1361       for (unsigned i = 0; i < usage->num_levels; i++) {
1362          /* If we've reduced the array to zero elements at some level, just
1363           * set comps_kept to 0 and delete the variable.
1364           */
1365          if (usage->levels[i].array_len == 0) {
1366             usage->comps_kept = 0;
1367             break;
1368          }
1369 
1370          assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1371          if (usage->levels[i].array_len < glsl_get_length(vec_type))
1372             shrunk = true;
1373          vec_type = glsl_get_array_element(vec_type);
1374       }
1375       assert(glsl_type_is_vector_or_scalar(vec_type));
1376 
1377       assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1378       if (usage->comps_kept != usage->all_comps)
1379          shrunk = true;
1380 
1381       if (usage->comps_kept == 0) {
1382          /* This variable is dead, remove it */
1383          vars_shrunk = true;
1384          exec_node_remove(&var->node);
1385          continue;
1386       }
1387 
1388       if (!shrunk) {
1389          /* This variable doesn't need to be shrunk.  Remove it from the
1390           * hash table so later steps will ignore it.
1391           */
1392          _mesa_hash_table_remove_key(var_usage_map, var);
1393          continue;
1394       }
1395 
1396       /* Build the new var type */
1397       unsigned new_num_comps = util_bitcount(usage->comps_kept);
1398       const struct glsl_type *new_type =
1399          glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1400       for (int i = usage->num_levels - 1; i >= 0; i--) {
1401          assert(usage->levels[i].array_len > 0);
1402          /* If the original type was a matrix type, we'd like to keep that so
1403           * we don't convert matrices into arrays.
1404           */
1405          if (i == usage->num_levels - 1 &&
1406              glsl_type_is_matrix(glsl_without_array(var->type)) &&
1407              new_num_comps > 1 && usage->levels[i].array_len > 1) {
1408             new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1409                                         new_num_comps,
1410                                         usage->levels[i].array_len);
1411          } else {
1412             new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1413          }
1414       }
1415       var->type = new_type;
1416 
1417       vars_shrunk = true;
1418    }
1419 
1420    return vars_shrunk;
1421 }
1422 
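     /* Returns true if the deref addresses storage outside the post-shrink
      * extent of the variable, i.e. some constant array index along the deref
      * path is >= the array length we decided to keep at that level.  Levels
      * indexed by a wildcard (or by a non-constant index) are not considered
      * out of bounds.
      */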
1423 static bool
1424 vec_deref_is_oob(nir_deref_instr *deref,
1425                  struct vec_var_usage *usage)
1426 {
1427    nir_deref_path path;
1428    nir_deref_path_init(&path, deref, NULL);
1429 
1430    bool oob = false;
1431    for (unsigned i = 0; i < usage->num_levels; i++) {
1432       nir_deref_instr *p = path.path[i + 1];
1433       if (p->deref_type == nir_deref_type_array_wildcard)
1434          continue;
1435 
1436       if (nir_src_is_const(p->arr.index) &&
1437           nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1438          oob = true;
1439          break;
1440       }
1441    }
1442 
1443    nir_deref_path_finish(&path);
1444 
1445    return oob;
1446 }
1447 
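     /* A deref can be dropped entirely if its variable lost all of its
      * components (comps_kept == 0) or if it only addresses array elements
      * that are being removed.
      */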
1448 static bool
1449 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1450                          struct hash_table *var_usage_map,
1451                          nir_variable_mode modes)
1452 {
1453    struct vec_var_usage *usage =
1454       get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1455    if (!usage)
1456       return false;
1457 
1458    return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1459 }
1460 
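     /* Rewrite every deref and load/store/copy in the impl to match the newly
      * shrunk variable types: dead or out-of-bounds accesses are deleted, and
      * loads and stores of partially shrunk vectors are compacted down to the
      * kept components.
      */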
1461 static void
1462 shrink_vec_var_access_impl(nir_function_impl *impl,
1463                            struct hash_table *var_usage_map,
1464                            nir_variable_mode modes)
1465 {
1466    nir_builder b;
1467    nir_builder_init(&b, impl);
1468 
1469    nir_foreach_block(block, impl) {
1470       nir_foreach_instr_safe(instr, block) {
1471          switch (instr->type) {
1472          case nir_instr_type_deref: {
1473             nir_deref_instr *deref = nir_instr_as_deref(instr);
1474             if (!nir_deref_mode_may_be(deref, modes))
1475                break;
1476 
1477             /* Clean up any dead derefs we find lying around.  They may refer
1478              * to variables we've deleted.
1479              */
1480             if (nir_deref_instr_remove_if_unused(deref))
1481                break;
1482 
1483             /* Update the type in the deref to keep the types consistent as
1484              * you walk down the chain.  We don't need to check if this is one
1485              * of the derefs we're shrinking because this is a no-op if it
1486              * isn't.  The worst that could happen is that we accidentally fix
1487              * an invalid deref.
1488              */
1489             if (deref->deref_type == nir_deref_type_var) {
1490                deref->type = deref->var->type;
1491             } else if (deref->deref_type == nir_deref_type_array ||
1492                        deref->deref_type == nir_deref_type_array_wildcard) {
1493                nir_deref_instr *parent = nir_deref_instr_parent(deref);
1494                assert(glsl_type_is_array(parent->type) ||
1495                       glsl_type_is_matrix(parent->type));
1496                deref->type = glsl_get_array_element(parent->type);
1497             }
1498             break;
1499          }
1500 
1501          case nir_instr_type_intrinsic: {
1502             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1503 
1504             /* If we have a copy whose source or destination has been deleted
1505              * because we determined the variable was dead, then we just
1506              * delete the copy instruction.  If the source variable was dead
1507              * then it was writing undefined garbage anyway and if it's the
1508              * destination variable that's dead then the write isn't needed.
1509              */
1510             if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1511                nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1512                nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1513                if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1514                    vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1515                   nir_instr_remove(&intrin->instr);
1516                   nir_deref_instr_remove_if_unused(dst);
1517                   nir_deref_instr_remove_if_unused(src);
1518                }
1519                continue;
1520             }
1521 
1522             if (intrin->intrinsic != nir_intrinsic_load_deref &&
1523                 intrin->intrinsic != nir_intrinsic_store_deref)
1524                continue;
1525 
1526             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1527             if (!nir_deref_mode_may_be(deref, modes))
1528                continue;
1529 
1530             struct vec_var_usage *usage =
1531                get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1532             if (!usage)
1533                continue;
1534 
1535             if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1536                if (intrin->intrinsic == nir_intrinsic_load_deref) {
1537                   nir_ssa_def *u =
1538                      nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1539                                        intrin->dest.ssa.bit_size);
1540                   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1541                                            u);
1542                }
1543                nir_instr_remove(&intrin->instr);
1544                nir_deref_instr_remove_if_unused(deref);
1545                continue;
1546             }
1547 
1548             /* If we're not dropping any components, there's no need to
1549              * compact vectors.
1550              */
1551             if (usage->comps_kept == usage->all_comps)
1552                continue;
1553 
1554             if (intrin->intrinsic == nir_intrinsic_load_deref) {
1555                b.cursor = nir_after_instr(&intrin->instr);
1556 
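                    /* The load will be narrowed to just the kept components,
                     * so re-expand it here: build a vector that puts each kept
                     * channel back in its original position (undef in the
                     * removed channels) and make every other user read that
                     * vector instead.
                     */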
1557                nir_ssa_def *undef =
1558                   nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1559                nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1560                unsigned c = 0;
1561                for (unsigned i = 0; i < intrin->num_components; i++) {
1562                   if (usage->comps_kept & (1u << i))
1563                      vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1564                   else
1565                      vec_srcs[i] = undef;
1566                }
1567                nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1568 
1569                nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1570                                               vec,
1571                                               vec->parent_instr);
1572 
1573                /* The SSA def is now only used by the swizzle.  It's safe to
1574                 * shrink the number of components.
1575                 */
1576                assert(list_length(&intrin->dest.ssa.uses) == c);
1577                intrin->num_components = c;
1578                intrin->dest.ssa.num_components = c;
1579             } else {
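                    /* For stores, compact the value being written instead:
                     * gather the kept channels into a contiguous vector and
                     * remap the write mask to the new channel positions.
                     */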
1580                nir_component_mask_t write_mask =
1581                   nir_intrinsic_write_mask(intrin);
1582 
1583                unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1584                nir_component_mask_t new_write_mask = 0;
1585                unsigned c = 0;
1586                for (unsigned i = 0; i < intrin->num_components; i++) {
1587                   if (usage->comps_kept & (1u << i)) {
1588                      swizzle[c] = i;
1589                      if (write_mask & (1u << i))
1590                         new_write_mask |= 1u << c;
1591                      c++;
1592                   }
1593                }
1594 
1595                b.cursor = nir_before_instr(&intrin->instr);
1596 
1597                nir_ssa_def *swizzled =
1598                   nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1599 
1600                /* Rewrite to use the compacted source */
1601                nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1602                                      nir_src_for_ssa(swizzled));
1603                nir_intrinsic_set_write_mask(intrin, new_write_mask);
1604                intrin->num_components = c;
1605             }
1606             break;
1607          }
1608 
1609          default:
1610             break;
1611          }
1612       }
1613    }
1614 }
1615 
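     /* Quick check for whether this function could see any variables in the
      * given modes at all: function_temp variables live in the impl's locals
      * list while variables in the other modes live at the shader level.
      */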
1616 static bool
1617 function_impl_has_vars_with_modes(nir_function_impl *impl,
1618                                   nir_variable_mode modes)
1619 {
1620    nir_shader *shader = impl->function->shader;
1621 
1622    if (modes & ~nir_var_function_temp) {
1623       nir_foreach_variable_with_modes(var, shader,
1624                                       modes & ~nir_var_function_temp)
1625          return true;
1626    }
1627 
1628    if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1629       return true;
1630 
1631    return false;
1632 }
1633 
1634 /** Attempt to shrink arrays of vectors
1635  *
1636  * This pass looks at variables which contain a vector or an array (possibly
1637  * multiple dimensions) of vectors and attempts to lower them to a smaller
1638  * vector or array.  If the pass can prove that a component of a vector (or
1639  * array of vectors) is never really used, then that component will be
1640  * removed.  Similarly, the pass attempts to shorten arrays based on which
1641  * elements it can prove are never read or never contain valid data.
1642  */
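     /* For example, given a local
      *
      *    vec4 tmp[8];
      *
      * where only the .xz components are ever read and elements 4..7 never
      * hold valid data, the variable can be retyped as an array of four vec2s
      * and every load/store of it rewritten accordingly.
      */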
1643 bool
1644 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1645 {
1646    assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1647 
1648    void *mem_ctx = ralloc_context(NULL);
1649 
1650    struct hash_table *var_usage_map =
1651       _mesa_pointer_hash_table_create(mem_ctx);
1652 
1653    bool has_vars_to_shrink = false;
1654    nir_foreach_function(function, shader) {
1655       if (!function->impl)
1656          continue;
1657 
1658       /* Don't even bother crawling the IR if we don't have any variables.
1659        * Given that this pass deletes any unused variables, it's likely that
1660        * we will be in this scenario eventually.
1661        */
1662       if (function_impl_has_vars_with_modes(function->impl, modes)) {
1663          has_vars_to_shrink = true;
1664          find_used_components_impl(function->impl, var_usage_map,
1665                                    modes, mem_ctx);
1666       }
1667    }
1668    if (!has_vars_to_shrink) {
1669       ralloc_free(mem_ctx);
1670       nir_shader_preserve_all_metadata(shader);
1671       return false;
1672    }
1673 
1674    bool globals_shrunk = false;
1675    if (modes & nir_var_shader_temp) {
1676       globals_shrunk = shrink_vec_var_list(&shader->variables,
1677                                            nir_var_shader_temp,
1678                                            var_usage_map);
1679    }
1680 
1681    bool progress = false;
1682    nir_foreach_function(function, shader) {
1683       if (!function->impl)
1684          continue;
1685 
1686       bool locals_shrunk = false;
1687       if (modes & nir_var_function_temp) {
1688          locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1689                                              nir_var_function_temp,
1690                                              var_usage_map);
1691       }
1692 
1693       if (globals_shrunk || locals_shrunk) {
1694          shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1695 
1696          nir_metadata_preserve(function->impl, nir_metadata_block_index |
1697                                                nir_metadata_dominance);
1698          progress = true;
1699       } else {
1700          nir_metadata_preserve(function->impl, nir_metadata_all);
1701       }
1702    }
1703 
1704    ralloc_free(mem_ctx);
1705 
1706    return progress;
1707 }
1708