1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/set.h"
30 #include "util/u_math.h"
31
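/* Walk the shader and collect every variable that is used through a deref
 * with a "complex" use (anything other than a plain load/store/copy chain).
 * Variables in the returned set cannot safely be split.
 */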
32 static struct set *
33 get_complex_used_vars(nir_shader *shader, void *mem_ctx)
34 {
35 struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);
36
37 nir_foreach_function(function, shader) {
38 if (!function->impl)
39 continue;
40
41 nir_foreach_block(block, function->impl) {
42 nir_foreach_instr(instr, block) {
43 if (instr->type != nir_instr_type_deref)
44 continue;
45
46 nir_deref_instr *deref = nir_instr_as_deref(instr);
47
48 /* We only need to consider var derefs because
49 * nir_deref_instr_has_complex_use is recursive.
50 */
51 if (deref->deref_type == nir_deref_type_var &&
52 nir_deref_instr_has_complex_use(deref, 0))
53 _mesa_set_add(complex_vars, deref->var);
54 }
55 }
56 }
57
58 return complex_vars;
59 }
60
61 struct split_var_state {
62 void *mem_ctx;
63
64 nir_shader *shader;
65 nir_function_impl *impl;
66
67 nir_variable *base_var;
68 };
69
70 struct field {
71 struct field *parent;
72
73 const struct glsl_type *type;
74
75 unsigned num_fields;
76 struct field *fields;
77
78 nir_variable *var;
79 };
80
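/* Returns the number of array (or matrix) levels wrapped around a vector or
 * scalar type, or -1 if the innermost type is not a vector or scalar.
 */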
81 static int
82 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
83 {
84 int num_levels = 0;
85 while (true) {
86 if (glsl_type_is_array_or_matrix(type)) {
87 num_levels++;
88 type = glsl_get_array_element(type);
89 } else if (glsl_type_is_vector_or_scalar(type)) {
90 return num_levels;
91 } else {
92 /* Not an array of vectors */
93 return -1;
94 }
95 }
96 }
97
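/* Recursively build the field tree for a (possibly array-of-) struct type.
 * Struct members get child fields; every non-struct leaf gets a fresh
 * variable whose type is the leaf type re-wrapped in the enclosing array
 * dimensions.
 */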
98 static void
99 init_field_for_type(struct field *field, struct field *parent,
100 const struct glsl_type *type,
101 const char *name,
102 struct split_var_state *state)
103 {
104 *field = (struct field) {
105 .parent = parent,
106 .type = type,
107 };
108
109 const struct glsl_type *struct_type = glsl_without_array(type);
110 if (glsl_type_is_struct_or_ifc(struct_type)) {
111 field->num_fields = glsl_get_length(struct_type);
112 field->fields = ralloc_array(state->mem_ctx, struct field,
113 field->num_fields);
114 for (unsigned i = 0; i < field->num_fields; i++) {
115 char *field_name = NULL;
116 if (name) {
117 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
118 glsl_get_struct_elem_name(struct_type, i));
119 } else {
120 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
121 glsl_get_type_name(struct_type),
122 glsl_get_struct_elem_name(struct_type, i));
123 }
124 init_field_for_type(&field->fields[i], field,
125 glsl_get_struct_field(struct_type, i),
126 field_name, state);
127 }
128 } else {
129 const struct glsl_type *var_type = type;
130 for (struct field *f = field->parent; f; f = f->parent)
131 var_type = glsl_type_wrap_in_arrays(var_type, f->type);
132
133 nir_variable_mode mode = state->base_var->data.mode;
134 if (mode == nir_var_function_temp) {
135 field->var = nir_local_variable_create(state->impl, var_type, name);
136 } else {
137 field->var = nir_variable_create(state->shader, mode, var_type, name);
138 }
139 field->var->data.ray_query = state->base_var->data.ray_query;
140 }
141 }
142
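/* Pull every splittable struct variable of the given mode off of the
 * variable list, build its field tree (creating the replacement variables),
 * and record it in var_field_map.  Returns true if anything was split.
 */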
143 static bool
144 split_var_list_structs(nir_shader *shader,
145 nir_function_impl *impl,
146 struct exec_list *vars,
147 nir_variable_mode mode,
148 struct hash_table *var_field_map,
149 struct set **complex_vars,
150 void *mem_ctx)
151 {
152 struct split_var_state state = {
153 .mem_ctx = mem_ctx,
154 .shader = shader,
155 .impl = impl,
156 };
157
158 struct exec_list split_vars;
159 exec_list_make_empty(&split_vars);
160
161 /* To avoid list confusion (we'll be adding things as we split variables),
162 * pull all of the variables we plan to split off of the list
163 */
164 nir_foreach_variable_in_list_safe(var, vars) {
165 if (var->data.mode != mode)
166 continue;
167
168 if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
169 continue;
170
171 if (*complex_vars == NULL)
172 *complex_vars = get_complex_used_vars(shader, mem_ctx);
173
174 /* We can't split a variable that's referenced by a deref that has any
175 * sort of complex usage.
176 */
177 if (_mesa_set_search(*complex_vars, var))
178 continue;
179
180 exec_node_remove(&var->node);
181 exec_list_push_tail(&split_vars, &var->node);
182 }
183
184 nir_foreach_variable_in_list(var, &split_vars) {
185 state.base_var = var;
186
187 struct field *root_field = ralloc(mem_ctx, struct field);
188 init_field_for_type(root_field, NULL, var->type, var->name, &state);
189 _mesa_hash_table_insert(var_field_map, var, root_field);
190 }
191
192 return !exec_list_is_empty(&split_vars);
193 }
194
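/* Rewrite vector/scalar derefs of split struct variables so they point at
 * the matching replacement variable, dropping struct deref levels and
 * rebuilding the array levels on top of the new variable.
 */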
195 static void
196 split_struct_derefs_impl(nir_function_impl *impl,
197 struct hash_table *var_field_map,
198 nir_variable_mode modes,
199 void *mem_ctx)
200 {
201 nir_builder b;
202 nir_builder_init(&b, impl);
203
204 nir_foreach_block(block, impl) {
205 nir_foreach_instr_safe(instr, block) {
206 if (instr->type != nir_instr_type_deref)
207 continue;
208
209 nir_deref_instr *deref = nir_instr_as_deref(instr);
210 if (!nir_deref_mode_may_be(deref, modes))
211 continue;
212
213 /* Clean up any dead derefs we find lying around. They may refer to
214 * variables we're planning to split.
215 */
216 if (nir_deref_instr_remove_if_unused(deref))
217 continue;
218
219 if (!glsl_type_is_vector_or_scalar(deref->type))
220 continue;
221
222 nir_variable *base_var = nir_deref_instr_get_variable(deref);
223 /* If we can't chase back to the variable, then we're a complex use.
224 * This should have been detected by get_complex_used_vars() and the
225 * variable should not have been split. However, we have no way of
226 * knowing that here, so we just have to trust it.
227 */
228 if (base_var == NULL)
229 continue;
230
231 struct hash_entry *entry =
232 _mesa_hash_table_search(var_field_map, base_var);
233 if (!entry)
234 continue;
235
236 struct field *root_field = entry->data;
237
238 nir_deref_path path;
239 nir_deref_path_init(&path, deref, mem_ctx);
240
241 struct field *tail_field = root_field;
242 for (unsigned i = 0; path.path[i]; i++) {
243 if (path.path[i]->deref_type != nir_deref_type_struct)
244 continue;
245
246 assert(i > 0);
247 assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
248 assert(path.path[i - 1]->type ==
249 glsl_without_array(tail_field->type));
250
251 tail_field = &tail_field->fields[path.path[i]->strct.index];
252 }
253 nir_variable *split_var = tail_field->var;
254
255 nir_deref_instr *new_deref = NULL;
256 for (unsigned i = 0; path.path[i]; i++) {
257 nir_deref_instr *p = path.path[i];
258 b.cursor = nir_after_instr(&p->instr);
259
260 switch (p->deref_type) {
261 case nir_deref_type_var:
262 assert(new_deref == NULL);
263 new_deref = nir_build_deref_var(&b, split_var);
264 break;
265
266 case nir_deref_type_array:
267 case nir_deref_type_array_wildcard:
268 new_deref = nir_build_deref_follower(&b, new_deref, p);
269 break;
270
271 case nir_deref_type_struct:
272 /* Nothing to do; we're splitting structs */
273 break;
274
275 default:
276 unreachable("Invalid deref type in path");
277 }
278 }
279
280 assert(new_deref->type == deref->type);
281 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
282 &new_deref->dest.ssa);
283 nir_deref_instr_remove_if_unused(deref);
284 }
285 }
286 }
287
288 /** A pass for splitting structs into multiple variables
289 *
290 * This pass splits arrays of structs into multiple variables, one for each
291 * (possibly nested) structure member. After this pass completes, no
292 * variables of the given mode will contain a struct type.
293 */
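/* As a rough illustration (names are only approximate), a variable declared
 * in GLSL as
 *
 *    struct { vec4 color; float depth; } samples[4];
 *
 * becomes two variables along the lines of
 *
 *    vec4  samples_color[4];
 *    float samples_depth[4];
 *
 * and every deref of samples[i].color / samples[i].depth is rewritten to
 * use the corresponding new variable.
 */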
294 bool
295 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
296 {
297 void *mem_ctx = ralloc_context(NULL);
298 struct hash_table *var_field_map =
299 _mesa_pointer_hash_table_create(mem_ctx);
300 struct set *complex_vars = NULL;
301
302 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
303
304 bool has_global_splits = false;
305 if (modes & nir_var_shader_temp) {
306 has_global_splits = split_var_list_structs(shader, NULL,
307 &shader->variables,
308 nir_var_shader_temp,
309 var_field_map,
310 &complex_vars,
311 mem_ctx);
312 }
313
314 bool progress = false;
315 nir_foreach_function(function, shader) {
316 if (!function->impl)
317 continue;
318
319 bool has_local_splits = false;
320 if (modes & nir_var_function_temp) {
321 has_local_splits = split_var_list_structs(shader, function->impl,
322 &function->impl->locals,
323 nir_var_function_temp,
324 var_field_map,
325 &complex_vars,
326 mem_ctx);
327 }
328
329 if (has_global_splits || has_local_splits) {
330 split_struct_derefs_impl(function->impl, var_field_map,
331 modes, mem_ctx);
332
333 nir_metadata_preserve(function->impl, nir_metadata_block_index |
334 nir_metadata_dominance);
335 progress = true;
336 } else {
337 nir_metadata_preserve(function->impl, nir_metadata_all);
338 }
339 }
340
341 ralloc_free(mem_ctx);
342
343 return progress;
344 }
345
346 struct array_level_info {
347 unsigned array_len;
348 bool split;
349 };
350
351 struct array_split {
352 /* Only set if this is the tail end of the splitting */
353 nir_variable *var;
354
355 unsigned num_splits;
356 struct array_split *splits;
357 };
358
359 struct array_var_info {
360 nir_variable *base_var;
361
362 const struct glsl_type *split_var_type;
363
364 bool split_var;
365 struct array_split root_split;
366
367 unsigned num_levels;
368 struct array_level_info levels[0];
369 };
370
371 static bool
372 init_var_list_array_infos(nir_shader *shader,
373 struct exec_list *vars,
374 nir_variable_mode mode,
375 struct hash_table *var_info_map,
376 struct set **complex_vars,
377 void *mem_ctx)
378 {
379 bool has_array = false;
380
381 nir_foreach_variable_in_list(var, vars) {
382 if (var->data.mode != mode)
383 continue;
384
385 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
386 if (num_levels <= 0)
387 continue;
388
389 if (*complex_vars == NULL)
390 *complex_vars = get_complex_used_vars(shader, mem_ctx);
391
392 /* We can't split a variable that's referenced by a deref that has any
393 * sort of complex usage.
394 */
395 if (_mesa_set_search(*complex_vars, var))
396 continue;
397
398 struct array_var_info *info =
399 rzalloc_size(mem_ctx, sizeof(*info) +
400 num_levels * sizeof(info->levels[0]));
401
402 info->base_var = var;
403 info->num_levels = num_levels;
404
405 const struct glsl_type *type = var->type;
406 for (int i = 0; i < num_levels; i++) {
407 info->levels[i].array_len = glsl_get_length(type);
408 type = glsl_get_array_element(type);
409
410 /* All levels start out as split */
411 info->levels[i].split = true;
412 }
413
414 _mesa_hash_table_insert(var_info_map, var, info);
415 has_array = true;
416 }
417
418 return has_array;
419 }
420
421 static struct array_var_info *
422 get_array_var_info(nir_variable *var,
423 struct hash_table *var_info_map)
424 {
425 struct hash_entry *entry =
426 _mesa_hash_table_search(var_info_map, var);
427 return entry ? entry->data : NULL;
428 }
429
430 static struct array_var_info *
431 get_array_deref_info(nir_deref_instr *deref,
432 struct hash_table *var_info_map,
433 nir_variable_mode modes)
434 {
435 if (!nir_deref_mode_may_be(deref, modes))
436 return NULL;
437
438 nir_variable *var = nir_deref_instr_get_variable(deref);
439 if (var == NULL)
440 return NULL;
441
442 return get_array_var_info(var, var_info_map);
443 }
444
445 static void
446 mark_array_deref_used(nir_deref_instr *deref,
447 struct hash_table *var_info_map,
448 nir_variable_mode modes,
449 void *mem_ctx)
450 {
451 struct array_var_info *info =
452 get_array_deref_info(deref, var_info_map, modes);
453 if (!info)
454 return;
455
456 nir_deref_path path;
457 nir_deref_path_init(&path, deref, mem_ctx);
458
459 /* Walk the path and look for indirects. If we have an array deref with an
460 * indirect, mark the given level as not being split.
461 */
462 for (unsigned i = 0; i < info->num_levels; i++) {
463 nir_deref_instr *p = path.path[i + 1];
464 if (p->deref_type == nir_deref_type_array &&
465 !nir_src_is_const(p->arr.index))
466 info->levels[i].split = false;
467 }
468 }
469
470 static void
471 mark_array_usage_impl(nir_function_impl *impl,
472 struct hash_table *var_info_map,
473 nir_variable_mode modes,
474 void *mem_ctx)
475 {
476 nir_foreach_block(block, impl) {
477 nir_foreach_instr(instr, block) {
478 if (instr->type != nir_instr_type_intrinsic)
479 continue;
480
481 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
482 switch (intrin->intrinsic) {
483 case nir_intrinsic_copy_deref:
484 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
485 var_info_map, modes, mem_ctx);
486 FALLTHROUGH;
487
488 case nir_intrinsic_load_deref:
489 case nir_intrinsic_store_deref:
490 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
491 var_info_map, modes, mem_ctx);
492 break;
493
494 default:
495 break;
496 }
497 }
498 }
499 }
500
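/* Recursively create the variables for a split array.  Unsplit (indirectly
 * indexed) levels are kept as array dimensions of the new variable and show
 * up as "[*]" in its name, while each split level produces one child split
 * per array element.
 */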
501 static void
502 create_split_array_vars(struct array_var_info *var_info,
503 unsigned level,
504 struct array_split *split,
505 const char *name,
506 nir_shader *shader,
507 nir_function_impl *impl,
508 void *mem_ctx)
509 {
510 while (level < var_info->num_levels && !var_info->levels[level].split) {
511 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
512 level++;
513 }
514
515 if (level == var_info->num_levels) {
516 /* We add parens to the variable name so it looks like "(foo[2][*])" so
517 * that further derefs will look like "(foo[2][*])[ssa_6]"
518 */
519 name = ralloc_asprintf(mem_ctx, "(%s)", name);
520
521 nir_variable_mode mode = var_info->base_var->data.mode;
522 if (mode == nir_var_function_temp) {
523 split->var = nir_local_variable_create(impl,
524 var_info->split_var_type, name);
525 } else {
526 split->var = nir_variable_create(shader, mode,
527 var_info->split_var_type, name);
528 }
529 split->var->data.ray_query = var_info->base_var->data.ray_query;
530 } else {
531 assert(var_info->levels[level].split);
532 split->num_splits = var_info->levels[level].array_len;
533 split->splits = rzalloc_array(mem_ctx, struct array_split,
534 split->num_splits);
535 for (unsigned i = 0; i < split->num_splits; i++) {
536 create_split_array_vars(var_info, level + 1, &split->splits[i],
537 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
538 shader, impl, mem_ctx);
539 }
540 }
541 }
542
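/* Compute the final (partially split) type for each candidate array
 * variable, pull the ones that actually get split off of the variable list,
 * and create their replacement variables.  Returns true if anything was
 * split.
 */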
543 static bool
544 split_var_list_arrays(nir_shader *shader,
545 nir_function_impl *impl,
546 struct exec_list *vars,
547 nir_variable_mode mode,
548 struct hash_table *var_info_map,
549 void *mem_ctx)
550 {
551 struct exec_list split_vars;
552 exec_list_make_empty(&split_vars);
553
554 nir_foreach_variable_in_list_safe(var, vars) {
555 if (var->data.mode != mode)
556 continue;
557
558 struct array_var_info *info = get_array_var_info(var, var_info_map);
559 if (!info)
560 continue;
561
562 bool has_split = false;
563 const struct glsl_type *split_type =
564 glsl_without_array_or_matrix(var->type);
565 for (int i = info->num_levels - 1; i >= 0; i--) {
566 if (info->levels[i].split) {
567 has_split = true;
568 continue;
569 }
570
571 /* If the original type was a matrix type, we'd like to keep that so
572 * we don't convert matrices into arrays.
573 */
574 if (i == info->num_levels - 1 &&
575 glsl_type_is_matrix(glsl_without_array(var->type))) {
576 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
577 glsl_get_components(split_type),
578 info->levels[i].array_len);
579 } else {
580 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
581 }
582 }
583
584 if (has_split) {
585 info->split_var_type = split_type;
586 /* To avoid list confusion (we'll be adding things as we split
587 * variables), pull all of the variables we plan to split off of the
588 * main variable list.
589 */
590 exec_node_remove(&var->node);
591 exec_list_push_tail(&split_vars, &var->node);
592 } else {
593 assert(split_type == glsl_get_bare_type(var->type));
594 /* If we're not modifying this variable, delete the info so we skip
595 * it faster in later passes.
596 */
597 _mesa_hash_table_remove_key(var_info_map, var);
598 }
599 }
600
601 nir_foreach_variable_in_list(var, &split_vars) {
602 struct array_var_info *info = get_array_var_info(var, var_info_map);
603 create_split_array_vars(info, 0, &info->root_split, var->name,
604 shader, impl, mem_ctx);
605 }
606
607 return !exec_list_is_empty(&split_vars);
608 }
609
610 static bool
611 deref_has_split_wildcard(nir_deref_path *path,
612 struct array_var_info *info)
613 {
614 if (info == NULL)
615 return false;
616
617 assert(path->path[0]->var == info->base_var);
618 for (unsigned i = 0; i < info->num_levels; i++) {
619 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
620 info->levels[i].split)
621 return true;
622 }
623
624 return false;
625 }
626
627 static bool
628 array_path_is_out_of_bounds(nir_deref_path *path,
629 struct array_var_info *info)
630 {
631 if (info == NULL)
632 return false;
633
634 assert(path->path[0]->var == info->base_var);
635 for (unsigned i = 0; i < info->num_levels; i++) {
636 nir_deref_instr *p = path->path[i + 1];
637 if (p->deref_type == nir_deref_type_array_wildcard)
638 continue;
639
640 if (nir_src_is_const(p->arr.index) &&
641 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
642 return true;
643 }
644
645 return false;
646 }
647
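/* Recursively lower a copy_deref whose source or destination contains split
 * wildcard levels.  A wildcard level that is split on either side is
 * expanded into per-element copies; levels that remain arrays on both sides
 * keep their wildcards.
 */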
648 static void
649 emit_split_copies(nir_builder *b,
650 struct array_var_info *dst_info, nir_deref_path *dst_path,
651 unsigned dst_level, nir_deref_instr *dst,
652 struct array_var_info *src_info, nir_deref_path *src_path,
653 unsigned src_level, nir_deref_instr *src)
654 {
655 nir_deref_instr *dst_p, *src_p;
656
657 while ((dst_p = dst_path->path[dst_level + 1])) {
658 if (dst_p->deref_type == nir_deref_type_array_wildcard)
659 break;
660
661 dst = nir_build_deref_follower(b, dst, dst_p);
662 dst_level++;
663 }
664
665 while ((src_p = src_path->path[src_level + 1])) {
666 if (src_p->deref_type == nir_deref_type_array_wildcard)
667 break;
668
669 src = nir_build_deref_follower(b, src, src_p);
670 src_level++;
671 }
672
673 if (src_p == NULL || dst_p == NULL) {
674 assert(src_p == NULL && dst_p == NULL);
675 nir_copy_deref(b, dst, src);
676 } else {
677 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
678 src_p->deref_type == nir_deref_type_array_wildcard);
679
680 if ((dst_info && dst_info->levels[dst_level].split) ||
681 (src_info && src_info->levels[src_level].split)) {
682 /* At least one of the source or the destination has no indirects at
683 * this level, so we lower it to per-element copies.
684 */
685 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
686 glsl_get_length(src_path->path[src_level]->type));
687 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
688 for (unsigned i = 0; i < len; i++) {
689 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
690 nir_build_deref_array_imm(b, dst, i),
691 src_info, src_path, src_level + 1,
692 nir_build_deref_array_imm(b, src, i));
693 }
694 } else {
695 /* Neither side is being split so we just keep going */
696 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
697 nir_build_deref_array_wildcard(b, dst),
698 src_info, src_path, src_level + 1,
699 nir_build_deref_array_wildcard(b, src));
700 }
701 }
702 }
703
704 static void
705 split_array_copies_impl(nir_function_impl *impl,
706 struct hash_table *var_info_map,
707 nir_variable_mode modes,
708 void *mem_ctx)
709 {
710 nir_builder b;
711 nir_builder_init(&b, impl);
712
713 nir_foreach_block(block, impl) {
714 nir_foreach_instr_safe(instr, block) {
715 if (instr->type != nir_instr_type_intrinsic)
716 continue;
717
718 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
719 if (copy->intrinsic != nir_intrinsic_copy_deref)
720 continue;
721
722 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
723 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
724
725 struct array_var_info *dst_info =
726 get_array_deref_info(dst_deref, var_info_map, modes);
727 struct array_var_info *src_info =
728 get_array_deref_info(src_deref, var_info_map, modes);
729
730 if (!src_info && !dst_info)
731 continue;
732
733 nir_deref_path dst_path, src_path;
734 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
735 nir_deref_path_init(&src_path, src_deref, mem_ctx);
736
737 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
738 !deref_has_split_wildcard(&src_path, src_info))
739 continue;
740
741 b.cursor = nir_instr_remove(&copy->instr);
742
743 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
744 src_info, &src_path, 0, src_path.path[0]);
745 }
746 }
747 }
748
749 static void
750 split_array_access_impl(nir_function_impl *impl,
751 struct hash_table *var_info_map,
752 nir_variable_mode modes,
753 void *mem_ctx)
754 {
755 nir_builder b;
756 nir_builder_init(&b, impl);
757
758 nir_foreach_block(block, impl) {
759 nir_foreach_instr_safe(instr, block) {
760 if (instr->type == nir_instr_type_deref) {
761 /* Clean up any dead derefs we find lying around. They may refer
762 * to variables we're planning to split.
763 */
764 nir_deref_instr *deref = nir_instr_as_deref(instr);
765 if (nir_deref_mode_may_be(deref, modes))
766 nir_deref_instr_remove_if_unused(deref);
767 continue;
768 }
769
770 if (instr->type != nir_instr_type_intrinsic)
771 continue;
772
773 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
774 if (intrin->intrinsic != nir_intrinsic_load_deref &&
775 intrin->intrinsic != nir_intrinsic_store_deref &&
776 intrin->intrinsic != nir_intrinsic_copy_deref)
777 continue;
778
779 const unsigned num_derefs =
780 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
781
782 for (unsigned d = 0; d < num_derefs; d++) {
783 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
784
785 struct array_var_info *info =
786 get_array_deref_info(deref, var_info_map, modes);
787 if (!info)
788 continue;
789
790 nir_deref_path path;
791 nir_deref_path_init(&path, deref, mem_ctx);
792
793 b.cursor = nir_before_instr(&intrin->instr);
794
795 if (array_path_is_out_of_bounds(&path, info)) {
796 /* If one of the derefs is out-of-bounds, we just delete the
797 * instruction. If a destination is out of bounds, then it may
798 * have been in-bounds prior to shrinking so we don't want to
799 * accidentally stomp something. However, we've already proven
800 * that it will never be read so it's safe to delete. If a
801 * source is out of bounds then it is loading random garbage.
802 * For loads, we replace their uses with an undef instruction
803 * and for copies we just delete the copy since it was writing
804 * undefined garbage anyway and we may as well leave the random
805 * garbage in the destination alone.
806 */
807 if (intrin->intrinsic == nir_intrinsic_load_deref) {
808 nir_ssa_def *u =
809 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
810 intrin->dest.ssa.bit_size);
811 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
812 u);
813 }
814 nir_instr_remove(&intrin->instr);
815 for (unsigned i = 0; i < num_derefs; i++)
816 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
817 break;
818 }
819
820 struct array_split *split = &info->root_split;
821 for (unsigned i = 0; i < info->num_levels; i++) {
822 if (info->levels[i].split) {
823 nir_deref_instr *p = path.path[i + 1];
824 unsigned index = nir_src_as_uint(p->arr.index);
825 assert(index < info->levels[i].array_len);
826 split = &split->splits[index];
827 }
828 }
829 assert(!split->splits && split->var);
830
831 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
832 for (unsigned i = 0; i < info->num_levels; i++) {
833 if (!info->levels[i].split) {
834 new_deref = nir_build_deref_follower(&b, new_deref,
835 path.path[i + 1]);
836 }
837 }
838 assert(new_deref->type == deref->type);
839
840 /* Rewrite the deref source to point to the split one */
841 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
842 nir_src_for_ssa(&new_deref->dest.ssa));
843 nir_deref_instr_remove_if_unused(deref);
844 }
845 }
846 }
847 }
848
849 /** A pass for splitting arrays of vectors into multiple variables
850 *
851 * This pass looks at arrays (possibly multiple levels) of vectors (not
852 * structures or other types) and tries to split them into piles of variables,
853 * one for each array element. The heuristic used is simple: If a given array
854 * level is never used with an indirect, that array level will get split.
855 *
856 * This pass could probably handle structures easily enough, but making a pass
857 * that could see through an array of structures of arrays would be difficult
858 * so it's best to just run nir_split_struct_vars first.
859 */
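/* As a rough example, assuming a local variable such as
 *
 *    vec4 colors[3];
 *
 * that is only ever indexed with constants, this pass replaces it with
 * three vec4 variables named along the lines of "(colors[0])",
 * "(colors[1])" and "(colors[2])".  Any level that is indexed indirectly
 * stays an array and shows up as "[*]" in the generated names.
 */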
860 bool
861 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
862 {
863 void *mem_ctx = ralloc_context(NULL);
864 struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
865 struct set *complex_vars = NULL;
866
867 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
868
869 bool has_global_array = false;
870 if (modes & nir_var_shader_temp) {
871 has_global_array = init_var_list_array_infos(shader,
872 &shader->variables,
873 nir_var_shader_temp,
874 var_info_map,
875 &complex_vars,
876 mem_ctx);
877 }
878
879 bool has_any_array = false;
880 nir_foreach_function(function, shader) {
881 if (!function->impl)
882 continue;
883
884 bool has_local_array = false;
885 if (modes & nir_var_function_temp) {
886 has_local_array = init_var_list_array_infos(shader,
887 &function->impl->locals,
888 nir_var_function_temp,
889 var_info_map,
890 &complex_vars,
891 mem_ctx);
892 }
893
894 if (has_global_array || has_local_array) {
895 has_any_array = true;
896 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
897 }
898 }
899
900 /* If we failed to find any arrays of vectors, bail early. */
901 if (!has_any_array) {
902 ralloc_free(mem_ctx);
903 nir_shader_preserve_all_metadata(shader);
904 return false;
905 }
906
907 bool has_global_splits = false;
908 if (modes & nir_var_shader_temp) {
909 has_global_splits = split_var_list_arrays(shader, NULL,
910 &shader->variables,
911 nir_var_shader_temp,
912 var_info_map, mem_ctx);
913 }
914
915 bool progress = false;
916 nir_foreach_function(function, shader) {
917 if (!function->impl)
918 continue;
919
920 bool has_local_splits = false;
921 if (modes & nir_var_function_temp) {
922 has_local_splits = split_var_list_arrays(shader, function->impl,
923 &function->impl->locals,
924 nir_var_function_temp,
925 var_info_map, mem_ctx);
926 }
927
928 if (has_global_splits || has_local_splits) {
929 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
930 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
931
932 nir_metadata_preserve(function->impl, nir_metadata_block_index |
933 nir_metadata_dominance);
934 progress = true;
935 } else {
936 nir_metadata_preserve(function->impl, nir_metadata_all);
937 }
938 }
939
940 ralloc_free(mem_ctx);
941
942 return progress;
943 }
944
945 struct array_level_usage {
946 unsigned array_len;
947
948 /* The value UINT_MAX will be used to indicate an indirect */
949 unsigned max_read;
950 unsigned max_written;
951
952 /* True if there is a copy that isn't to/from a shrinkable array */
953 bool has_external_copy;
954 struct set *levels_copied;
955 };
956
957 struct vec_var_usage {
958 /* Convenience set of all components this variable has */
959 nir_component_mask_t all_comps;
960
961 nir_component_mask_t comps_read;
962 nir_component_mask_t comps_written;
963
964 nir_component_mask_t comps_kept;
965
966 /* True if there is a copy that isn't to/from a shrinkable vector */
967 bool has_external_copy;
968 bool has_complex_use;
969 struct set *vars_copied;
970
971 unsigned num_levels;
972 struct array_level_usage levels[0];
973 };
974
975 static struct vec_var_usage *
976 get_vec_var_usage(nir_variable *var,
977 struct hash_table *var_usage_map,
978 bool add_usage_entry, void *mem_ctx)
979 {
980 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
981 if (entry)
982 return entry->data;
983
984 if (!add_usage_entry)
985 return NULL;
986
987 /* Check to make sure that we are working with an array of vectors. We
988 * don't bother to shrink single vectors because we figure that we can
989 * clean it up better with SSA than by inserting piles of vecN instructions
990 * to compact results.
991 */
992 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
993 if (num_levels < 1)
994 return NULL; /* Not an array of vectors */
995
996 struct vec_var_usage *usage =
997 rzalloc_size(mem_ctx, sizeof(*usage) +
998 num_levels * sizeof(usage->levels[0]));
999
1000 usage->num_levels = num_levels;
1001 const struct glsl_type *type = var->type;
1002 for (unsigned i = 0; i < num_levels; i++) {
1003 usage->levels[i].array_len = glsl_get_length(type);
1004 type = glsl_get_array_element(type);
1005 }
1006 assert(glsl_type_is_vector_or_scalar(type));
1007
1008 usage->all_comps = (1 << glsl_get_components(type)) - 1;
1009
1010 _mesa_hash_table_insert(var_usage_map, var, usage);
1011
1012 return usage;
1013 }
1014
1015 static struct vec_var_usage *
1016 get_vec_deref_usage(nir_deref_instr *deref,
1017 struct hash_table *var_usage_map,
1018 nir_variable_mode modes,
1019 bool add_usage_entry, void *mem_ctx)
1020 {
1021 if (!nir_deref_mode_may_be(deref, modes))
1022 return NULL;
1023
1024 nir_variable *var = nir_deref_instr_get_variable(deref);
1025 if (var == NULL)
1026 return NULL;
1027
1028 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
1029 var_usage_map, add_usage_entry, mem_ctx);
1030 }
1031
1032 static void
1033 mark_deref_if_complex(nir_deref_instr *deref,
1034 struct hash_table *var_usage_map,
1035 nir_variable_mode modes,
1036 void *mem_ctx)
1037 {
1038 /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1039 * recursive.
1040 */
1041 if (deref->deref_type != nir_deref_type_var)
1042 return;
1043
1044 if (!(deref->var->data.mode & modes))
1045 return;
1046
1047 if (!nir_deref_instr_has_complex_use(deref, 0))
1048 return;
1049
1050 struct vec_var_usage *usage =
1051 get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
1052 if (!usage)
1053 return;
1054
1055 usage->has_complex_use = true;
1056 }
1057
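/* Record which components and which array elements of a variable are read
 * and/or written by a load, store, or copy.  For copies, the usage structs
 * (and each pair of wildcard levels) of the two sides are linked together
 * so that the fixed-point loop in shrink_vec_var_list can keep their types
 * compatible; copies to or from untracked variables are flagged as external.
 */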
1058 static void
1059 mark_deref_used(nir_deref_instr *deref,
1060 nir_component_mask_t comps_read,
1061 nir_component_mask_t comps_written,
1062 nir_deref_instr *copy_deref,
1063 struct hash_table *var_usage_map,
1064 nir_variable_mode modes,
1065 void *mem_ctx)
1066 {
1067 if (!nir_deref_mode_may_be(deref, modes))
1068 return;
1069
1070 nir_variable *var = nir_deref_instr_get_variable(deref);
1071 if (var == NULL)
1072 return;
1073
1074 struct vec_var_usage *usage =
1075 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
1076 if (!usage)
1077 return;
1078
1079 usage->comps_read |= comps_read & usage->all_comps;
1080 usage->comps_written |= comps_written & usage->all_comps;
1081
1082 struct vec_var_usage *copy_usage = NULL;
1083 if (copy_deref) {
1084 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
1085 true, mem_ctx);
1086 if (copy_usage) {
1087 if (usage->vars_copied == NULL) {
1088 usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
1089 }
1090 _mesa_set_add(usage->vars_copied, copy_usage);
1091 } else {
1092 usage->has_external_copy = true;
1093 }
1094 }
1095
1096 nir_deref_path path;
1097 nir_deref_path_init(&path, deref, mem_ctx);
1098
1099 nir_deref_path copy_path;
1100 if (copy_usage)
1101 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
1102
1103 unsigned copy_i = 0;
1104 for (unsigned i = 0; i < usage->num_levels; i++) {
1105 struct array_level_usage *level = &usage->levels[i];
1106 nir_deref_instr *deref = path.path[i + 1];
1107 assert(deref->deref_type == nir_deref_type_array ||
1108 deref->deref_type == nir_deref_type_array_wildcard);
1109
1110 unsigned max_used;
1111 if (deref->deref_type == nir_deref_type_array) {
1112 max_used = nir_src_is_const(deref->arr.index) ?
1113 nir_src_as_uint(deref->arr.index) : UINT_MAX;
1114 } else {
1115 /* For wildcards, we read or wrote the whole thing. */
1116 assert(deref->deref_type == nir_deref_type_array_wildcard);
1117 max_used = level->array_len - 1;
1118
1119 if (copy_usage) {
1120 /* Match each wildcard level with the level on copy_usage */
1121 for (; copy_path.path[copy_i + 1]; copy_i++) {
1122 if (copy_path.path[copy_i + 1]->deref_type ==
1123 nir_deref_type_array_wildcard)
1124 break;
1125 }
1126 struct array_level_usage *copy_level =
1127 &copy_usage->levels[copy_i++];
1128
1129 if (level->levels_copied == NULL) {
1130 level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1131 }
1132 _mesa_set_add(level->levels_copied, copy_level);
1133 } else {
1134 /* We have a wildcard and it comes from a variable we aren't
1135 * tracking; flag it and we'll know to not shorten this array.
1136 */
1137 level->has_external_copy = true;
1138 }
1139 }
1140
1141 if (comps_written)
1142 level->max_written = MAX2(level->max_written, max_used);
1143 if (comps_read)
1144 level->max_read = MAX2(level->max_read, max_used);
1145 }
1146 }
1147
1148 static bool
1149 src_is_load_deref(nir_src src, nir_src deref_src)
1150 {
1151 nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1152 if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1153 return false;
1154
1155 assert(load->src[0].is_ssa);
1156
1157 return load->src[0].ssa == deref_src.ssa;
1158 }
1159
1160 /* Returns all non-self-referential components of a store instruction. A
1161 * component is self-referential if it comes from the same component of a load
1162 * instruction on the same deref. If the only data in a particular component
1163 * of a variable came directly from that component then it's undefined. The
1164 * only way to get defined data into a component of a variable is for it to
1165 * get written there by something outside or from a different component.
1166 *
1167 * This is a fairly common pattern in shaders that come from either GLSL IR or
1168 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1169 * load-vec-store.
1170 */
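/* A minimal sketch of the pattern being detected, in GLSL-ish pseudocode:
 *
 *    vec4 tmp = v;   // load_deref v
 *    tmp.y = x;      // vec(tmp.x, x, tmp.z, tmp.w)
 *    v = tmp;        // store_deref v
 *
 * Only the .y component receives new data here; the other three channels
 * are self-referential and are not counted as written.
 */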
1171 static nir_component_mask_t
1172 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1173 {
1174 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1175
1176 assert(store->src[1].is_ssa);
1177 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1178 if (src_instr->type != nir_instr_type_alu)
1179 return comps;
1180
1181 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1182
1183 if (src_alu->op == nir_op_mov) {
1184 /* If it's just a swizzle of a load from the same deref, discount any
1185 * channels that don't move in the swizzle.
1186 */
1187 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1188 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1189 if (src_alu->src[0].swizzle[i] == i)
1190 comps &= ~(1u << i);
1191 }
1192 }
1193 } else if (nir_op_is_vec(src_alu->op)) {
1194 /* If it's a vec, discount any channels that are just loads from the
1195 * same deref put in the same spot.
1196 */
1197 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1198 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1199 src_alu->src[i].swizzle[0] == i)
1200 comps &= ~(1u << i);
1201 }
1202 }
1203
1204 return comps;
1205 }
1206
1207 static void
1208 find_used_components_impl(nir_function_impl *impl,
1209 struct hash_table *var_usage_map,
1210 nir_variable_mode modes,
1211 void *mem_ctx)
1212 {
1213 nir_foreach_block(block, impl) {
1214 nir_foreach_instr(instr, block) {
1215 if (instr->type == nir_instr_type_deref) {
1216 mark_deref_if_complex(nir_instr_as_deref(instr),
1217 var_usage_map, modes, mem_ctx);
1218 }
1219
1220 if (instr->type != nir_instr_type_intrinsic)
1221 continue;
1222
1223 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1224 switch (intrin->intrinsic) {
1225 case nir_intrinsic_load_deref:
1226 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1227 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1228 NULL, var_usage_map, modes, mem_ctx);
1229 break;
1230
1231 case nir_intrinsic_store_deref:
1232 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1233 0, get_non_self_referential_store_comps(intrin),
1234 NULL, var_usage_map, modes, mem_ctx);
1235 break;
1236
1237 case nir_intrinsic_copy_deref: {
1238 /* Just mark everything used for copies. */
1239 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1240 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1241 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1242 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1243 break;
1244 }
1245
1246 default:
1247 break;
1248 }
1249 }
1250 }
1251 }
1252
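/* Shrink (or delete) the variables on the given list according to the
 * recorded usage: components that are not both read and written are
 * dropped, array levels are clamped to the elements actually used, dead
 * variables are removed from the list, and variables that do not change are
 * removed from the usage map so later steps skip them.  Returns true if any
 * variable changed.
 */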
1253 static bool
1254 shrink_vec_var_list(struct exec_list *vars,
1255 nir_variable_mode mode,
1256 struct hash_table *var_usage_map)
1257 {
1258 /* Initialize the components kept field of each variable. This is the
1259 * AND of the components written and components read. If a component is
1260 * written but never read, it's dead. If it is read but never written,
1261 * then all values read are undefined garbage and we may as well not read
1262 * them.
1263 *
1264 * The same logic applies to the array length. We make the array length
1265 * the minimum required length between read and write and plan to
1266 * discard any OOB access. The one exception here is indirect writes
1267 * because we don't know where they will land and we can't shrink an array
1268 * with indirect writes because previously in-bounds writes may become
1269 * out-of-bounds and have undefined behavior.
1270 *
1271 * Also, if we have a copy to/from something we can't shrink, we need
1272 * to leave components and array_len of any wildcards alone.
1273 */
1274 nir_foreach_variable_in_list(var, vars) {
1275 if (var->data.mode != mode)
1276 continue;
1277
1278 struct vec_var_usage *usage =
1279 get_vec_var_usage(var, var_usage_map, false, NULL);
1280 if (!usage)
1281 continue;
1282
1283 assert(usage->comps_kept == 0);
1284 if (usage->has_external_copy || usage->has_complex_use)
1285 usage->comps_kept = usage->all_comps;
1286 else
1287 usage->comps_kept = usage->comps_read & usage->comps_written;
1288
1289 for (unsigned i = 0; i < usage->num_levels; i++) {
1290 struct array_level_usage *level = &usage->levels[i];
1291 assert(level->array_len > 0);
1292
1293 if (level->max_written == UINT_MAX || level->has_external_copy ||
1294 usage->has_complex_use)
1295 continue; /* Can't shrink */
1296
1297 unsigned max_used = MIN2(level->max_read, level->max_written);
1298 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1299 }
1300 }
1301
1302 /* In order for variable copies to work, we have to have the same data type
1303 * on the source and the destination. In order to satisfy this, we run a
1304 * little fixed-point algorithm to transitively ensure that we get enough
1305 * components and array elements for this to hold for all copies.
1306 */
1307 bool fp_progress;
1308 do {
1309 fp_progress = false;
1310 nir_foreach_variable_in_list(var, vars) {
1311 if (var->data.mode != mode)
1312 continue;
1313
1314 struct vec_var_usage *var_usage =
1315 get_vec_var_usage(var, var_usage_map, false, NULL);
1316 if (!var_usage || !var_usage->vars_copied)
1317 continue;
1318
1319 set_foreach(var_usage->vars_copied, copy_entry) {
1320 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1321 if (copy_usage->comps_kept != var_usage->comps_kept) {
1322 nir_component_mask_t comps_kept =
1323 (var_usage->comps_kept | copy_usage->comps_kept);
1324 var_usage->comps_kept = comps_kept;
1325 copy_usage->comps_kept = comps_kept;
1326 fp_progress = true;
1327 }
1328 }
1329
1330 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1331 struct array_level_usage *var_level = &var_usage->levels[i];
1332 if (!var_level->levels_copied)
1333 continue;
1334
1335 set_foreach(var_level->levels_copied, copy_entry) {
1336 struct array_level_usage *copy_level = (void *)copy_entry->key;
1337 if (var_level->array_len != copy_level->array_len) {
1338 unsigned array_len =
1339 MAX2(var_level->array_len, copy_level->array_len);
1340 var_level->array_len = array_len;
1341 copy_level->array_len = array_len;
1342 fp_progress = true;
1343 }
1344 }
1345 }
1346 }
1347 } while (fp_progress);
1348
1349 bool vars_shrunk = false;
1350 nir_foreach_variable_in_list_safe(var, vars) {
1351 if (var->data.mode != mode)
1352 continue;
1353
1354 struct vec_var_usage *usage =
1355 get_vec_var_usage(var, var_usage_map, false, NULL);
1356 if (!usage)
1357 continue;
1358
1359 bool shrunk = false;
1360 const struct glsl_type *vec_type = var->type;
1361 for (unsigned i = 0; i < usage->num_levels; i++) {
1362 /* If we've reduced the array to zero elements at some level, just
1363 * set comps_kept to 0 and delete the variable.
1364 */
1365 if (usage->levels[i].array_len == 0) {
1366 usage->comps_kept = 0;
1367 break;
1368 }
1369
1370 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1371 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1372 shrunk = true;
1373 vec_type = glsl_get_array_element(vec_type);
1374 }
1375 assert(glsl_type_is_vector_or_scalar(vec_type));
1376
1377 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1378 if (usage->comps_kept != usage->all_comps)
1379 shrunk = true;
1380
1381 if (usage->comps_kept == 0) {
1382 /* This variable is dead, remove it */
1383 vars_shrunk = true;
1384 exec_node_remove(&var->node);
1385 continue;
1386 }
1387
1388 if (!shrunk) {
1389 /* This variable doesn't need to be shrunk. Remove it from the
1390 * hash table so later steps will ignore it.
1391 */
1392 _mesa_hash_table_remove_key(var_usage_map, var);
1393 continue;
1394 }
1395
1396 /* Build the new var type */
1397 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1398 const struct glsl_type *new_type =
1399 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1400 for (int i = usage->num_levels - 1; i >= 0; i--) {
1401 assert(usage->levels[i].array_len > 0);
1402 /* If the original type was a matrix type, we'd like to keep that so
1403 * we don't convert matrices into arrays.
1404 */
1405 if (i == usage->num_levels - 1 &&
1406 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1407 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1408 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1409 new_num_comps,
1410 usage->levels[i].array_len);
1411 } else {
1412 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1413 }
1414 }
1415 var->type = new_type;
1416
1417 vars_shrunk = true;
1418 }
1419
1420 return vars_shrunk;
1421 }
1422
1423 static bool
1424 vec_deref_is_oob(nir_deref_instr *deref,
1425 struct vec_var_usage *usage)
1426 {
1427 nir_deref_path path;
1428 nir_deref_path_init(&path, deref, NULL);
1429
1430 bool oob = false;
1431 for (unsigned i = 0; i < usage->num_levels; i++) {
1432 nir_deref_instr *p = path.path[i + 1];
1433 if (p->deref_type == nir_deref_type_array_wildcard)
1434 continue;
1435
1436 if (nir_src_is_const(p->arr.index) &&
1437 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1438 oob = true;
1439 break;
1440 }
1441 }
1442
1443 nir_deref_path_finish(&path);
1444
1445 return oob;
1446 }
1447
1448 static bool
1449 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1450 struct hash_table *var_usage_map,
1451 nir_variable_mode modes)
1452 {
1453 struct vec_var_usage *usage =
1454 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1455 if (!usage)
1456 return false;
1457
1458 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1459 }
1460
1461 static void
1462 shrink_vec_var_access_impl(nir_function_impl *impl,
1463 struct hash_table *var_usage_map,
1464 nir_variable_mode modes)
1465 {
1466 nir_builder b;
1467 nir_builder_init(&b, impl);
1468
1469 nir_foreach_block(block, impl) {
1470 nir_foreach_instr_safe(instr, block) {
1471 switch (instr->type) {
1472 case nir_instr_type_deref: {
1473 nir_deref_instr *deref = nir_instr_as_deref(instr);
1474 if (!nir_deref_mode_may_be(deref, modes))
1475 break;
1476
1477 /* Clean up any dead derefs we find lying around. They may refer
1478 * to variables we've deleted.
1479 */
1480 if (nir_deref_instr_remove_if_unused(deref))
1481 break;
1482
1483 /* Update the type in the deref to keep the types consistent as
1484 * you walk down the chain. We don't need to check if this is one
1485 * of the derefs we're shrinking because this is a no-op if it
1486 * isn't. The worst that could happen is that we accidentally fix
1487 * an invalid deref.
1488 */
1489 if (deref->deref_type == nir_deref_type_var) {
1490 deref->type = deref->var->type;
1491 } else if (deref->deref_type == nir_deref_type_array ||
1492 deref->deref_type == nir_deref_type_array_wildcard) {
1493 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1494 assert(glsl_type_is_array(parent->type) ||
1495 glsl_type_is_matrix(parent->type));
1496 deref->type = glsl_get_array_element(parent->type);
1497 }
1498 break;
1499 }
1500
1501 case nir_instr_type_intrinsic: {
1502 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1503
1504 /* If we have a copy whose source or destination has been deleted
1505 * because we determined the variable was dead, then we just
1506 * delete the copy instruction. If the source variable was dead
1507 * then it was writing undefined garbage anyway and if it's the
1508 * destination variable that's dead then the write isn't needed.
1509 */
1510 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1511 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1512 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1513 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1514 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1515 nir_instr_remove(&intrin->instr);
1516 nir_deref_instr_remove_if_unused(dst);
1517 nir_deref_instr_remove_if_unused(src);
1518 }
1519 continue;
1520 }
1521
1522 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1523 intrin->intrinsic != nir_intrinsic_store_deref)
1524 continue;
1525
1526 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1527 if (!nir_deref_mode_may_be(deref, modes))
1528 continue;
1529
1530 struct vec_var_usage *usage =
1531 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1532 if (!usage)
1533 continue;
1534
1535 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1536 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1537 nir_ssa_def *u =
1538 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1539 intrin->dest.ssa.bit_size);
1540 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1541 u);
1542 }
1543 nir_instr_remove(&intrin->instr);
1544 nir_deref_instr_remove_if_unused(deref);
1545 continue;
1546 }
1547
1548 /* If we're not dropping any components, there's no need to
1549 * compact vectors.
1550 */
1551 if (usage->comps_kept == usage->all_comps)
1552 continue;
1553
1554 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1555 b.cursor = nir_after_instr(&intrin->instr);
1556
1557 nir_ssa_def *undef =
1558 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1559 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1560 unsigned c = 0;
1561 for (unsigned i = 0; i < intrin->num_components; i++) {
1562 if (usage->comps_kept & (1u << i))
1563 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1564 else
1565 vec_srcs[i] = undef;
1566 }
1567 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1568
1569 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1570 vec,
1571 vec->parent_instr);
1572
1573 /* The SSA def is now only used by the swizzle. It's safe to
1574 * shrink the number of components.
1575 */
1576 assert(list_length(&intrin->dest.ssa.uses) == c);
1577 intrin->num_components = c;
1578 intrin->dest.ssa.num_components = c;
1579 } else {
1580 nir_component_mask_t write_mask =
1581 nir_intrinsic_write_mask(intrin);
1582
1583 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1584 nir_component_mask_t new_write_mask = 0;
1585 unsigned c = 0;
1586 for (unsigned i = 0; i < intrin->num_components; i++) {
1587 if (usage->comps_kept & (1u << i)) {
1588 swizzle[c] = i;
1589 if (write_mask & (1u << i))
1590 new_write_mask |= 1u << c;
1591 c++;
1592 }
1593 }
1594
1595 b.cursor = nir_before_instr(&intrin->instr);
1596
1597 nir_ssa_def *swizzled =
1598 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1599
1600 /* Rewrite to use the compacted source */
1601 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1602 nir_src_for_ssa(swizzled));
1603 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1604 intrin->num_components = c;
1605 }
1606 break;
1607 }
1608
1609 default:
1610 break;
1611 }
1612 }
1613 }
1614 }
1615
1616 static bool
1617 function_impl_has_vars_with_modes(nir_function_impl *impl,
1618 nir_variable_mode modes)
1619 {
1620 nir_shader *shader = impl->function->shader;
1621
1622 if (modes & ~nir_var_function_temp) {
1623 nir_foreach_variable_with_modes(var, shader,
1624 modes & ~nir_var_function_temp)
1625 return true;
1626 }
1627
1628 if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1629 return true;
1630
1631 return false;
1632 }
1633
1634 /** Attempt to shrink arrays of vectors
1635 *
1636 * This pass looks at variables which contain a vector or an array (possibly
1637 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1638 * or array. If the pass can prove that a component of a vector (or array of
1639 * vectors) is never really used, then that component will be removed.
1640 * Similarly, the pass attempts to shorten arrays based on what elements it
1641 * can prove are never read or never contain valid data.
1642 */
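/* For instance, assuming a temporary declared roughly as
 *
 *    vec4 tmp[8];
 *
 * where only tmp[0..3] are ever accessed (with constant indices) and only
 * the .xy components are both read and written, the variable is rewritten
 * to something equivalent to
 *
 *    vec2 tmp[4];
 *
 * and all loads and stores of it are re-swizzled to the compacted
 * components.
 */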
1643 bool
1644 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1645 {
1646 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1647
1648 void *mem_ctx = ralloc_context(NULL);
1649
1650 struct hash_table *var_usage_map =
1651 _mesa_pointer_hash_table_create(mem_ctx);
1652
1653 bool has_vars_to_shrink = false;
1654 nir_foreach_function(function, shader) {
1655 if (!function->impl)
1656 continue;
1657
1658 /* Don't even bother crawling the IR if we don't have any variables.
1659 * Given that this pass deletes any unused variables, it's likely that
1660 * we will be in this scenario eventually.
1661 */
1662 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1663 has_vars_to_shrink = true;
1664 find_used_components_impl(function->impl, var_usage_map,
1665 modes, mem_ctx);
1666 }
1667 }
1668 if (!has_vars_to_shrink) {
1669 ralloc_free(mem_ctx);
1670 nir_shader_preserve_all_metadata(shader);
1671 return false;
1672 }
1673
1674 bool globals_shrunk = false;
1675 if (modes & nir_var_shader_temp) {
1676 globals_shrunk = shrink_vec_var_list(&shader->variables,
1677 nir_var_shader_temp,
1678 var_usage_map);
1679 }
1680
1681 bool progress = false;
1682 nir_foreach_function(function, shader) {
1683 if (!function->impl)
1684 continue;
1685
1686 bool locals_shrunk = false;
1687 if (modes & nir_var_function_temp) {
1688 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1689 nir_var_function_temp,
1690 var_usage_map);
1691 }
1692
1693 if (globals_shrunk || locals_shrunk) {
1694 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1695
1696 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1697 nir_metadata_dominance);
1698 progress = true;
1699 } else {
1700 nir_metadata_preserve(function->impl, nir_metadata_all);
1701 }
1702 }
1703
1704 ralloc_free(mem_ctx);
1705
1706 return progress;
1707 }
1708