1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci#include "nir_deref.h" 27bf215546Sopenharmony_ci#include "nir_vla.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "util/set.h" 30bf215546Sopenharmony_ci#include "util/u_math.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_cistatic struct set * 33bf215546Sopenharmony_ciget_complex_used_vars(nir_shader *shader, void *mem_ctx) 34bf215546Sopenharmony_ci{ 35bf215546Sopenharmony_ci struct set *complex_vars = _mesa_pointer_set_create(mem_ctx); 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 38bf215546Sopenharmony_ci if (!function->impl) 39bf215546Sopenharmony_ci continue; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ci nir_foreach_block(block, function->impl) { 42bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 43bf215546Sopenharmony_ci if (instr->type != nir_instr_type_deref) 44bf215546Sopenharmony_ci continue; 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci /* We only need to consider var derefs because 49bf215546Sopenharmony_ci * nir_deref_instr_has_complex_use is recursive. 
50bf215546Sopenharmony_ci */ 51bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var && 52bf215546Sopenharmony_ci nir_deref_instr_has_complex_use(deref, 0)) 53bf215546Sopenharmony_ci _mesa_set_add(complex_vars, deref->var); 54bf215546Sopenharmony_ci } 55bf215546Sopenharmony_ci } 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci return complex_vars; 59bf215546Sopenharmony_ci} 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_cistruct split_var_state { 62bf215546Sopenharmony_ci void *mem_ctx; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci nir_shader *shader; 65bf215546Sopenharmony_ci nir_function_impl *impl; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci nir_variable *base_var; 68bf215546Sopenharmony_ci}; 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_cistruct field { 71bf215546Sopenharmony_ci struct field *parent; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci const struct glsl_type *type; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci unsigned num_fields; 76bf215546Sopenharmony_ci struct field *fields; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci nir_variable *var; 79bf215546Sopenharmony_ci}; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_cistatic int 82bf215546Sopenharmony_cinum_array_levels_in_array_of_vector_type(const struct glsl_type *type) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci int num_levels = 0; 85bf215546Sopenharmony_ci while (true) { 86bf215546Sopenharmony_ci if (glsl_type_is_array_or_matrix(type)) { 87bf215546Sopenharmony_ci num_levels++; 88bf215546Sopenharmony_ci type = glsl_get_array_element(type); 89bf215546Sopenharmony_ci } else if (glsl_type_is_vector_or_scalar(type)) { 90bf215546Sopenharmony_ci return num_levels; 91bf215546Sopenharmony_ci } else { 92bf215546Sopenharmony_ci /* Not an array of vectors */ 93bf215546Sopenharmony_ci return -1; 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci } 96bf215546Sopenharmony_ci} 97bf215546Sopenharmony_ci 
98bf215546Sopenharmony_cistatic void 99bf215546Sopenharmony_ciinit_field_for_type(struct field *field, struct field *parent, 100bf215546Sopenharmony_ci const struct glsl_type *type, 101bf215546Sopenharmony_ci const char *name, 102bf215546Sopenharmony_ci struct split_var_state *state) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci *field = (struct field) { 105bf215546Sopenharmony_ci .parent = parent, 106bf215546Sopenharmony_ci .type = type, 107bf215546Sopenharmony_ci }; 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci const struct glsl_type *struct_type = glsl_without_array(type); 110bf215546Sopenharmony_ci if (glsl_type_is_struct_or_ifc(struct_type)) { 111bf215546Sopenharmony_ci field->num_fields = glsl_get_length(struct_type), 112bf215546Sopenharmony_ci field->fields = ralloc_array(state->mem_ctx, struct field, 113bf215546Sopenharmony_ci field->num_fields); 114bf215546Sopenharmony_ci for (unsigned i = 0; i < field->num_fields; i++) { 115bf215546Sopenharmony_ci char *field_name = NULL; 116bf215546Sopenharmony_ci if (name) { 117bf215546Sopenharmony_ci field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name, 118bf215546Sopenharmony_ci glsl_get_struct_elem_name(struct_type, i)); 119bf215546Sopenharmony_ci } else { 120bf215546Sopenharmony_ci field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s", 121bf215546Sopenharmony_ci glsl_get_type_name(struct_type), 122bf215546Sopenharmony_ci glsl_get_struct_elem_name(struct_type, i)); 123bf215546Sopenharmony_ci } 124bf215546Sopenharmony_ci init_field_for_type(&field->fields[i], field, 125bf215546Sopenharmony_ci glsl_get_struct_field(struct_type, i), 126bf215546Sopenharmony_ci field_name, state); 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci } else { 129bf215546Sopenharmony_ci const struct glsl_type *var_type = type; 130bf215546Sopenharmony_ci for (struct field *f = field->parent; f; f = f->parent) 131bf215546Sopenharmony_ci var_type = glsl_type_wrap_in_arrays(var_type, f->type); 
132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci nir_variable_mode mode = state->base_var->data.mode; 134bf215546Sopenharmony_ci if (mode == nir_var_function_temp) { 135bf215546Sopenharmony_ci field->var = nir_local_variable_create(state->impl, var_type, name); 136bf215546Sopenharmony_ci } else { 137bf215546Sopenharmony_ci field->var = nir_variable_create(state->shader, mode, var_type, name); 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci field->var->data.ray_query = state->base_var->data.ray_query; 140bf215546Sopenharmony_ci } 141bf215546Sopenharmony_ci} 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_cistatic bool 144bf215546Sopenharmony_cisplit_var_list_structs(nir_shader *shader, 145bf215546Sopenharmony_ci nir_function_impl *impl, 146bf215546Sopenharmony_ci struct exec_list *vars, 147bf215546Sopenharmony_ci nir_variable_mode mode, 148bf215546Sopenharmony_ci struct hash_table *var_field_map, 149bf215546Sopenharmony_ci struct set **complex_vars, 150bf215546Sopenharmony_ci void *mem_ctx) 151bf215546Sopenharmony_ci{ 152bf215546Sopenharmony_ci struct split_var_state state = { 153bf215546Sopenharmony_ci .mem_ctx = mem_ctx, 154bf215546Sopenharmony_ci .shader = shader, 155bf215546Sopenharmony_ci .impl = impl, 156bf215546Sopenharmony_ci }; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci struct exec_list split_vars; 159bf215546Sopenharmony_ci exec_list_make_empty(&split_vars); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci /* To avoid list confusion (we'll be adding things as we split variables), 162bf215546Sopenharmony_ci * pull all of the variables we plan to split off of the list 163bf215546Sopenharmony_ci */ 164bf215546Sopenharmony_ci nir_foreach_variable_in_list_safe(var, vars) { 165bf215546Sopenharmony_ci if (var->data.mode != mode) 166bf215546Sopenharmony_ci continue; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 169bf215546Sopenharmony_ci continue; 
170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci if (*complex_vars == NULL) 172bf215546Sopenharmony_ci *complex_vars = get_complex_used_vars(shader, mem_ctx); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* We can't split a variable that's referenced with deref that has any 175bf215546Sopenharmony_ci * sort of complex usage. 176bf215546Sopenharmony_ci */ 177bf215546Sopenharmony_ci if (_mesa_set_search(*complex_vars, var)) 178bf215546Sopenharmony_ci continue; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci exec_node_remove(&var->node); 181bf215546Sopenharmony_ci exec_list_push_tail(&split_vars, &var->node); 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, &split_vars) { 185bf215546Sopenharmony_ci state.base_var = var; 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci struct field *root_field = ralloc(mem_ctx, struct field); 188bf215546Sopenharmony_ci init_field_for_type(root_field, NULL, var->type, var->name, &state); 189bf215546Sopenharmony_ci _mesa_hash_table_insert(var_field_map, var, root_field); 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci return !exec_list_is_empty(&split_vars); 193bf215546Sopenharmony_ci} 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_cistatic void 196bf215546Sopenharmony_cisplit_struct_derefs_impl(nir_function_impl *impl, 197bf215546Sopenharmony_ci struct hash_table *var_field_map, 198bf215546Sopenharmony_ci nir_variable_mode modes, 199bf215546Sopenharmony_ci void *mem_ctx) 200bf215546Sopenharmony_ci{ 201bf215546Sopenharmony_ci nir_builder b; 202bf215546Sopenharmony_ci nir_builder_init(&b, impl); 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 205bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 206bf215546Sopenharmony_ci if (instr->type != nir_instr_type_deref) 207bf215546Sopenharmony_ci continue; 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci 
nir_deref_instr *deref = nir_instr_as_deref(instr); 210bf215546Sopenharmony_ci if (!nir_deref_mode_may_be(deref, modes)) 211bf215546Sopenharmony_ci continue; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci /* Clean up any dead derefs we find lying around. They may refer to 214bf215546Sopenharmony_ci * variables we're planning to split. 215bf215546Sopenharmony_ci */ 216bf215546Sopenharmony_ci if (nir_deref_instr_remove_if_unused(deref)) 217bf215546Sopenharmony_ci continue; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci if (!glsl_type_is_vector_or_scalar(deref->type)) 220bf215546Sopenharmony_ci continue; 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci nir_variable *base_var = nir_deref_instr_get_variable(deref); 223bf215546Sopenharmony_ci /* If we can't chase back to the variable, then we're a complex use. 224bf215546Sopenharmony_ci * This should have been detected by get_complex_used_vars() and the 225bf215546Sopenharmony_ci * variable should not have been split. However, we have no way of 226bf215546Sopenharmony_ci * knowing that here, so we just have to trust it. 
227bf215546Sopenharmony_ci */ 228bf215546Sopenharmony_ci if (base_var == NULL) 229bf215546Sopenharmony_ci continue; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci struct hash_entry *entry = 232bf215546Sopenharmony_ci _mesa_hash_table_search(var_field_map, base_var); 233bf215546Sopenharmony_ci if (!entry) 234bf215546Sopenharmony_ci continue; 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci struct field *root_field = entry->data; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci nir_deref_path path; 239bf215546Sopenharmony_ci nir_deref_path_init(&path, deref, mem_ctx); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci struct field *tail_field = root_field; 242bf215546Sopenharmony_ci for (unsigned i = 0; path.path[i]; i++) { 243bf215546Sopenharmony_ci if (path.path[i]->deref_type != nir_deref_type_struct) 244bf215546Sopenharmony_ci continue; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci assert(i > 0); 247bf215546Sopenharmony_ci assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type)); 248bf215546Sopenharmony_ci assert(path.path[i - 1]->type == 249bf215546Sopenharmony_ci glsl_without_array(tail_field->type)); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci tail_field = &tail_field->fields[path.path[i]->strct.index]; 252bf215546Sopenharmony_ci } 253bf215546Sopenharmony_ci nir_variable *split_var = tail_field->var; 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci nir_deref_instr *new_deref = NULL; 256bf215546Sopenharmony_ci for (unsigned i = 0; path.path[i]; i++) { 257bf215546Sopenharmony_ci nir_deref_instr *p = path.path[i]; 258bf215546Sopenharmony_ci b.cursor = nir_after_instr(&p->instr); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci switch (p->deref_type) { 261bf215546Sopenharmony_ci case nir_deref_type_var: 262bf215546Sopenharmony_ci assert(new_deref == NULL); 263bf215546Sopenharmony_ci new_deref = nir_build_deref_var(&b, split_var); 264bf215546Sopenharmony_ci break; 265bf215546Sopenharmony_ci 
266bf215546Sopenharmony_ci case nir_deref_type_array: 267bf215546Sopenharmony_ci case nir_deref_type_array_wildcard: 268bf215546Sopenharmony_ci new_deref = nir_build_deref_follower(&b, new_deref, p); 269bf215546Sopenharmony_ci break; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci case nir_deref_type_struct: 272bf215546Sopenharmony_ci /* Nothing to do; we're splitting structs */ 273bf215546Sopenharmony_ci break; 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci default: 276bf215546Sopenharmony_ci unreachable("Invalid deref type in path"); 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci assert(new_deref->type == deref->type); 281bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&deref->dest.ssa, 282bf215546Sopenharmony_ci &new_deref->dest.ssa); 283bf215546Sopenharmony_ci nir_deref_instr_remove_if_unused(deref); 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci } 286bf215546Sopenharmony_ci} 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci/** A pass for splitting structs into multiple variables 289bf215546Sopenharmony_ci * 290bf215546Sopenharmony_ci * This pass splits arrays of structs into multiple variables, one for each 291bf215546Sopenharmony_ci * (possibly nested) structure member. After this pass completes, no 292bf215546Sopenharmony_ci * variables of the given mode will contain a struct type. 
293bf215546Sopenharmony_ci */ 294bf215546Sopenharmony_cibool 295bf215546Sopenharmony_cinir_split_struct_vars(nir_shader *shader, nir_variable_mode modes) 296bf215546Sopenharmony_ci{ 297bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 298bf215546Sopenharmony_ci struct hash_table *var_field_map = 299bf215546Sopenharmony_ci _mesa_pointer_hash_table_create(mem_ctx); 300bf215546Sopenharmony_ci struct set *complex_vars = NULL; 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci bool has_global_splits = false; 305bf215546Sopenharmony_ci if (modes & nir_var_shader_temp) { 306bf215546Sopenharmony_ci has_global_splits = split_var_list_structs(shader, NULL, 307bf215546Sopenharmony_ci &shader->variables, 308bf215546Sopenharmony_ci nir_var_shader_temp, 309bf215546Sopenharmony_ci var_field_map, 310bf215546Sopenharmony_ci &complex_vars, 311bf215546Sopenharmony_ci mem_ctx); 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci bool progress = false; 315bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 316bf215546Sopenharmony_ci if (!function->impl) 317bf215546Sopenharmony_ci continue; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci bool has_local_splits = false; 320bf215546Sopenharmony_ci if (modes & nir_var_function_temp) { 321bf215546Sopenharmony_ci has_local_splits = split_var_list_structs(shader, function->impl, 322bf215546Sopenharmony_ci &function->impl->locals, 323bf215546Sopenharmony_ci nir_var_function_temp, 324bf215546Sopenharmony_ci var_field_map, 325bf215546Sopenharmony_ci &complex_vars, 326bf215546Sopenharmony_ci mem_ctx); 327bf215546Sopenharmony_ci } 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci if (has_global_splits || has_local_splits) { 330bf215546Sopenharmony_ci split_struct_derefs_impl(function->impl, var_field_map, 331bf215546Sopenharmony_ci modes, mem_ctx); 
332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_block_index | 334bf215546Sopenharmony_ci nir_metadata_dominance); 335bf215546Sopenharmony_ci progress = true; 336bf215546Sopenharmony_ci } else { 337bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_all); 338bf215546Sopenharmony_ci } 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci ralloc_free(mem_ctx); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci return progress; 344bf215546Sopenharmony_ci} 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_cistruct array_level_info { 347bf215546Sopenharmony_ci unsigned array_len; 348bf215546Sopenharmony_ci bool split; 349bf215546Sopenharmony_ci}; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_cistruct array_split { 352bf215546Sopenharmony_ci /* Only set if this is the tail end of the splitting */ 353bf215546Sopenharmony_ci nir_variable *var; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci unsigned num_splits; 356bf215546Sopenharmony_ci struct array_split *splits; 357bf215546Sopenharmony_ci}; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_cistruct array_var_info { 360bf215546Sopenharmony_ci nir_variable *base_var; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci const struct glsl_type *split_var_type; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci bool split_var; 365bf215546Sopenharmony_ci struct array_split root_split; 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci unsigned num_levels; 368bf215546Sopenharmony_ci struct array_level_info levels[0]; 369bf215546Sopenharmony_ci}; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_cistatic bool 372bf215546Sopenharmony_ciinit_var_list_array_infos(nir_shader *shader, 373bf215546Sopenharmony_ci struct exec_list *vars, 374bf215546Sopenharmony_ci nir_variable_mode mode, 375bf215546Sopenharmony_ci struct hash_table *var_info_map, 376bf215546Sopenharmony_ci struct set **complex_vars, 
377bf215546Sopenharmony_ci void *mem_ctx) 378bf215546Sopenharmony_ci{ 379bf215546Sopenharmony_ci bool has_array = false; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, vars) { 382bf215546Sopenharmony_ci if (var->data.mode != mode) 383bf215546Sopenharmony_ci continue; 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci int num_levels = num_array_levels_in_array_of_vector_type(var->type); 386bf215546Sopenharmony_ci if (num_levels <= 0) 387bf215546Sopenharmony_ci continue; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci if (*complex_vars == NULL) 390bf215546Sopenharmony_ci *complex_vars = get_complex_used_vars(shader, mem_ctx); 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci /* We can't split a variable that's referenced with deref that has any 393bf215546Sopenharmony_ci * sort of complex usage. 394bf215546Sopenharmony_ci */ 395bf215546Sopenharmony_ci if (_mesa_set_search(*complex_vars, var)) 396bf215546Sopenharmony_ci continue; 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci struct array_var_info *info = 399bf215546Sopenharmony_ci rzalloc_size(mem_ctx, sizeof(*info) + 400bf215546Sopenharmony_ci num_levels * sizeof(info->levels[0])); 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci info->base_var = var; 403bf215546Sopenharmony_ci info->num_levels = num_levels; 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci const struct glsl_type *type = var->type; 406bf215546Sopenharmony_ci for (int i = 0; i < num_levels; i++) { 407bf215546Sopenharmony_ci info->levels[i].array_len = glsl_get_length(type); 408bf215546Sopenharmony_ci type = glsl_get_array_element(type); 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci /* All levels start out initially as split */ 411bf215546Sopenharmony_ci info->levels[i].split = true; 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci _mesa_hash_table_insert(var_info_map, var, info); 415bf215546Sopenharmony_ci has_array = true; 
416bf215546Sopenharmony_ci } 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci return has_array; 419bf215546Sopenharmony_ci} 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_cistatic struct array_var_info * 422bf215546Sopenharmony_ciget_array_var_info(nir_variable *var, 423bf215546Sopenharmony_ci struct hash_table *var_info_map) 424bf215546Sopenharmony_ci{ 425bf215546Sopenharmony_ci struct hash_entry *entry = 426bf215546Sopenharmony_ci _mesa_hash_table_search(var_info_map, var); 427bf215546Sopenharmony_ci return entry ? entry->data : NULL; 428bf215546Sopenharmony_ci} 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_cistatic struct array_var_info * 431bf215546Sopenharmony_ciget_array_deref_info(nir_deref_instr *deref, 432bf215546Sopenharmony_ci struct hash_table *var_info_map, 433bf215546Sopenharmony_ci nir_variable_mode modes) 434bf215546Sopenharmony_ci{ 435bf215546Sopenharmony_ci if (!nir_deref_mode_may_be(deref, modes)) 436bf215546Sopenharmony_ci return NULL; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(deref); 439bf215546Sopenharmony_ci if (var == NULL) 440bf215546Sopenharmony_ci return NULL; 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci return get_array_var_info(var, var_info_map); 443bf215546Sopenharmony_ci} 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cistatic void 446bf215546Sopenharmony_cimark_array_deref_used(nir_deref_instr *deref, 447bf215546Sopenharmony_ci struct hash_table *var_info_map, 448bf215546Sopenharmony_ci nir_variable_mode modes, 449bf215546Sopenharmony_ci void *mem_ctx) 450bf215546Sopenharmony_ci{ 451bf215546Sopenharmony_ci struct array_var_info *info = 452bf215546Sopenharmony_ci get_array_deref_info(deref, var_info_map, modes); 453bf215546Sopenharmony_ci if (!info) 454bf215546Sopenharmony_ci return; 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci nir_deref_path path; 457bf215546Sopenharmony_ci nir_deref_path_init(&path, deref, mem_ctx); 
458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci /* Walk the path and look for indirects. If we have an array deref with an 460bf215546Sopenharmony_ci * indirect, mark the given level as not being split. 461bf215546Sopenharmony_ci */ 462bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_levels; i++) { 463bf215546Sopenharmony_ci nir_deref_instr *p = path.path[i + 1]; 464bf215546Sopenharmony_ci if (p->deref_type == nir_deref_type_array && 465bf215546Sopenharmony_ci !nir_src_is_const(p->arr.index)) 466bf215546Sopenharmony_ci info->levels[i].split = false; 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci} 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_cistatic void 471bf215546Sopenharmony_cimark_array_usage_impl(nir_function_impl *impl, 472bf215546Sopenharmony_ci struct hash_table *var_info_map, 473bf215546Sopenharmony_ci nir_variable_mode modes, 474bf215546Sopenharmony_ci void *mem_ctx) 475bf215546Sopenharmony_ci{ 476bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 477bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 478bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 479bf215546Sopenharmony_ci continue; 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 482bf215546Sopenharmony_ci switch (intrin->intrinsic) { 483bf215546Sopenharmony_ci case nir_intrinsic_copy_deref: 484bf215546Sopenharmony_ci mark_array_deref_used(nir_src_as_deref(intrin->src[1]), 485bf215546Sopenharmony_ci var_info_map, modes, mem_ctx); 486bf215546Sopenharmony_ci FALLTHROUGH; 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 489bf215546Sopenharmony_ci case nir_intrinsic_store_deref: 490bf215546Sopenharmony_ci mark_array_deref_used(nir_src_as_deref(intrin->src[0]), 491bf215546Sopenharmony_ci var_info_map, modes, mem_ctx); 492bf215546Sopenharmony_ci break; 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci default: 495bf215546Sopenharmony_ci 
break; 496bf215546Sopenharmony_ci } 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci } 499bf215546Sopenharmony_ci} 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_cistatic void 502bf215546Sopenharmony_cicreate_split_array_vars(struct array_var_info *var_info, 503bf215546Sopenharmony_ci unsigned level, 504bf215546Sopenharmony_ci struct array_split *split, 505bf215546Sopenharmony_ci const char *name, 506bf215546Sopenharmony_ci nir_shader *shader, 507bf215546Sopenharmony_ci nir_function_impl *impl, 508bf215546Sopenharmony_ci void *mem_ctx) 509bf215546Sopenharmony_ci{ 510bf215546Sopenharmony_ci while (level < var_info->num_levels && !var_info->levels[level].split) { 511bf215546Sopenharmony_ci name = ralloc_asprintf(mem_ctx, "%s[*]", name); 512bf215546Sopenharmony_ci level++; 513bf215546Sopenharmony_ci } 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci if (level == var_info->num_levels) { 516bf215546Sopenharmony_ci /* We add parens to the variable name so it looks like "(foo[2][*])" so 517bf215546Sopenharmony_ci * that further derefs will look like "(foo[2][*])[ssa_6]" 518bf215546Sopenharmony_ci */ 519bf215546Sopenharmony_ci name = ralloc_asprintf(mem_ctx, "(%s)", name); 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci nir_variable_mode mode = var_info->base_var->data.mode; 522bf215546Sopenharmony_ci if (mode == nir_var_function_temp) { 523bf215546Sopenharmony_ci split->var = nir_local_variable_create(impl, 524bf215546Sopenharmony_ci var_info->split_var_type, name); 525bf215546Sopenharmony_ci } else { 526bf215546Sopenharmony_ci split->var = nir_variable_create(shader, mode, 527bf215546Sopenharmony_ci var_info->split_var_type, name); 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci split->var->data.ray_query = var_info->base_var->data.ray_query; 530bf215546Sopenharmony_ci } else { 531bf215546Sopenharmony_ci assert(var_info->levels[level].split); 532bf215546Sopenharmony_ci split->num_splits = var_info->levels[level].array_len; 
533bf215546Sopenharmony_ci split->splits = rzalloc_array(mem_ctx, struct array_split, 534bf215546Sopenharmony_ci split->num_splits); 535bf215546Sopenharmony_ci for (unsigned i = 0; i < split->num_splits; i++) { 536bf215546Sopenharmony_ci create_split_array_vars(var_info, level + 1, &split->splits[i], 537bf215546Sopenharmony_ci ralloc_asprintf(mem_ctx, "%s[%d]", name, i), 538bf215546Sopenharmony_ci shader, impl, mem_ctx); 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci } 541bf215546Sopenharmony_ci} 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_cistatic bool 544bf215546Sopenharmony_cisplit_var_list_arrays(nir_shader *shader, 545bf215546Sopenharmony_ci nir_function_impl *impl, 546bf215546Sopenharmony_ci struct exec_list *vars, 547bf215546Sopenharmony_ci nir_variable_mode mode, 548bf215546Sopenharmony_ci struct hash_table *var_info_map, 549bf215546Sopenharmony_ci void *mem_ctx) 550bf215546Sopenharmony_ci{ 551bf215546Sopenharmony_ci struct exec_list split_vars; 552bf215546Sopenharmony_ci exec_list_make_empty(&split_vars); 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci nir_foreach_variable_in_list_safe(var, vars) { 555bf215546Sopenharmony_ci if (var->data.mode != mode) 556bf215546Sopenharmony_ci continue; 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci struct array_var_info *info = get_array_var_info(var, var_info_map); 559bf215546Sopenharmony_ci if (!info) 560bf215546Sopenharmony_ci continue; 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci bool has_split = false; 563bf215546Sopenharmony_ci const struct glsl_type *split_type = 564bf215546Sopenharmony_ci glsl_without_array_or_matrix(var->type); 565bf215546Sopenharmony_ci for (int i = info->num_levels - 1; i >= 0; i--) { 566bf215546Sopenharmony_ci if (info->levels[i].split) { 567bf215546Sopenharmony_ci has_split = true; 568bf215546Sopenharmony_ci continue; 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci /* If the original type was a matrix type, we'd like 
to keep that so 572bf215546Sopenharmony_ci * we don't convert matrices into arrays. 573bf215546Sopenharmony_ci */ 574bf215546Sopenharmony_ci if (i == info->num_levels - 1 && 575bf215546Sopenharmony_ci glsl_type_is_matrix(glsl_without_array(var->type))) { 576bf215546Sopenharmony_ci split_type = glsl_matrix_type(glsl_get_base_type(split_type), 577bf215546Sopenharmony_ci glsl_get_components(split_type), 578bf215546Sopenharmony_ci info->levels[i].array_len); 579bf215546Sopenharmony_ci } else { 580bf215546Sopenharmony_ci split_type = glsl_array_type(split_type, info->levels[i].array_len, 0); 581bf215546Sopenharmony_ci } 582bf215546Sopenharmony_ci } 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci if (has_split) { 585bf215546Sopenharmony_ci info->split_var_type = split_type; 586bf215546Sopenharmony_ci /* To avoid list confusion (we'll be adding things as we split 587bf215546Sopenharmony_ci * variables), pull all of the variables we plan to split off of the 588bf215546Sopenharmony_ci * main variable list. 589bf215546Sopenharmony_ci */ 590bf215546Sopenharmony_ci exec_node_remove(&var->node); 591bf215546Sopenharmony_ci exec_list_push_tail(&split_vars, &var->node); 592bf215546Sopenharmony_ci } else { 593bf215546Sopenharmony_ci assert(split_type == glsl_get_bare_type(var->type)); 594bf215546Sopenharmony_ci /* If we're not modifying this variable, delete the info so we skip 595bf215546Sopenharmony_ci * it faster in later passes. 
596bf215546Sopenharmony_ci */ 597bf215546Sopenharmony_ci _mesa_hash_table_remove_key(var_info_map, var); 598bf215546Sopenharmony_ci } 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, &split_vars) { 602bf215546Sopenharmony_ci struct array_var_info *info = get_array_var_info(var, var_info_map); 603bf215546Sopenharmony_ci create_split_array_vars(info, 0, &info->root_split, var->name, 604bf215546Sopenharmony_ci shader, impl, mem_ctx); 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci return !exec_list_is_empty(&split_vars); 608bf215546Sopenharmony_ci}

/**
 * Returns true if \p path goes through an array wildcard at an array level
 * that \p info marks as split.
 *
 * A NULL \p info (deref of a variable we are not tracking) yields false.
 */
static bool
deref_has_split_wildcard(nir_deref_path *path,
                         struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      /* path[0] is the variable deref, so array level i lives at i + 1. */
      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
          info->levels[i].split)
         return true;
   }

   return false;
}

/**
 * Returns true if any array level of \p path is indexed by a constant that
 * is greater than or equal to the array length recorded in \p info.
 *
 * Wildcards and non-constant indices are never reported as out of bounds.
 * A NULL \p info yields false.
 */
static bool
array_path_is_out_of_bounds(nir_deref_path *path,
                            struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path->path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      if (nir_src_is_const(p->arr.index) &&
          nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
         return true;
   }

   return false;
}

/**
 * Recursively re-emits a copy_deref so that wildcards on split array levels
 * are expanded into one copy per array element.
 *
 * \p dst / \p src are the derefs built so far and \p dst_level / \p src_level
 * are the indices in \p dst_path / \p src_path they correspond to.  Both
 * paths are followed up to their next wildcard.  If both paths are exhausted
 * a plain copy is emitted; otherwise the wildcard pair is either expanded
 * into constant indices (when the level is split on at least one side) or
 * rebuilt as a wildcard on both sides.
 */
static void
emit_split_copies(nir_builder *b,
                  struct array_var_info *dst_info, nir_deref_path *dst_path,
                  unsigned dst_level, nir_deref_instr *dst,
                  struct array_var_info *src_info, nir_deref_path *src_path,
                  unsigned src_level, nir_deref_instr *src)
{
   nir_deref_instr *dst_p, *src_p;

   /* Follow the destination path until its end or the next wildcard. */
   while ((dst_p = dst_path->path[dst_level + 1])) {
      if (dst_p->deref_type == nir_deref_type_array_wildcard)
         break;

      dst = nir_build_deref_follower(b, dst, dst_p);
      dst_level++;
   }

   /* Same for the source path. */
   while ((src_p = src_path->path[src_level + 1])) {
      if (src_p->deref_type == nir_deref_type_array_wildcard)
         break;

      src = nir_build_deref_follower(b, src, src_p);
      src_level++;
   }

   if (src_p == NULL || dst_p == NULL) {
      /* Both paths must run out of wildcards at the same time. */
      assert(src_p == NULL && dst_p == NULL);
      nir_copy_deref(b, dst, src);
   } else {
      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
             src_p->deref_type == nir_deref_type_array_wildcard);

      if ((dst_info && dst_info->levels[dst_level].split) ||
          (src_info && src_info->levels[src_level].split)) {
         /* There are no indirects at this level on one of the source or the
          * destination so we are lowering it.
          */
         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
                glsl_get_length(src_path->path[src_level]->type));
         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
         for (unsigned i = 0; i < len; i++) {
            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                              nir_build_deref_array_imm(b, dst, i),
                              src_info, src_path, src_level + 1,
                              nir_build_deref_array_imm(b, src, i));
         }
      } else {
         /* Neither side is being split so we just keep going */
         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                           nir_build_deref_array_wildcard(b, dst),
                           src_info, src_path, src_level + 1,
                           nir_build_deref_array_wildcard(b, src));
      }
   }
}

/**
 * Replaces every copy_deref in \p impl that has a wildcard on a split array
 * level (on either side) with the copies produced by emit_split_copies().
 *
 * Copies that touch no tracked variable, or whose wildcards are all on
 * unsplit levels, are left untouched.
 */
static void
split_array_copies_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
         if (copy->intrinsic != nir_intrinsic_copy_deref)
            continue;

         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);

         struct array_var_info *dst_info =
            get_array_deref_info(dst_deref, var_info_map, modes);
         struct array_var_info *src_info =
            get_array_deref_info(src_deref, var_info_map, modes);

         if (!src_info && !dst_info)
            continue;

         nir_deref_path dst_path, src_path;
         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
         nir_deref_path_init(&src_path, src_deref, mem_ctx);

         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
             !deref_has_split_wildcard(&src_path, src_info))
            continue;

         /* Remove the original copy and emit its replacement in place. */
         b.cursor = nir_instr_remove(&copy->instr);

         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
                           src_info, &src_path, 0, src_path.path[0]);
      }
   }
}

/**
 * Rewrites every load_deref, store_deref and copy_deref in \p impl that
 * accesses a tracked array variable so that it accesses the matching split
 * variable instead.
 *
 * Accesses with a constant out-of-bounds index are deleted (loads have their
 * uses replaced with an undef).  Unused derefs in \p modes are removed along
 * the way.
 */
static void
split_array_access_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref) {
            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we're planning to split.
             */
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (nir_deref_mode_may_be(deref, modes))
               nir_deref_instr_remove_if_unused(deref);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_deref &&
             intrin->intrinsic != nir_intrinsic_store_deref &&
             intrin->intrinsic != nir_intrinsic_copy_deref)
            continue;

         /* copy_deref has a deref in src[0] and src[1]; loads and stores
          * only have one in src[0].
          */
         const unsigned num_derefs =
            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;

         for (unsigned d = 0; d < num_derefs; d++) {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);

            struct array_var_info *info =
               get_array_deref_info(deref, var_info_map, modes);
            if (!info)
               continue;

            nir_deref_path path;
            nir_deref_path_init(&path, deref, mem_ctx);

            b.cursor = nir_before_instr(&intrin->instr);

            if (array_path_is_out_of_bounds(&path, info)) {
               /* If one of the derefs is out-of-bounds, we just delete the
                * instruction.  If a destination is out of bounds, then it may
                * have been in-bounds prior to shrinking so we don't want to
                * accidentally stomp something.  However, we've already proven
                * that it will never be read so it's safe to delete.  If a
                * source is out of bounds then it is loading random garbage.
                * For loads, we replace their uses with an undef instruction
                * and for copies we just delete the copy since it was writing
                * undefined garbage anyway and we may as well leave the random
                * garbage in the destination alone.
                */
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                   intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa, u);
               }
               nir_instr_remove(&intrin->instr);
               for (unsigned i = 0; i < num_derefs; i++)
                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
               /* The instruction is gone; don't look at its other deref. */
               break;
            }

            /* Walk the split tree using the constant index of every split
             * level to find the variable this access lands in.
             */
            struct array_split *split = &info->root_split;
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (info->levels[i].split) {
                  nir_deref_instr *p = path.path[i + 1];
                  unsigned index = nir_src_as_uint(p->arr.index);
                  assert(index < info->levels[i].array_len);
                  split = &split->splits[index];
               }
            }
            assert(!split->splits && split->var);

            /* Rebuild the deref chain on the split variable, keeping only
             * the levels that were not split away.
             */
            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (!info->levels[i].split) {
                  new_deref = nir_build_deref_follower(&b, new_deref,
                                                       path.path[i + 1]);
               }
            }
            assert(new_deref->type == deref->type);

            /* Rewrite the deref source to point to the split one */
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
                                  nir_src_for_ssa(&new_deref->dest.ssa));
            nir_deref_instr_remove_if_unused(deref);
         }
      }
   }
}

/** A pass for splitting arrays of vectors into multiple variables
 *
 * This pass looks at arrays (possibly multiple levels) of vectors (not
 * structures or other types) and tries to split them into piles of variables,
 * one for each array element.  The heuristic used is simple: If a given array
 * level is never used with an indirect, that array level will get split.
 *
 * This pass probably could handle structures easily enough but making a pass
 * that could see through an array of structures of arrays would be difficult
 * so it's best to just run nir_split_struct_vars first.
 *
 * \p modes must be a subset of nir_var_shader_temp | nir_var_function_temp.
 * Returns true if any function implementation was modified.
 */
bool
nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   /* All bookkeeping is allocated out of mem_ctx and freed on return. */
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
   struct set *complex_vars = NULL;

   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);

   bool has_global_array = false;
   if (modes & nir_var_shader_temp) {
      has_global_array = init_var_list_array_infos(shader,
                                                   &shader->variables,
                                                   nir_var_shader_temp,
                                                   var_info_map,
                                                   &complex_vars,
                                                   mem_ctx);
   }

   bool has_any_array = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_array = false;
      if (modes & nir_var_function_temp) {
         has_local_array = init_var_list_array_infos(shader,
                                                     &function->impl->locals,
                                                     nir_var_function_temp,
                                                     var_info_map,
                                                     &complex_vars,
                                                     mem_ctx);
      }

      if (has_global_array || has_local_array) {
         has_any_array = true;
         mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
      }
   }

   /* If no candidate arrays were found for any function, bail early. */
   if (!has_any_array) {
      ralloc_free(mem_ctx);
      nir_shader_preserve_all_metadata(shader);
      return false;
   }

   bool has_global_splits = false;
   if (modes & nir_var_shader_temp) {
      has_global_splits = split_var_list_arrays(shader, NULL,
                                                &shader->variables,
                                                nir_var_shader_temp,
                                                var_info_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_splits = false;
      if (modes & nir_var_function_temp) {
         has_local_splits = split_var_list_arrays(shader, function->impl,
                                                  &function->impl->locals,
                                                  nir_var_function_temp,
                                                  var_info_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
         split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      } else {
         nir_metadata_preserve(function->impl, nir_metadata_all);
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

/** Usage information gathered for one array level of a variable. */
struct array_level_usage {
   unsigned array_len;

   /* The value UINT_MAX will be used to indicate an indirect */
   unsigned max_read;
   unsigned max_written;

   /* True if there is a copy that isn't to/from a shrinkable array */
   bool has_external_copy;

   /* Set of struct array_level_usage pointers for the levels this level is
    * matched with through wildcard copies; created lazily.
    */
   struct set *levels_copied;
};

/** Usage information gathered for one array-of-vectors variable. */
struct vec_var_usage {
   /* Convenience set of all components this variable has */
   nir_component_mask_t all_comps;

   nir_component_mask_t comps_read;
   nir_component_mask_t comps_written;

   nir_component_mask_t comps_kept;

   /* True if there is a copy that isn't to/from a shrinkable vector */
   bool has_external_copy;
   bool has_complex_use;

   /* Set of struct vec_var_usage pointers for the variables this one is
    * copied to/from; created lazily.
    */
   struct set *vars_copied;

   /* Number of entries in levels[], which is allocated together with the
    * struct itself.
    */
   unsigned num_levels;
   struct array_level_usage levels[];
};

/**
 * Looks up the usage record of \p var in \p var_usage_map.
 *
 * If there is no record yet and \p add_usage_entry is true, a zeroed record
 * is allocated from \p mem_ctx, its per-level array lengths and component
 * mask are filled in from the variable type, and it is inserted in the map.
 *
 * Returns NULL if there is no record and either \p add_usage_entry is false
 * or the variable is not an array of vectors.
 */
static struct vec_var_usage *
get_vec_var_usage(nir_variable *var,
                  struct hash_table *var_usage_map,
                  bool add_usage_entry, void *mem_ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
   if (entry)
      return entry->data;

   if (!add_usage_entry)
      return NULL;

   /* Check to make sure that we are working with an array of vectors.  We
    * don't bother to shrink single vectors because we figure that we can
    * clean it up better with SSA than by inserting piles of vecN instructions
    * to compact results.
    */
   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
   if (num_levels < 1)
      return NULL; /* Not an array of vectors */

   struct vec_var_usage *usage =
      rzalloc_size(mem_ctx, sizeof(*usage) +
                            num_levels * sizeof(usage->levels[0]));

   usage->num_levels = num_levels;
   const struct glsl_type *type = var->type;
   for (int i = 0; i < num_levels; i++) {
      usage->levels[i].array_len = glsl_get_length(type);
      type = glsl_get_array_element(type);
   }
   assert(glsl_type_is_vector_or_scalar(type));

   usage->all_comps = (1u << glsl_get_components(type)) - 1;

   _mesa_hash_table_insert(var_usage_map, var, usage);

   return usage;
}

/**
 * Like get_vec_var_usage() but starting from a deref.
 *
 * Returns NULL if the deref cannot be in \p modes or does not lead back to a
 * variable.
 */
static struct vec_var_usage *
get_vec_deref_usage(nir_deref_instr *deref,
                    struct hash_table *var_usage_map,
                    nir_variable_mode modes,
                    bool add_usage_entry, void *mem_ctx)
{
   if (!nir_deref_mode_may_be(deref, modes))
      return NULL;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var == NULL)
      return NULL;

   return get_vec_var_usage(var, var_usage_map, add_usage_entry, mem_ctx);
}

/**
 * Sets has_complex_use on the usage record of the variable behind \p deref
 * if \p deref is a variable deref in \p modes that
 * nir_deref_instr_has_complex_use() reports as complex.
 */
static void
mark_deref_if_complex(nir_deref_instr *deref,
                      struct hash_table *var_usage_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   /* Only bother with var derefs because nir_deref_instr_has_complex_use is
    * recursive.
    */
   if (deref->deref_type != nir_deref_type_var)
      return;

   if (!(deref->var->data.mode & modes))
      return;

   if (!nir_deref_instr_has_complex_use(deref, 0))
      return;

   struct vec_var_usage *usage =
      get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   usage->has_complex_use = true;
}

/**
 * Records one access through \p deref in the usage record of its variable.
 *
 * \p comps_read / \p comps_written are the components read / written by the
 * access.  For every array level the highest index used is accumulated into
 * max_read / max_written (UINT_MAX for a non-constant index, the last element
 * for a wildcard).
 *
 * \p copy_deref is the other side of a copy_deref, or NULL for loads and
 * stores.  If the other side is tracked too, the two variables and their
 * matching wildcard levels are linked; otherwise the variable (and each of
 * its wildcard levels) is flagged as having an external copy.
 */
static void
mark_deref_used(nir_deref_instr *deref,
                nir_component_mask_t comps_read,
                nir_component_mask_t comps_written,
                nir_deref_instr *copy_deref,
                struct hash_table *var_usage_map,
                nir_variable_mode modes,
                void *mem_ctx)
{
   if (!nir_deref_mode_may_be(deref, modes))
      return;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var == NULL)
      return;

   struct vec_var_usage *usage =
      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   usage->comps_read |= comps_read & usage->all_comps;
   usage->comps_written |= comps_written & usage->all_comps;

   struct vec_var_usage *copy_usage = NULL;
   if (copy_deref) {
      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
                                       true, mem_ctx);
      if (copy_usage) {
         if (usage->vars_copied == NULL) {
            usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
         }
         _mesa_set_add(usage->vars_copied, copy_usage);
      } else {
         usage->has_external_copy = true;
      }
   }

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   /* copy_path is only initialized, and only read, when copy_usage is set. */
   nir_deref_path copy_path;
   if (copy_usage)
      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);

   unsigned copy_i = 0;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      struct array_level_usage *level = &usage->levels[i];
      nir_deref_instr *p = path.path[i + 1];
      assert(p->deref_type == nir_deref_type_array ||
             p->deref_type == nir_deref_type_array_wildcard);

      unsigned max_used;
      if (p->deref_type == nir_deref_type_array) {
         max_used = nir_src_is_const(p->arr.index) ?
                    nir_src_as_uint(p->arr.index) : UINT_MAX;
      } else {
         /* For wildcards, we read or wrote the whole thing. */
         assert(p->deref_type == nir_deref_type_array_wildcard);
         max_used = level->array_len - 1;

         if (copy_usage) {
            /* Match each wildcard level with the level on copy_usage */
            for (; copy_path.path[copy_i + 1]; copy_i++) {
               if (copy_path.path[copy_i + 1]->deref_type ==
                   nir_deref_type_array_wildcard)
                  break;
            }
            struct array_level_usage *copy_level =
               &copy_usage->levels[copy_i++];

            if (level->levels_copied == NULL) {
               level->levels_copied = _mesa_pointer_set_create(mem_ctx);
            }
            _mesa_set_add(level->levels_copied, copy_level);
         } else {
            /* We have a wildcard and it comes from a variable we aren't
             * tracking; flag it and we'll know to not shorten this array.
             */
            level->has_external_copy = true;
         }
      }

      if (comps_written)
         level->max_written = MAX2(level->max_written, max_used);
      if (comps_read)
         level->max_read = MAX2(level->max_read, max_used);
   }
}

/**
 * Returns true if \p src is produced by a load_deref whose deref source is
 * the same SSA value as \p deref_src.
 */
static bool
src_is_load_deref(nir_src src, nir_src deref_src)
{
   nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
   if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
      return false;

   assert(load->src[0].is_ssa);

   return load->src[0].ssa == deref_src.ssa;
}

/* Returns all non-self-referential components of a store instruction.  A
 * component is self-referential if it comes from the same component of a load
 * instruction on the same deref.  If the only data in a particular component
 * of a variable came directly from that component then it's undefined.  The
 * only way to get defined data into a component of a variable is for it to
 * get written there by something outside or from a different component.
 *
 * This is a fairly common pattern in shaders that come from either GLSL IR or
 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
 * load-vec-store.
 */
static nir_component_mask_t
get_non_self_referential_store_comps(nir_intrinsic_instr *store)
{
   nir_component_mask_t comps = nir_intrinsic_write_mask(store);

   assert(store->src[1].is_ssa);
   nir_instr *src_instr = store->src[1].ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return comps;

   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);

   if (src_alu->op == nir_op_mov) {
      /* If it's just a swizzle of a load from the same deref, discount any
       * channels that don't move in the swizzle.
       */
      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
            if (src_alu->src[0].swizzle[i] == i)
               comps &= ~(1u << i);
         }
      }
   } else if (nir_op_is_vec(src_alu->op)) {
      /* If it's a vec, discount any channels that are just loads from the
       * same deref put in the same spot.
       */
      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
             src_alu->src[i].swizzle[0] == i)
            comps &= ~(1u << i);
      }
   }

   return comps;
}

/**
 * Walks every instruction of \p impl and records, in \p var_usage_map, which
 * components and array elements of the tracked variables are read, written
 * or copied, and which variables have a complex use.
 */
static void
find_used_components_impl(nir_function_impl *impl,
                          struct hash_table *var_usage_map,
                          nir_variable_mode modes,
                          void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_deref) {
            mark_deref_if_complex(nir_instr_as_deref(instr),
                                  var_usage_map, modes, mem_ctx);
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_store_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            0, get_non_self_referential_store_comps(intrin),
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_copy_deref: {
            /* Just mark everything used for copies. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
            break;
         }

         default:
            break;
         }
      }
   }
}

1253bf215546Sopenharmony_cistatic bool 1254bf215546Sopenharmony_cishrink_vec_var_list(struct exec_list *vars, 1255bf215546Sopenharmony_ci nir_variable_mode mode, 1256bf215546Sopenharmony_ci struct hash_table *var_usage_map) 1257bf215546Sopenharmony_ci{ 1258bf215546Sopenharmony_ci /* Initialize the components kept field of each variable. This is the 1259bf215546Sopenharmony_ci * AND of the components written and components read. If a component is 1260bf215546Sopenharmony_ci * written but never read, it's dead. If it is read but never written, 1261bf215546Sopenharmony_ci * then all values read are undefined garbage and we may as well not read 1262bf215546Sopenharmony_ci * them. 1263bf215546Sopenharmony_ci * 1264bf215546Sopenharmony_ci * The same logic applies to the array length. We make the array length 1265bf215546Sopenharmony_ci * the minimum needed required length between read and write and plan to 1266bf215546Sopenharmony_ci * discard any OOB access. 
The one exception here is indirect writes 1267bf215546Sopenharmony_ci * because we don't know where they will land and we can't shrink an array 1268bf215546Sopenharmony_ci * with indirect writes because previously in-bounds writes may become 1269bf215546Sopenharmony_ci * out-of-bounds and have undefined behavior. 1270bf215546Sopenharmony_ci * 1271bf215546Sopenharmony_ci * Also, if we have a copy that to/from something we can't shrink, we need 1272bf215546Sopenharmony_ci * to leave components and array_len of any wildcards alone. 1273bf215546Sopenharmony_ci */ 1274bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, vars) { 1275bf215546Sopenharmony_ci if (var->data.mode != mode) 1276bf215546Sopenharmony_ci continue; 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci struct vec_var_usage *usage = 1279bf215546Sopenharmony_ci get_vec_var_usage(var, var_usage_map, false, NULL); 1280bf215546Sopenharmony_ci if (!usage) 1281bf215546Sopenharmony_ci continue; 1282bf215546Sopenharmony_ci 1283bf215546Sopenharmony_ci assert(usage->comps_kept == 0); 1284bf215546Sopenharmony_ci if (usage->has_external_copy || usage->has_complex_use) 1285bf215546Sopenharmony_ci usage->comps_kept = usage->all_comps; 1286bf215546Sopenharmony_ci else 1287bf215546Sopenharmony_ci usage->comps_kept = usage->comps_read & usage->comps_written; 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci for (unsigned i = 0; i < usage->num_levels; i++) { 1290bf215546Sopenharmony_ci struct array_level_usage *level = &usage->levels[i]; 1291bf215546Sopenharmony_ci assert(level->array_len > 0); 1292bf215546Sopenharmony_ci 1293bf215546Sopenharmony_ci if (level->max_written == UINT_MAX || level->has_external_copy || 1294bf215546Sopenharmony_ci usage->has_complex_use) 1295bf215546Sopenharmony_ci continue; /* Can't shrink */ 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_ci unsigned max_used = MIN2(level->max_read, level->max_written); 1298bf215546Sopenharmony_ci level->array_len = MIN2(max_used, 
level->array_len - 1) + 1; 1299bf215546Sopenharmony_ci } 1300bf215546Sopenharmony_ci } 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_ci /* In order for variable copies to work, we have to have the same data type 1303bf215546Sopenharmony_ci * on the source and the destination. In order to satisfy this, we run a 1304bf215546Sopenharmony_ci * little fixed-point algorithm to transitively ensure that we get enough 1305bf215546Sopenharmony_ci * components and array elements for this to hold for all copies. 1306bf215546Sopenharmony_ci */ 1307bf215546Sopenharmony_ci bool fp_progress; 1308bf215546Sopenharmony_ci do { 1309bf215546Sopenharmony_ci fp_progress = false; 1310bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, vars) { 1311bf215546Sopenharmony_ci if (var->data.mode != mode) 1312bf215546Sopenharmony_ci continue; 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_ci struct vec_var_usage *var_usage = 1315bf215546Sopenharmony_ci get_vec_var_usage(var, var_usage_map, false, NULL); 1316bf215546Sopenharmony_ci if (!var_usage || !var_usage->vars_copied) 1317bf215546Sopenharmony_ci continue; 1318bf215546Sopenharmony_ci 1319bf215546Sopenharmony_ci set_foreach(var_usage->vars_copied, copy_entry) { 1320bf215546Sopenharmony_ci struct vec_var_usage *copy_usage = (void *)copy_entry->key; 1321bf215546Sopenharmony_ci if (copy_usage->comps_kept != var_usage->comps_kept) { 1322bf215546Sopenharmony_ci nir_component_mask_t comps_kept = 1323bf215546Sopenharmony_ci (var_usage->comps_kept | copy_usage->comps_kept); 1324bf215546Sopenharmony_ci var_usage->comps_kept = comps_kept; 1325bf215546Sopenharmony_ci copy_usage->comps_kept = comps_kept; 1326bf215546Sopenharmony_ci fp_progress = true; 1327bf215546Sopenharmony_ci } 1328bf215546Sopenharmony_ci } 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_ci for (unsigned i = 0; i < var_usage->num_levels; i++) { 1331bf215546Sopenharmony_ci struct array_level_usage *var_level = &var_usage->levels[i]; 1332bf215546Sopenharmony_ci if 
(!var_level->levels_copied) 1333bf215546Sopenharmony_ci continue; 1334bf215546Sopenharmony_ci 1335bf215546Sopenharmony_ci set_foreach(var_level->levels_copied, copy_entry) { 1336bf215546Sopenharmony_ci struct array_level_usage *copy_level = (void *)copy_entry->key; 1337bf215546Sopenharmony_ci if (var_level->array_len != copy_level->array_len) { 1338bf215546Sopenharmony_ci unsigned array_len = 1339bf215546Sopenharmony_ci MAX2(var_level->array_len, copy_level->array_len); 1340bf215546Sopenharmony_ci var_level->array_len = array_len; 1341bf215546Sopenharmony_ci copy_level->array_len = array_len; 1342bf215546Sopenharmony_ci fp_progress = true; 1343bf215546Sopenharmony_ci } 1344bf215546Sopenharmony_ci } 1345bf215546Sopenharmony_ci } 1346bf215546Sopenharmony_ci } 1347bf215546Sopenharmony_ci } while (fp_progress); 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci bool vars_shrunk = false; 1350bf215546Sopenharmony_ci nir_foreach_variable_in_list_safe(var, vars) { 1351bf215546Sopenharmony_ci if (var->data.mode != mode) 1352bf215546Sopenharmony_ci continue; 1353bf215546Sopenharmony_ci 1354bf215546Sopenharmony_ci struct vec_var_usage *usage = 1355bf215546Sopenharmony_ci get_vec_var_usage(var, var_usage_map, false, NULL); 1356bf215546Sopenharmony_ci if (!usage) 1357bf215546Sopenharmony_ci continue; 1358bf215546Sopenharmony_ci 1359bf215546Sopenharmony_ci bool shrunk = false; 1360bf215546Sopenharmony_ci const struct glsl_type *vec_type = var->type; 1361bf215546Sopenharmony_ci for (unsigned i = 0; i < usage->num_levels; i++) { 1362bf215546Sopenharmony_ci /* If we've reduced the array to zero elements at some level, just 1363bf215546Sopenharmony_ci * set comps_kept to 0 and delete the variable. 
1364bf215546Sopenharmony_ci */ 1365bf215546Sopenharmony_ci if (usage->levels[i].array_len == 0) { 1366bf215546Sopenharmony_ci usage->comps_kept = 0; 1367bf215546Sopenharmony_ci break; 1368bf215546Sopenharmony_ci } 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci assert(usage->levels[i].array_len <= glsl_get_length(vec_type)); 1371bf215546Sopenharmony_ci if (usage->levels[i].array_len < glsl_get_length(vec_type)) 1372bf215546Sopenharmony_ci shrunk = true; 1373bf215546Sopenharmony_ci vec_type = glsl_get_array_element(vec_type); 1374bf215546Sopenharmony_ci } 1375bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(vec_type)); 1376bf215546Sopenharmony_ci 1377bf215546Sopenharmony_ci assert(usage->comps_kept == (usage->comps_kept & usage->all_comps)); 1378bf215546Sopenharmony_ci if (usage->comps_kept != usage->all_comps) 1379bf215546Sopenharmony_ci shrunk = true; 1380bf215546Sopenharmony_ci 1381bf215546Sopenharmony_ci if (usage->comps_kept == 0) { 1382bf215546Sopenharmony_ci /* This variable is dead, remove it */ 1383bf215546Sopenharmony_ci vars_shrunk = true; 1384bf215546Sopenharmony_ci exec_node_remove(&var->node); 1385bf215546Sopenharmony_ci continue; 1386bf215546Sopenharmony_ci } 1387bf215546Sopenharmony_ci 1388bf215546Sopenharmony_ci if (!shrunk) { 1389bf215546Sopenharmony_ci /* This variable doesn't need to be shrunk. Remove it from the 1390bf215546Sopenharmony_ci * hash table so later steps will ignore it. 
1391bf215546Sopenharmony_ci */ 1392bf215546Sopenharmony_ci _mesa_hash_table_remove_key(var_usage_map, var); 1393bf215546Sopenharmony_ci continue; 1394bf215546Sopenharmony_ci } 1395bf215546Sopenharmony_ci 1396bf215546Sopenharmony_ci /* Build the new var type */ 1397bf215546Sopenharmony_ci unsigned new_num_comps = util_bitcount(usage->comps_kept); 1398bf215546Sopenharmony_ci const struct glsl_type *new_type = 1399bf215546Sopenharmony_ci glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps); 1400bf215546Sopenharmony_ci for (int i = usage->num_levels - 1; i >= 0; i--) { 1401bf215546Sopenharmony_ci assert(usage->levels[i].array_len > 0); 1402bf215546Sopenharmony_ci /* If the original type was a matrix type, we'd like to keep that so 1403bf215546Sopenharmony_ci * we don't convert matrices into arrays. 1404bf215546Sopenharmony_ci */ 1405bf215546Sopenharmony_ci if (i == usage->num_levels - 1 && 1406bf215546Sopenharmony_ci glsl_type_is_matrix(glsl_without_array(var->type)) && 1407bf215546Sopenharmony_ci new_num_comps > 1 && usage->levels[i].array_len > 1) { 1408bf215546Sopenharmony_ci new_type = glsl_matrix_type(glsl_get_base_type(new_type), 1409bf215546Sopenharmony_ci new_num_comps, 1410bf215546Sopenharmony_ci usage->levels[i].array_len); 1411bf215546Sopenharmony_ci } else { 1412bf215546Sopenharmony_ci new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0); 1413bf215546Sopenharmony_ci } 1414bf215546Sopenharmony_ci } 1415bf215546Sopenharmony_ci var->type = new_type; 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ci vars_shrunk = true; 1418bf215546Sopenharmony_ci } 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci return vars_shrunk; 1421bf215546Sopenharmony_ci} 1422bf215546Sopenharmony_ci 1423bf215546Sopenharmony_cistatic bool 1424bf215546Sopenharmony_civec_deref_is_oob(nir_deref_instr *deref, 1425bf215546Sopenharmony_ci struct vec_var_usage *usage) 1426bf215546Sopenharmony_ci{ 1427bf215546Sopenharmony_ci nir_deref_path path; 
1428bf215546Sopenharmony_ci nir_deref_path_init(&path, deref, NULL); 1429bf215546Sopenharmony_ci 1430bf215546Sopenharmony_ci bool oob = false; 1431bf215546Sopenharmony_ci for (unsigned i = 0; i < usage->num_levels; i++) { 1432bf215546Sopenharmony_ci nir_deref_instr *p = path.path[i + 1]; 1433bf215546Sopenharmony_ci if (p->deref_type == nir_deref_type_array_wildcard) 1434bf215546Sopenharmony_ci continue; 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_ci if (nir_src_is_const(p->arr.index) && 1437bf215546Sopenharmony_ci nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) { 1438bf215546Sopenharmony_ci oob = true; 1439bf215546Sopenharmony_ci break; 1440bf215546Sopenharmony_ci } 1441bf215546Sopenharmony_ci } 1442bf215546Sopenharmony_ci 1443bf215546Sopenharmony_ci nir_deref_path_finish(&path); 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci return oob; 1446bf215546Sopenharmony_ci} 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_cistatic bool 1449bf215546Sopenharmony_civec_deref_is_dead_or_oob(nir_deref_instr *deref, 1450bf215546Sopenharmony_ci struct hash_table *var_usage_map, 1451bf215546Sopenharmony_ci nir_variable_mode modes) 1452bf215546Sopenharmony_ci{ 1453bf215546Sopenharmony_ci struct vec_var_usage *usage = 1454bf215546Sopenharmony_ci get_vec_deref_usage(deref, var_usage_map, modes, false, NULL); 1455bf215546Sopenharmony_ci if (!usage) 1456bf215546Sopenharmony_ci return false; 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_ci return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage); 1459bf215546Sopenharmony_ci} 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_cistatic void 1462bf215546Sopenharmony_cishrink_vec_var_access_impl(nir_function_impl *impl, 1463bf215546Sopenharmony_ci struct hash_table *var_usage_map, 1464bf215546Sopenharmony_ci nir_variable_mode modes) 1465bf215546Sopenharmony_ci{ 1466bf215546Sopenharmony_ci nir_builder b; 1467bf215546Sopenharmony_ci nir_builder_init(&b, impl); 1468bf215546Sopenharmony_ci 
1469bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 1470bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 1471bf215546Sopenharmony_ci switch (instr->type) { 1472bf215546Sopenharmony_ci case nir_instr_type_deref: { 1473bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 1474bf215546Sopenharmony_ci if (!nir_deref_mode_may_be(deref, modes)) 1475bf215546Sopenharmony_ci break; 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci /* Clean up any dead derefs we find lying around. They may refer 1478bf215546Sopenharmony_ci * to variables we've deleted. 1479bf215546Sopenharmony_ci */ 1480bf215546Sopenharmony_ci if (nir_deref_instr_remove_if_unused(deref)) 1481bf215546Sopenharmony_ci break; 1482bf215546Sopenharmony_ci 1483bf215546Sopenharmony_ci /* Update the type in the deref to keep the types consistent as 1484bf215546Sopenharmony_ci * you walk down the chain. We don't need to check if this is one 1485bf215546Sopenharmony_ci * of the derefs we're shrinking because this is a no-op if it 1486bf215546Sopenharmony_ci * isn't. The worst that could happen is that we accidentally fix 1487bf215546Sopenharmony_ci * an invalid deref. 
1488bf215546Sopenharmony_ci */ 1489bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var) { 1490bf215546Sopenharmony_ci deref->type = deref->var->type; 1491bf215546Sopenharmony_ci } else if (deref->deref_type == nir_deref_type_array || 1492bf215546Sopenharmony_ci deref->deref_type == nir_deref_type_array_wildcard) { 1493bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(deref); 1494bf215546Sopenharmony_ci assert(glsl_type_is_array(parent->type) || 1495bf215546Sopenharmony_ci glsl_type_is_matrix(parent->type)); 1496bf215546Sopenharmony_ci deref->type = glsl_get_array_element(parent->type); 1497bf215546Sopenharmony_ci } 1498bf215546Sopenharmony_ci break; 1499bf215546Sopenharmony_ci } 1500bf215546Sopenharmony_ci 1501bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 1502bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1503bf215546Sopenharmony_ci 1504bf215546Sopenharmony_ci /* If we have a copy whose source or destination has been deleted 1505bf215546Sopenharmony_ci * because we determined the variable was dead, then we just 1506bf215546Sopenharmony_ci * delete the copy instruction. If the source variable was dead 1507bf215546Sopenharmony_ci * then it was writing undefined garbage anyway and if it's the 1508bf215546Sopenharmony_ci * destination variable that's dead then the write isn't needed. 
1509bf215546Sopenharmony_ci */ 1510bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_copy_deref) { 1511bf215546Sopenharmony_ci nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); 1512bf215546Sopenharmony_ci nir_deref_instr *src = nir_src_as_deref(intrin->src[1]); 1513bf215546Sopenharmony_ci if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) || 1514bf215546Sopenharmony_ci vec_deref_is_dead_or_oob(src, var_usage_map, modes)) { 1515bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 1516bf215546Sopenharmony_ci nir_deref_instr_remove_if_unused(dst); 1517bf215546Sopenharmony_ci nir_deref_instr_remove_if_unused(src); 1518bf215546Sopenharmony_ci } 1519bf215546Sopenharmony_ci continue; 1520bf215546Sopenharmony_ci } 1521bf215546Sopenharmony_ci 1522bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_load_deref && 1523bf215546Sopenharmony_ci intrin->intrinsic != nir_intrinsic_store_deref) 1524bf215546Sopenharmony_ci continue; 1525bf215546Sopenharmony_ci 1526bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1527bf215546Sopenharmony_ci if (!nir_deref_mode_may_be(deref, modes)) 1528bf215546Sopenharmony_ci continue; 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci struct vec_var_usage *usage = 1531bf215546Sopenharmony_ci get_vec_deref_usage(deref, var_usage_map, modes, false, NULL); 1532bf215546Sopenharmony_ci if (!usage) 1533bf215546Sopenharmony_ci continue; 1534bf215546Sopenharmony_ci 1535bf215546Sopenharmony_ci if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) { 1536bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref) { 1537bf215546Sopenharmony_ci nir_ssa_def *u = 1538bf215546Sopenharmony_ci nir_ssa_undef(&b, intrin->dest.ssa.num_components, 1539bf215546Sopenharmony_ci intrin->dest.ssa.bit_size); 1540bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 1541bf215546Sopenharmony_ci u); 1542bf215546Sopenharmony_ci } 1543bf215546Sopenharmony_ci 
nir_instr_remove(&intrin->instr); 1544bf215546Sopenharmony_ci nir_deref_instr_remove_if_unused(deref); 1545bf215546Sopenharmony_ci continue; 1546bf215546Sopenharmony_ci } 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci /* If we're not dropping any components, there's no need to 1549bf215546Sopenharmony_ci * compact vectors. 1550bf215546Sopenharmony_ci */ 1551bf215546Sopenharmony_ci if (usage->comps_kept == usage->all_comps) 1552bf215546Sopenharmony_ci continue; 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref) { 1555bf215546Sopenharmony_ci b.cursor = nir_after_instr(&intrin->instr); 1556bf215546Sopenharmony_ci 1557bf215546Sopenharmony_ci nir_ssa_def *undef = 1558bf215546Sopenharmony_ci nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size); 1559bf215546Sopenharmony_ci nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS]; 1560bf215546Sopenharmony_ci unsigned c = 0; 1561bf215546Sopenharmony_ci for (unsigned i = 0; i < intrin->num_components; i++) { 1562bf215546Sopenharmony_ci if (usage->comps_kept & (1u << i)) 1563bf215546Sopenharmony_ci vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++); 1564bf215546Sopenharmony_ci else 1565bf215546Sopenharmony_ci vec_srcs[i] = undef; 1566bf215546Sopenharmony_ci } 1567bf215546Sopenharmony_ci nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components); 1568bf215546Sopenharmony_ci 1569bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, 1570bf215546Sopenharmony_ci vec, 1571bf215546Sopenharmony_ci vec->parent_instr); 1572bf215546Sopenharmony_ci 1573bf215546Sopenharmony_ci /* The SSA def is now only used by the swizzle. It's safe to 1574bf215546Sopenharmony_ci * shrink the number of components. 
1575bf215546Sopenharmony_ci */ 1576bf215546Sopenharmony_ci assert(list_length(&intrin->dest.ssa.uses) == c); 1577bf215546Sopenharmony_ci intrin->num_components = c; 1578bf215546Sopenharmony_ci intrin->dest.ssa.num_components = c; 1579bf215546Sopenharmony_ci } else { 1580bf215546Sopenharmony_ci nir_component_mask_t write_mask = 1581bf215546Sopenharmony_ci nir_intrinsic_write_mask(intrin); 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci unsigned swizzle[NIR_MAX_VEC_COMPONENTS]; 1584bf215546Sopenharmony_ci nir_component_mask_t new_write_mask = 0; 1585bf215546Sopenharmony_ci unsigned c = 0; 1586bf215546Sopenharmony_ci for (unsigned i = 0; i < intrin->num_components; i++) { 1587bf215546Sopenharmony_ci if (usage->comps_kept & (1u << i)) { 1588bf215546Sopenharmony_ci swizzle[c] = i; 1589bf215546Sopenharmony_ci if (write_mask & (1u << i)) 1590bf215546Sopenharmony_ci new_write_mask |= 1u << c; 1591bf215546Sopenharmony_ci c++; 1592bf215546Sopenharmony_ci } 1593bf215546Sopenharmony_ci } 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci b.cursor = nir_before_instr(&intrin->instr); 1596bf215546Sopenharmony_ci 1597bf215546Sopenharmony_ci nir_ssa_def *swizzled = 1598bf215546Sopenharmony_ci nir_swizzle(&b, intrin->src[1].ssa, swizzle, c); 1599bf215546Sopenharmony_ci 1600bf215546Sopenharmony_ci /* Rewrite to use the compacted source */ 1601bf215546Sopenharmony_ci nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], 1602bf215546Sopenharmony_ci nir_src_for_ssa(swizzled)); 1603bf215546Sopenharmony_ci nir_intrinsic_set_write_mask(intrin, new_write_mask); 1604bf215546Sopenharmony_ci intrin->num_components = c; 1605bf215546Sopenharmony_ci } 1606bf215546Sopenharmony_ci break; 1607bf215546Sopenharmony_ci } 1608bf215546Sopenharmony_ci 1609bf215546Sopenharmony_ci default: 1610bf215546Sopenharmony_ci break; 1611bf215546Sopenharmony_ci } 1612bf215546Sopenharmony_ci } 1613bf215546Sopenharmony_ci } 1614bf215546Sopenharmony_ci} 1615bf215546Sopenharmony_ci 
1616bf215546Sopenharmony_cistatic bool 1617bf215546Sopenharmony_cifunction_impl_has_vars_with_modes(nir_function_impl *impl, 1618bf215546Sopenharmony_ci nir_variable_mode modes) 1619bf215546Sopenharmony_ci{ 1620bf215546Sopenharmony_ci nir_shader *shader = impl->function->shader; 1621bf215546Sopenharmony_ci 1622bf215546Sopenharmony_ci if (modes & ~nir_var_function_temp) { 1623bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, 1624bf215546Sopenharmony_ci modes & ~nir_var_function_temp) 1625bf215546Sopenharmony_ci return true; 1626bf215546Sopenharmony_ci } 1627bf215546Sopenharmony_ci 1628bf215546Sopenharmony_ci if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals)) 1629bf215546Sopenharmony_ci return true; 1630bf215546Sopenharmony_ci 1631bf215546Sopenharmony_ci return false; 1632bf215546Sopenharmony_ci} 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci/** Attempt to shrink arrays of vectors 1635bf215546Sopenharmony_ci * 1636bf215546Sopenharmony_ci * This pass looks at variables which contain a vector or an array (possibly 1637bf215546Sopenharmony_ci * multiple dimensions) of vectors and attempts to lower to a smaller vector 1638bf215546Sopenharmony_ci * or array. If the pass can prove that a component of a vector (or array of 1639bf215546Sopenharmony_ci * vectors) is never really used, then that component will be removed. 1640bf215546Sopenharmony_ci * Similarly, the pass attempts to shorten arrays based on what elements it 1641bf215546Sopenharmony_ci * can prove are never read or never contain valid data. 
1642bf215546Sopenharmony_ci */ 1643bf215546Sopenharmony_cibool 1644bf215546Sopenharmony_cinir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes) 1645bf215546Sopenharmony_ci{ 1646bf215546Sopenharmony_ci assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 1647bf215546Sopenharmony_ci 1648bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 1649bf215546Sopenharmony_ci 1650bf215546Sopenharmony_ci struct hash_table *var_usage_map = 1651bf215546Sopenharmony_ci _mesa_pointer_hash_table_create(mem_ctx); 1652bf215546Sopenharmony_ci 1653bf215546Sopenharmony_ci bool has_vars_to_shrink = false; 1654bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 1655bf215546Sopenharmony_ci if (!function->impl) 1656bf215546Sopenharmony_ci continue; 1657bf215546Sopenharmony_ci 1658bf215546Sopenharmony_ci /* Don't even bother crawling the IR if we don't have any variables. 1659bf215546Sopenharmony_ci * Given that this pass deletes any unused variables, it's likely that 1660bf215546Sopenharmony_ci * we will be in this scenario eventually. 
1661bf215546Sopenharmony_ci */ 1662bf215546Sopenharmony_ci if (function_impl_has_vars_with_modes(function->impl, modes)) { 1663bf215546Sopenharmony_ci has_vars_to_shrink = true; 1664bf215546Sopenharmony_ci find_used_components_impl(function->impl, var_usage_map, 1665bf215546Sopenharmony_ci modes, mem_ctx); 1666bf215546Sopenharmony_ci } 1667bf215546Sopenharmony_ci } 1668bf215546Sopenharmony_ci if (!has_vars_to_shrink) { 1669bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1670bf215546Sopenharmony_ci nir_shader_preserve_all_metadata(shader); 1671bf215546Sopenharmony_ci return false; 1672bf215546Sopenharmony_ci } 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_ci bool globals_shrunk = false; 1675bf215546Sopenharmony_ci if (modes & nir_var_shader_temp) { 1676bf215546Sopenharmony_ci globals_shrunk = shrink_vec_var_list(&shader->variables, 1677bf215546Sopenharmony_ci nir_var_shader_temp, 1678bf215546Sopenharmony_ci var_usage_map); 1679bf215546Sopenharmony_ci } 1680bf215546Sopenharmony_ci 1681bf215546Sopenharmony_ci bool progress = false; 1682bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 1683bf215546Sopenharmony_ci if (!function->impl) 1684bf215546Sopenharmony_ci continue; 1685bf215546Sopenharmony_ci 1686bf215546Sopenharmony_ci bool locals_shrunk = false; 1687bf215546Sopenharmony_ci if (modes & nir_var_function_temp) { 1688bf215546Sopenharmony_ci locals_shrunk = shrink_vec_var_list(&function->impl->locals, 1689bf215546Sopenharmony_ci nir_var_function_temp, 1690bf215546Sopenharmony_ci var_usage_map); 1691bf215546Sopenharmony_ci } 1692bf215546Sopenharmony_ci 1693bf215546Sopenharmony_ci if (globals_shrunk || locals_shrunk) { 1694bf215546Sopenharmony_ci shrink_vec_var_access_impl(function->impl, var_usage_map, modes); 1695bf215546Sopenharmony_ci 1696bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_block_index | 1697bf215546Sopenharmony_ci nir_metadata_dominance); 1698bf215546Sopenharmony_ci progress = true; 
1699bf215546Sopenharmony_ci } else { 1700bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_all); 1701bf215546Sopenharmony_ci } 1702bf215546Sopenharmony_ci } 1703bf215546Sopenharmony_ci 1704bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci return progress; 1707bf215546Sopenharmony_ci} 1708