/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/hash_table.h"

static bool
is_trivial_deref_cast(nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   return cast->modes == parent->modes &&
          cast->type == parent->type &&
          cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
          cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
}

void
nir_deref_path_init(nir_deref_path *path,
                    nir_deref_instr *deref, void *mem_ctx)
{
   assert(deref != NULL);

   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
    * room for the NULL terminator.
    */
   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;

   int count = 0;

   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
   nir_deref_instr **head = tail;

   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      count++;
      if (count <= max_short_path_len)
         *(--head) = d;
   }

   if (count <= max_short_path_len) {
      /* If we're under max_short_path_len, just use the short path. */
      path->path = head;
      goto done;
   }

#ifndef NDEBUG
   /* Just in case someone uses short_path by accident */
   for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
      path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
#endif

   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
   head = tail = path->path + count;
   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      *(--head) = d;
   }

done:
   assert(head == path->path);
   assert(tail == head + count);
   assert(*tail == NULL);
}
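
/* Example usage (a sketch, not taken from this file): most callers build a
 * path, walk it from the root outward, and then free it.  path.path[0] is
 * the var or cast deref at the root and the array is NULL-terminated.
 *
 *    nir_deref_path path;
 *    nir_deref_path_init(&path, deref, NULL);
 *    for (nir_deref_instr **p = path.path; *p; p++) {
 *       ... inspect (*p)->deref_type ...
 *    }
 *    nir_deref_path_finish(&path);
 */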

void
nir_deref_path_finish(nir_deref_path *path)
{
   if (path->path < &path->_short_path[0] ||
       path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
      ralloc_free(path->path);
}

/**
 * Recursively removes unused deref instructions
 */
bool
nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
{
   bool progress = false;

   for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      assert(d->dest.is_ssa);
      if (!nir_ssa_def_is_unused(&d->dest.ssa))
         break;

      nir_instr_remove(&d->instr);
      progress = true;
   }

   return progress;
}

bool
nir_deref_instr_has_indirect(nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      /* Consider casts to be indirects */
      if (instr->deref_type == nir_deref_type_cast)
         return true;

      if ((instr->deref_type == nir_deref_type_array ||
           instr->deref_type == nir_deref_type_ptr_as_array) &&
          !nir_src_is_const(instr->arr.index))
         return true;

      instr = nir_deref_instr_parent(instr);
   }

   return false;
}

bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
{
   for (; instr; instr = nir_deref_instr_parent(instr)) {
      if (instr->deref_type == nir_deref_type_array &&
          nir_src_is_const(instr->arr.index) &&
          nir_src_as_uint(instr->arr.index) >=
             glsl_get_length(nir_deref_instr_parent(instr)->type))
         return true;
   }

   return false;
}

bool
nir_deref_instr_has_complex_use(nir_deref_instr *deref,
                                nir_deref_instr_has_complex_use_options opts)
{
   nir_foreach_use(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;

      switch (use_instr->type) {
      case nir_instr_type_deref: {
         nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);

         /* A var deref has no sources */
         assert(use_deref->deref_type != nir_deref_type_var);

         /* If a deref shows up in an array index or something like that, it's
          * a complex use.
          */
         if (use_src != &use_deref->parent)
            return true;

         /* Anything that isn't a basic struct or array deref is considered to
          * be a "complex" use.  In particular, we don't allow ptr_as_array
          * because we assume that opt_deref will turn any non-complex
          * ptr_as_array derefs into regular array derefs eventually so passes
          * which only want to handle simple derefs will pick them up in a
          * later pass.
          */
         if (use_deref->deref_type != nir_deref_type_struct &&
             use_deref->deref_type != nir_deref_type_array_wildcard &&
             use_deref->deref_type != nir_deref_type_array)
            return true;

         if (nir_deref_instr_has_complex_use(use_deref, opts))
            return true;

         continue;
      }

      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
         switch (use_intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            assert(use_src == &use_intrin->src[0]);
            continue;

         case nir_intrinsic_copy_deref:
            assert(use_src == &use_intrin->src[0] ||
                   use_src == &use_intrin->src[1]);
            continue;

         case nir_intrinsic_store_deref:
            /* A use in src[1] of a store means we're taking that pointer and
             * writing it to a variable.  Because we have no idea who will
             * read that variable and what they will do with the pointer, it's
             * considered a "complex" use.  A use in src[0], on the other
             * hand, is a simple use because we're just going to dereference
             * it and write a value there.
             */
            if (use_src == &use_intrin->src[0])
               continue;
            return true;

         case nir_intrinsic_memcpy_deref:
            if (use_src == &use_intrin->src[0] &&
                (opts & nir_deref_instr_has_complex_use_allow_memcpy_dst))
               continue;
            if (use_src == &use_intrin->src[1] &&
                (opts & nir_deref_instr_has_complex_use_allow_memcpy_src))
               continue;
            return true;

         default:
            return true;
         }
         unreachable("Switch default failed");
      }

      default:
         return true;
      }
   }

   nir_foreach_if_use(use, &deref->dest.ssa)
      return true;

   return false;
}
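
/* Example usage (a sketch, not from a real pass): a pass that wants to
 * replace the storage behind a deref chain can use the helper above to make
 * sure the chain is only ever loaded, stored, or extended with simple
 * struct/array derefs:
 *
 *    if (!nir_deref_instr_has_complex_use(deref, 0)) {
 *       ... safe to rewrite every load/store of this deref chain ...
 *    }
 */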

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

unsigned
nir_deref_instr_array_stride(nir_deref_instr *deref)
{
   switch (deref->deref_type) {
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
      unsigned stride = glsl_get_explicit_stride(arr_type);

      if ((glsl_type_is_matrix(arr_type) &&
           glsl_matrix_type_is_row_major(arr_type)) ||
          (glsl_type_is_vector(arr_type) && stride == 0))
         stride = type_scalar_size_bytes(arr_type);

      return stride;
   }
   case nir_deref_type_ptr_as_array:
      return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
   case nir_deref_type_cast:
      return deref->cast.ptr_stride;
   default:
      return 0;
   }
}

static unsigned
type_get_array_stride(const struct glsl_type *elem_type,
                      glsl_type_size_align_func size_align)
{
   unsigned elem_size, elem_align;
   size_align(elem_type, &elem_size, &elem_align);
   return ALIGN_POT(elem_size, elem_align);
}

static unsigned
struct_type_get_field_offset(const struct glsl_type *struct_type,
                             glsl_type_size_align_func size_align,
                             unsigned field_idx)
{
   assert(glsl_type_is_struct_or_ifc(struct_type));
   unsigned offset = 0;
   for (unsigned i = 0; i <= field_idx; i++) {
      unsigned elem_size, elem_align;
      size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
      offset = ALIGN_POT(offset, elem_align);
      if (i < field_idx)
         offset += elem_size;
   }
   return offset;
}

unsigned
nir_deref_instr_get_const_offset(nir_deref_instr *deref,
                                 glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   unsigned offset = 0;
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
         offset += nir_src_as_uint((*p)->arr.index) *
                   type_get_array_stride((*p)->type, size_align);
         break;
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         offset += struct_type_get_field_offset(parent->type, size_align,
                                                (*p)->strct.index);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
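
/* Worked example for nir_deref_instr_get_const_offset above (hypothetical
 * types and size_align callback):
 *
 *    struct S { float a; vec4 b; };   // vec4 assumed 16-byte aligned
 *    S arr[8];
 *
 * The deref chain arr[2].b yields
 *
 *    offset = 2 * type_get_array_stride(S)          // array step
 *           + struct_type_get_field_offset(S, 1)    // field "b"
 *           = 2 * 32 + 16 = 80
 *
 * assuming the callback reports size 4/align 4 for float and size 16/align
 * 16 for vec4, so S gets a 32-byte array stride.
 */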

nir_ssa_def *
nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
                       glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_ptr_as_array: {
         nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
         int stride = type_get_array_stride((*p)->type, size_align);
         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
         break;
      }
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         unsigned field_offset =
            struct_type_get_field_offset(parent->type, size_align,
                                         (*p)->strct.index);
         offset = nir_iadd_imm(b, offset, field_offset);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

bool
nir_remove_dead_derefs_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            progress = true;
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_remove_dead_derefs(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl && nir_remove_dead_derefs_impl(function->impl))
         progress = true;
   }

   return progress;
}

void
nir_fixup_deref_modes(nir_shader *shader)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->deref_type == nir_deref_type_cast)
               continue;

            nir_variable_mode parent_modes;
            if (deref->deref_type == nir_deref_type_var) {
               parent_modes = deref->var->data.mode;
            } else {
               assert(deref->parent.is_ssa);
               nir_deref_instr *parent =
                  nir_instr_as_deref(deref->parent.ssa->parent_instr);
               parent_modes = parent->modes;
            }

            deref->modes = parent_modes;
         }
      }
   }
}

static bool
modes_may_alias(nir_variable_mode a, nir_variable_mode b)
{
   /* Generic pointers can alias with SSBOs */
   if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
       (b & (nir_var_mem_ssbo | nir_var_mem_global)))
      return true;

   /* Pointers can only alias if they share a mode. */
   return a & b;
}

ALWAYS_INLINE static nir_deref_compare_result
compare_deref_paths(nir_deref_path *a_path, nir_deref_path *b_path,
                    unsigned *i, bool (*stop_fn)(const nir_deref_instr *))
{
   /* Start off assuming they fully compare.  We ignore equality for now.  In
    * the end, we'll determine that by containment.
    */
   nir_deref_compare_result result = nir_derefs_may_alias_bit |
                                     nir_derefs_a_contains_b_bit |
                                     nir_derefs_b_contains_a_bit;

   nir_deref_instr **a = a_path->path;
   nir_deref_instr **b = b_path->path;

   for (; a[*i] != NULL; (*i)++) {
      if (a[*i] != b[*i])
         break;

      if (stop_fn && stop_fn(a[*i]))
         break;
   }

   /* We're at either the tail or the divergence point between the two deref
    * paths.  Look to see if either contains a cast or a ptr_as_array deref.
    * If it does, we don't know how to safely make any inferences.  Hopefully,
    * nir_opt_deref will clean most of these up and we can start inferring
    * things again.
    *
    * In theory, we could do a bit better.  For instance, we could detect the
    * case where we have exactly one ptr_as_array deref in the chain after the
    * divergence point, it's matched in both chains, and the two chains have
    * different constant indices.
    */
   for (unsigned j = *i; a[j] != NULL; j++) {
      if (stop_fn && stop_fn(a[j]))
         break;

      if (a[j]->deref_type == nir_deref_type_cast ||
          a[j]->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }
   for (unsigned j = *i; b[j] != NULL; j++) {
      if (stop_fn && stop_fn(b[j]))
         break;

      if (b[j]->deref_type == nir_deref_type_cast ||
          b[j]->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }

   for (; a[*i] != NULL && b[*i] != NULL; (*i)++) {
      if (stop_fn && (stop_fn(a[*i]) || stop_fn(b[*i])))
         break;

      switch (a[*i]->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         assert(b[*i]->deref_type == nir_deref_type_array ||
                b[*i]->deref_type == nir_deref_type_array_wildcard);

         if (a[*i]->deref_type == nir_deref_type_array_wildcard) {
            if (b[*i]->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_b_contains_a_bit;
         } else if (b[*i]->deref_type == nir_deref_type_array_wildcard) {
            if (a[*i]->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_a_contains_b_bit;
         } else {
            assert(a[*i]->deref_type == nir_deref_type_array &&
                   b[*i]->deref_type == nir_deref_type_array);
            assert(a[*i]->arr.index.is_ssa && b[*i]->arr.index.is_ssa);

            if (nir_src_is_const(a[*i]->arr.index) &&
                nir_src_is_const(b[*i]->arr.index)) {
               /* If they're both direct and have different offsets, they
                * don't even alias much less anything else.
                */
               if (nir_src_as_uint(a[*i]->arr.index) !=
                   nir_src_as_uint(b[*i]->arr.index))
                  return nir_derefs_do_not_alias;
            } else if (a[*i]->arr.index.ssa == b[*i]->arr.index.ssa) {
               /* They're the same indirect, continue on */
            } else {
               /* They're not the same index so we can't prove anything about
                * containment.
                */
               result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
            }
         }
         break;
      }

      case nir_deref_type_struct: {
         /* If they're different struct members, they don't even alias */
         if (a[*i]->strct.index != b[*i]->strct.index)
            return nir_derefs_do_not_alias;
         break;
      }

      default:
         unreachable("Invalid deref type");
      }
   }

   /* If a is longer than b, then it can't contain b.  If neither a[i] nor
    * b[i] are NULL then we aren't at the end of the chain and we know nothing
    * about containment.
    */
   if (a[*i] != NULL)
      result &= ~nir_derefs_a_contains_b_bit;
   if (b[*i] != NULL)
      result &= ~nir_derefs_b_contains_a_bit;

   /* If a contains b and b contains a they must be equal. */
   if ((result & nir_derefs_a_contains_b_bit) &&
       (result & nir_derefs_b_contains_a_bit))
      result |= nir_derefs_equal_bit;

   return result;
}

static bool
is_interface_struct_deref(const nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_struct) {
      assert(glsl_type_is_struct_or_ifc(nir_deref_instr_parent(deref)->type));
      return true;
   } else {
      return false;
   }
}

nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path *a_path,
                        nir_deref_path *b_path)
{
   if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
      return nir_derefs_do_not_alias;

   if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
      return nir_derefs_may_alias_bit;

   unsigned path_idx = 1;
   if (a_path->path[0]->deref_type == nir_deref_type_var) {
      const nir_variable *a_var = a_path->path[0]->var;
      const nir_variable *b_var = b_path->path[0]->var;

      /* If we got here, the two variables must have the same mode.  The
       * only way modes_may_alias() can return true for two different modes
       * is if one is global and the other ssbo.  However, global variables
       * only exist in OpenCL and SSBOs don't exist there.  No API allows
       * both for variables.
       */
      assert(a_var->data.mode == b_var->data.mode);

      switch (a_var->data.mode) {
      case nir_var_mem_ssbo: {
         nir_deref_compare_result binding_compare;
         if (a_var == b_var) {
            binding_compare = compare_deref_paths(a_path, b_path, &path_idx,
                                                  is_interface_struct_deref);
         } else {
            binding_compare = nir_derefs_do_not_alias;
         }

         if (binding_compare & nir_derefs_equal_bit)
            break;

         /* If the binding derefs can't alias and at least one is RESTRICT,
          * then we know they can't alias.
          */
         if (!(binding_compare & nir_derefs_may_alias_bit) &&
             ((a_var->data.access & ACCESS_RESTRICT) ||
              (b_var->data.access & ACCESS_RESTRICT)))
            return nir_derefs_do_not_alias;

         return nir_derefs_may_alias_bit;
      }

      case nir_var_mem_shared:
         if (a_var == b_var)
            break;

         /* Per SPV_KHR_workgroup_memory_explicit_layout and
          * GL_EXT_shared_memory_block, shared blocks alias each other.
          * We will have either all blocks or all non-blocks.
          */
         if (glsl_type_is_interface(a_var->type) ||
             glsl_type_is_interface(b_var->type)) {
            assert(glsl_type_is_interface(a_var->type) &&
                   glsl_type_is_interface(b_var->type));
            return nir_derefs_may_alias_bit;
         }

         /* Otherwise, distinct shared vars don't alias */
         return nir_derefs_do_not_alias;

      default:
         /* For any other variable types, if we can chase them back to the
          * variable, and the variables are different, they don't alias.
          */
         if (a_var == b_var)
            break;

         return nir_derefs_do_not_alias;
      }
   } else {
      assert(a_path->path[0]->deref_type == nir_deref_type_cast);
      /* If they're not exactly the same cast, it's hard to compare them so we
       * just assume they alias.  Comparing casts is tricky as there are lots
       * of things such as modes, types, etc. that all have to match up; for
       * now, we just assume nir_opt_deref will combine them and compare the
       * deref instructions.
       *
       * TODO: At some point in the future, we could be clever and understand
       * that a float[] and int[] have the same layout and aliasing structure
       * but double[] and vec3[] do not and we could potentially be a bit
       * smarter here.
       */
      if (a_path->path[0] != b_path->path[0])
         return nir_derefs_may_alias_bit;
   }

   return compare_deref_paths(a_path, b_path, &path_idx, NULL);
}

nir_deref_compare_result
nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
{
   if (a == b) {
      return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
             nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
   }

   nir_deref_path a_path, b_path;
   nir_deref_path_init(&a_path, a, NULL);
   nir_deref_path_init(&b_path, b, NULL);
   assert(a_path.path[0]->deref_type == nir_deref_type_var ||
          a_path.path[0]->deref_type == nir_deref_type_cast);
   assert(b_path.path[0]->deref_type == nir_deref_type_var ||
          b_path.path[0]->deref_type == nir_deref_type_cast);

   nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);

   nir_deref_path_finish(&a_path);
   nir_deref_path_finish(&b_path);

   return result;
}
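
/* Example usage (a sketch, not from this file): a pass deciding whether a
 * store to "dst" can clobber a previously loaded "src" only needs the
 * may-alias bit, while copy propagation also cares about exact equality:
 *
 *    nir_deref_compare_result cmp = nir_compare_derefs(dst, src);
 *    if (!(cmp & nir_derefs_may_alias_bit)) {
 *       ... the store cannot affect the load ...
 *    } else if (cmp & nir_derefs_equal_bit) {
 *       ... the store overwrites exactly the loaded memory ...
 *    }
 */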

nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
{
   if (!deref->_path) {
      deref->_path = ralloc(mem_ctx, nir_deref_path);
      nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
   }
   return deref->_path;
}

nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
                                                      nir_deref_and_path *a,
                                                      nir_deref_and_path *b)
{
   if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
      return nir_compare_derefs(a->instr, b->instr);

   return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
                                  nir_get_deref_path(mem_ctx, b));
}

struct rematerialize_deref_state {
   bool progress;
   nir_builder builder;
   nir_block *block;
   struct hash_table *cache;
};

static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr *deref,
                             struct rematerialize_deref_state *state)
{
   if (deref->instr.block == state->block)
      return deref;

   if (!state->cache) {
      state->cache = _mesa_pointer_hash_table_create(NULL);
   }

   struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
   if (cached)
      return cached->data;

   nir_builder *b = &state->builder;
   nir_deref_instr *new_deref =
      nir_deref_instr_create(b->shader, deref->deref_type);
   new_deref->modes = deref->modes;
   new_deref->type = deref->type;

   if (deref->deref_type == nir_deref_type_var) {
      new_deref->var = deref->var;
   } else {
      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (parent) {
         parent = rematerialize_deref_in_block(parent, state);
         new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
      } else {
         nir_src_copy(&new_deref->parent, &deref->parent);
      }
   }

   switch (deref->deref_type) {
   case nir_deref_type_var:
   case nir_deref_type_array_wildcard:
      /* Nothing more to do */
      break;

   case nir_deref_type_cast:
      new_deref->cast.ptr_stride = deref->cast.ptr_stride;
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      assert(!nir_src_as_deref(deref->arr.index));
      nir_src_copy(&new_deref->arr.index, &deref->arr.index);
      break;

   case nir_deref_type_struct:
      new_deref->strct.index = deref->strct.index;
      break;

   default:
      unreachable("Invalid deref instruction type");
   }

   nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
                     deref->dest.ssa.num_components,
                     deref->dest.ssa.bit_size,
                     NULL);
   nir_builder_instr_insert(b, &new_deref->instr);

   return new_deref;
}

static bool
rematerialize_deref_src(nir_src *src, void *_state)
{
   struct rematerialize_deref_state *state = _state;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (!deref)
      return true;

   nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
   if (block_deref != deref) {
      nir_instr_rewrite_src(src->parent_instr, src,
                            nir_src_for_ssa(&block_deref->dest.ssa));
      nir_deref_instr_remove_if_unused(deref);
      state->progress = true;
   }

   return true;
}

/** Re-materialize derefs in every block
 *
 * This pass re-materializes deref instructions in every block in which they
 * are used.  After this pass has been run, every use of a deref will be of a
 * deref in the same block as the use.  Also, all unused derefs will be
 * deleted as a side-effect.
 *
 * Derefs used as sources of phi instructions are not rematerialized.
 */
bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
{
   struct rematerialize_deref_state state = { 0 };
   nir_builder_init(&state.builder, impl);

   nir_foreach_block_unstructured(block, impl) {
      state.block = block;

      /* Start each block with a fresh cache */
      if (state.cache)
         _mesa_hash_table_clear(state.cache, NULL);

      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            continue;

         /* If a deref is used in a phi, we can't rematerialize it, as the new
          * derefs would appear before the phi, which is not valid.
          */
         if (instr->type == nir_instr_type_phi)
            continue;

         state.builder.cursor = nir_before_instr(instr);
         nir_foreach_src(instr, rematerialize_deref_src, &state);
      }

#ifndef NDEBUG
      nir_if *following_if = nir_block_get_following_if(block);
      if (following_if)
         assert(!nir_src_as_deref(following_if->condition));
#endif
   }

   _mesa_hash_table_destroy(state.cache, NULL);

   return state.progress;
}

static void
nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
{
   nir_foreach_use(use, &parent->dest.ssa) {
      if (use->parent_instr->type != nir_instr_type_deref)
         continue;

      nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
      switch (child->deref_type) {
      case nir_deref_type_var:
         unreachable("nir_deref_type_var cannot be a child");

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
         child->type = glsl_get_array_element(parent->type);
         break;

      case nir_deref_type_ptr_as_array:
         child->type = parent->type;
         break;

      case nir_deref_type_struct:
         child->type = glsl_get_struct_field(parent->type,
                                             child->strct.index);
         break;

      case nir_deref_type_cast:
         /* We stop the recursion here */
         continue;
      }

      /* Recurse into children */
      nir_deref_instr_fixup_child_types(child);
   }
}
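
/* Illustrative example (hypothetical SSA names) for opt_alu_of_cast below:
 * pointer comparisons coming out of OpenCL frequently look like
 *
 *    ssa_2 = deref_cast (int *)ssa_1
 *    ssa_3 = ieq ssa_2, ssa_4
 *
 * The cast doesn't change the pointer value, so the ALU instruction can use
 * ssa_1 directly and the cast may then become dead.
 */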

static bool
opt_alu_of_cast(nir_alu_instr *alu)
{
   bool progress = false;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      assert(alu->src[i].src.is_ssa);
      nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
      if (src_instr->type != nir_instr_type_deref)
         continue;

      nir_deref_instr *src_deref = nir_instr_as_deref(src_instr);
      if (src_deref->deref_type != nir_deref_type_cast)
         continue;

      assert(src_deref->parent.is_ssa);
      nir_instr_rewrite_src_ssa(&alu->instr, &alu->src[i].src,
                                src_deref->parent.ssa);
      progress = true;
   }

   return progress;
}

static bool
is_trivial_array_deref_cast(nir_deref_instr *cast)
{
   assert(is_trivial_deref_cast(cast));

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);

   if (parent->deref_type == nir_deref_type_array) {
      return cast->cast.ptr_stride ==
             glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
   } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
      return cast->cast.ptr_stride ==
             nir_deref_instr_array_stride(parent);
   } else {
      return false;
   }
}

static bool
is_deref_ptr_as_array(nir_instr *instr)
{
   return instr->type == nir_instr_type_deref &&
          nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
}

static bool
opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   if (cast->cast.align_mul == 0)
      return false;

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Don't use any default alignment for this check.  We don't want to fall
    * back to type alignment too early in case we find out later that we're
    * somehow a child of a packed struct.
    */
   uint32_t parent_mul, parent_offset;
   if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
                                     &parent_mul, &parent_offset))
      return false;

   /* If this cast increases the alignment, we want to keep it.
    *
    * There is a possibility that the larger alignment provided by this cast
    * somehow disagrees with the smaller alignment further up the deref chain.
    * In that case, we choose to favor the alignment closer to the actual
    * memory operation which, in this case, is the cast and not its parent so
    * keeping the cast alignment is the right thing to do.
    */
   if (parent_mul < cast->cast.align_mul)
      return false;

   /* If we've gotten here, we have a parent deref with an align_mul at least
    * as large as ours so we can potentially throw away the alignment
    * information on this deref.  There are two cases to consider here:
    *
    * 1. We can chase the deref all the way back to the variable.  In this
    *    case, we have "perfect" knowledge, modulo indirect array derefs.
    *    Unless we've done something wrong in our indirect/wildcard stride
    *    calculations, our knowledge from the deref walk is better than the
    *    client's.
    *
    * 2. We can't chase it all the way back to the variable.  In this case,
    *    because our call to nir_get_explicit_deref_align(parent, ...) above
    *    passes default_to_type_align=false, the only way we can even get
    *    here is if something further up the deref chain has a cast with an
    *    alignment, which can only happen if we get an alignment from the
    *    client (most likely a decoration in the SPIR-V).  If the client has
    *    provided us with two conflicting alignments in the deref chain,
    *    that's their fault and we can do whatever we want.
    *
    * In either case, we should be within our rights, at this point, to throw
    * away the alignment information on this deref.  However, to be "nice" to
    * weird clients, we do one more check.  It really shouldn't happen but
    * it's possible that the parent's alignment offset disagrees with the
    * cast's alignment offset.  In this case, we consider the cast as
    * providing more information (or at least more valid information) and keep
    * it even if the align_mul from the parent is larger.
    */
   assert(cast->cast.align_mul <= parent_mul);
   if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
      return false;

   /* If we got here, the parent has better alignment information than the
    * child and we can get rid of the child alignment information.
    */
   cast->cast.align_mul = 0;
   cast->cast.align_offset = 0;
   return true;
}
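
/* Worked example for opt_remove_restricting_cast_alignments above
 * (hypothetical numbers): if walking the parent chain yields align_mul=16,
 * align_offset=4 and the cast declares align_mul=4, align_offset=0, the
 * parent's alignment is at least as strong and 4 % 4 == 0 agrees with the
 * cast's align_offset, so the cast's alignment is dropped.  If the cast
 * instead declared align_mul=32, it would be kept because it strengthens
 * the alignment.
 */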

/**
 * Remove casts that just wrap other casts.
 */
static bool
opt_remove_cast_cast(nir_deref_instr *cast)
{
   nir_deref_instr *first_cast = cast;

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
      if (parent == NULL || parent->deref_type != nir_deref_type_cast)
         break;
      first_cast = parent;
   }
   if (cast == first_cast)
      return false;

   nir_instr_rewrite_src(&cast->instr, &cast->parent,
                         nir_src_for_ssa(first_cast->parent.ssa));
   return true;
}
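
/* Sketch of the rewrite performed by opt_remove_cast_cast above
 * (hypothetical SSA names):
 *
 *    ssa_2 = deref_cast (T *)ssa_1
 *    ssa_3 = deref_cast (U *)ssa_2
 *
 * becomes
 *
 *    ssa_3 = deref_cast (U *)ssa_1
 *
 * i.e. the outermost cast is re-pointed at whatever the innermost cast in
 * the chain was casting, and the intermediate casts can then be removed as
 * unused.
 */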

/* Restrict variable modes in casts.
 *
 * If we know from something higher up the deref chain that the deref has a
 * specific mode, we can cast to more general and back but we can never cast
 * across modes.  For non-cast derefs, we should only ever do anything here if
 * the parent eventually comes from a cast that we restricted earlier.
 */
static bool
opt_restrict_deref_modes(nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var) {
      assert(deref->modes == deref->var->data.mode);
      return false;
   }

   nir_deref_instr *parent = nir_src_as_deref(deref->parent);
   if (parent == NULL || parent->modes == deref->modes)
      return false;

   assert(parent->modes & deref->modes);
   deref->modes &= parent->modes;
   return true;
}

static bool
opt_remove_sampler_cast(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Strip both types down to their non-array type and bail if there are any
    * discrepancies in array lengths.
    */
   const struct glsl_type *parent_type = parent->type;
   const struct glsl_type *cast_type = cast->type;
   while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
      if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
         return false;
      parent_type = glsl_get_array_element(parent_type);
      cast_type = glsl_get_array_element(cast_type);
   }

   if (!glsl_type_is_sampler(parent_type))
      return false;

   if (cast_type != glsl_bare_sampler_type() &&
       (glsl_type_is_bare_sampler(parent_type) ||
        cast_type != glsl_sampler_type_to_texture(parent_type)))
      return false;

   /* We're a cast from a more detailed sampler type to a bare sampler or a
    * texture type with the same dimensionality.
    */
   nir_ssa_def_rewrite_uses(&cast->dest.ssa,
                            &parent->dest.ssa);
   nir_instr_remove(&cast->instr);

   /* Recursively crawl the deref tree and clean up types */
   nir_deref_instr_fixup_child_types(parent);

   return true;
}

/**
 * Is this casting a struct to a contained struct?
 * struct a { struct b field0 };
 * ssa_5 is structa;
 * deref_cast (structb *)ssa_5 (function_temp structb);
 * converts to
 * deref_struct &ssa_5->field0 (function_temp structb);
 * This allows subsequent copy propagation to work.
 */
static bool
opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   if (cast->cast.align_mul > 0)
      return false;

   if (!glsl_type_is_struct(parent->type))
      return false;

   /* Empty struct */
   if (glsl_get_length(parent->type) < 1)
      return false;

   if (glsl_get_struct_field_offset(parent->type, 0) != 0)
      return false;

   if (cast->type != glsl_get_struct_field(parent->type, 0))
      return false;

   nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
   nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
   nir_deref_instr_remove_if_unused(cast);
   return true;
}

static bool
opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
{
   bool progress = false;

   progress |= opt_remove_restricting_cast_alignments(cast);

   if (opt_replace_struct_wrapper_cast(b, cast))
      return true;

   if (opt_remove_sampler_cast(cast))
      return true;

   progress |= opt_remove_cast_cast(cast);
   if (!is_trivial_deref_cast(cast))
      return progress;

   /* If this deref still contains useful alignment information, we don't want
    * to delete it.
    */
   if (cast->cast.align_mul > 0)
      return progress;

   bool trivial_array_cast = is_trivial_array_deref_cast(cast);

   assert(cast->dest.is_ssa);
   assert(cast->parent.is_ssa);

   nir_foreach_use_safe(use_src, &cast->dest.ssa) {
      /* If this isn't a trivial array cast, we can't propagate into
       * ptr_as_array derefs.
       */
      if (is_deref_ptr_as_array(use_src->parent_instr) &&
          !trivial_array_cast)
         continue;

      nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
      progress = true;
   }

   /* If uses would be a bit crazy */
   assert(list_is_empty(&cast->dest.ssa.if_uses));

   if (nir_deref_instr_remove_if_unused(cast))
      progress = true;

   return progress;
}
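
/* Sketch of what opt_deref_ptr_as_array below does (hypothetical SSA names):
 * when a ptr_as_array deref sits on top of another array-like deref, the two
 * indices are simply added,
 *
 *    ssa_2 = deref_array &ssa_1[ssa_i]
 *    ssa_3 = deref_ptr_as_array &ssa_2[ssa_j]
 *
 * becomes
 *
 *    ssa_3 = deref_array &ssa_1[ssa_i + ssa_j]
 *
 * and a ptr_as_array deref with a constant index of 0 is dropped entirely.
 */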

static bool
opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
{
   assert(deref->deref_type == nir_deref_type_ptr_as_array);

   nir_deref_instr *parent = nir_deref_instr_parent(deref);

   if (nir_src_is_const(deref->arr.index) &&
       nir_src_as_int(deref->arr.index) == 0) {
      /* If it's a ptr_as_array deref with an index of 0, it does nothing
       * and we can just replace its uses with its parent, unless it has
       * alignment information.
       *
       * The source of a ptr_as_array deref always has a deref_type of
       * nir_deref_type_array or nir_deref_type_cast.  If it's a cast, it
       * may be trivial and we may be able to get rid of that too.  Any
       * trivial cast of trivial cast cases should be handled already by
       * opt_deref_cast() above.
       */
      if (parent->deref_type == nir_deref_type_cast &&
          parent->cast.align_mul == 0 &&
          is_trivial_deref_cast(parent))
         parent = nir_deref_instr_parent(parent);
      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                               &parent->dest.ssa);
      nir_instr_remove(&deref->instr);
      return true;
   }

   if (parent->deref_type != nir_deref_type_array &&
       parent->deref_type != nir_deref_type_ptr_as_array)
      return false;

   assert(parent->parent.is_ssa);
   assert(parent->arr.index.is_ssa);
   assert(deref->arr.index.is_ssa);

   deref->arr.in_bounds &= parent->arr.in_bounds;

   nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
                                   deref->arr.index.ssa);

   deref->deref_type = parent->deref_type;
   nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
   nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
                         nir_src_for_ssa(new_idx));
   return true;
}

static bool
is_vector_bitcast_deref(nir_deref_instr *cast,
                        nir_component_mask_t mask,
                        bool is_write)
{
   if (cast->deref_type != nir_deref_type_cast)
      return false;

   /* Don't throw away useful alignment information */
   if (cast->cast.align_mul > 0)
      return false;

   /* It has to be a cast of another deref */
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* The parent has to be a vector or scalar */
   if (!glsl_type_is_vector_or_scalar(parent->type))
      return false;

   /* Don't bother with 1-bit types */
   unsigned cast_bit_size = glsl_get_bit_size(cast->type);
   unsigned parent_bit_size = glsl_get_bit_size(parent->type);
   if (cast_bit_size == 1 || parent_bit_size == 1)
      return false;

   /* A strided vector type means it's not tightly packed */
   if (glsl_get_explicit_stride(cast->type) ||
       glsl_get_explicit_stride(parent->type))
      return false;

   assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
   assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
   unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
   unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
                           (parent_bit_size / 8);
   if (bytes_used > parent_bytes)
      return false;

   if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
                                                       parent_bit_size))
      return false;

   return true;
}

static nir_ssa_def *
resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
{
   if (num_components == data->num_components)
      return data;

   unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
   for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
      swiz[i] = i;

   return nir_swizzle(b, data, swiz, num_components);
}
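
/* Illustrative example (hypothetical SSA names) for opt_load_vec_deref and
 * opt_store_vec_deref below: a vec3 load through a cast of a vec4 deref,
 *
 *    ssa_2 = deref_cast (vec3 *)ssa_1      // ssa_1 is a vec4 deref
 *    ssa_3 = load_deref ssa_2
 *
 * is turned into a load of the vec4 parent followed by a swizzle back down
 * to the components that were actually read, letting the cast go dead.
 */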

static bool
opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
{
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_component_mask_t read_mask =
      nir_ssa_def_components_read(&load->dest.ssa);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s.  This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, read_mask, false)) {
      const unsigned old_num_comps = load->dest.ssa.num_components;
      const unsigned old_bit_size = load->dest.ssa.bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      /* Stomp it to reference the parent */
      nir_instr_rewrite_src(&load->instr, &load->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));
      assert(load->dest.is_ssa);
      load->dest.ssa.bit_size = new_bit_size;
      load->dest.ssa.num_components = new_num_comps;
      load->num_components = new_num_comps;

      b->cursor = nir_after_instr(&load->instr);
      nir_ssa_def *data = &load->dest.ssa;
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
      data = resize_vector(b, data, old_num_comps);

      nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
                                     data->parent_instr);
      return true;
   }

   return false;
}

static bool
opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
{
   nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
   nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s.  This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, write_mask, true)) {
      assert(store->src[1].is_ssa);
      nir_ssa_def *data = store->src[1].ssa;

      const unsigned old_bit_size = data->bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      nir_instr_rewrite_src(&store->instr, &store->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));

      /* Restrict things down as needed so the bitcast doesn't fail */
      data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, data, new_bit_size);
      data = resize_vector(b, data, new_num_comps);
      nir_instr_rewrite_src(&store->instr, &store->src[1],
                            nir_src_for_ssa(data));
      store->num_components = new_num_comps;

      /* Adjust the write mask */
      write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
                                                  new_bit_size);
      nir_intrinsic_set_write_mask(store, write_mask);
      return true;
   }

   return false;
}

static bool
opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref == NULL)
      return false;

   nir_ssa_def *deref_is = NULL;

   if (nir_deref_mode_must_be(deref, modes))
      deref_is = nir_imm_true(b);

   if (!nir_deref_mode_may_be(deref, modes))
      deref_is = nir_imm_false(b);

   if (deref_is == NULL)
      return false;

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
   nir_instr_remove(&intrin->instr);
   return true;
}

bool
nir_opt_deref_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);

         switch (instr->type) {
         case nir_instr_type_alu: {
            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (opt_alu_of_cast(alu))
               progress = true;
            break;
         }

         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (opt_restrict_deref_modes(deref))
               progress = true;

            switch (deref->deref_type) {
            case nir_deref_type_ptr_as_array:
               if (opt_deref_ptr_as_array(&b, deref))
                  progress = true;
               break;

            case nir_deref_type_cast:
               if (opt_deref_cast(&b, deref))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
               if (opt_load_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_store_deref:
               if (opt_store_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_deref_mode_is:
               if (opt_known_deref_mode_is(&b, intrin))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         default:
            /* Do nothing */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_opt_deref(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(func, shader) {
      if (func->impl && nir_opt_deref_impl(func->impl))
         progress = true;
   }

   return progress;
}