1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_builder.h" 26#include "nir_control_flow.h" 27#include "nir_loop_analyze.h" 28 29 30/* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32 31 * instructions. (Multiply counting nodes and magic number 5.) But there is 32 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed 33 * to give about the same results. Around 5 instructions per node. But some 34 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so 35 * we set it to 26. 36 */ 37#define LOOP_UNROLL_LIMIT 26 38 39/* Prepare this loop for unrolling by first converting to lcssa and then 40 * converting the phis from the top level of the loop body to regs. 41 * Partially converting out of SSA allows us to unroll the loop without having 42 * to keep track of and update phis along the way which gets tricky and 43 * doesn't add much value over converting to regs. 44 * 45 * The loop may have a jump instruction at the end of the loop which does 46 * nothing. Once we're out of SSA, we can safely delete it so we don't have 47 * to deal with it later. 48 */ 49static void 50loop_prepare_for_unroll(nir_loop *loop) 51{ 52 nir_rematerialize_derefs_in_use_blocks_impl( 53 nir_cf_node_get_function(&loop->cf_node)); 54 55 nir_convert_loop_to_lcssa(loop); 56 57 /* Lower phis at the top level of the loop body */ 58 foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { 59 if (nir_cf_node_block == node->type) { 60 nir_lower_phis_to_regs_block(nir_cf_node_as_block(node)); 61 } 62 } 63 64 /* Lower phis after the loop */ 65 nir_block *block_after_loop = 66 nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 67 68 nir_lower_phis_to_regs_block(block_after_loop); 69 70 /* Remove jump if it's the last instruction in the loop */ 71 nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop)); 72 if (last_instr && last_instr->type == nir_instr_type_jump) { 73 nir_instr_remove(last_instr); 74 } 75} 76 77static void 78get_first_blocks_in_terminator(nir_loop_terminator *term, 79 nir_block **first_break_block, 80 nir_block **first_continue_block) 81{ 82 if (term->continue_from_then) { 83 *first_continue_block = nir_if_first_then_block(term->nif); 84 *first_break_block = nir_if_first_else_block(term->nif); 85 } else { 86 *first_continue_block = nir_if_first_else_block(term->nif); 87 *first_break_block = nir_if_first_then_block(term->nif); 88 } 89} 90 91/** 92 * Unroll a loop where we know exactly how many iterations there are and there 93 * is only a single exit point. Note here we can unroll loops with multiple 94 * theoretical exits that only have a single terminating exit that we always 95 * know is the "real" exit. 96 * 97 * loop { 98 * ...instrs... 99 * } 100 * 101 * And the iteration count is 3, the output will be: 102 * 103 * ...instrs... ...instrs... ...instrs... 104 */ 105static void 106simple_unroll(nir_loop *loop) 107{ 108 nir_loop_terminator *limiting_term = loop->info->limiting_terminator; 109 assert(nir_is_trivial_loop_if(limiting_term->nif, 110 limiting_term->break_block)); 111 112 loop_prepare_for_unroll(loop); 113 114 /* Skip over loop terminator and get the loop body. */ 115 list_for_each_entry(nir_loop_terminator, terminator, 116 &loop->info->loop_terminator_list, 117 loop_terminator_link) { 118 119 /* Remove all but the limiting terminator as we know the other exit 120 * conditions can never be met. Note we need to extract any instructions 121 * in the continue from branch and insert then into the loop body before 122 * removing it. 123 */ 124 if (terminator->nif != limiting_term->nif) { 125 nir_block *first_break_block; 126 nir_block *first_continue_block; 127 get_first_blocks_in_terminator(terminator, &first_break_block, 128 &first_continue_block); 129 130 assert(nir_is_trivial_loop_if(terminator->nif, 131 terminator->break_block)); 132 133 nir_cf_list continue_from_lst; 134 nir_cf_extract(&continue_from_lst, 135 nir_before_block(first_continue_block), 136 nir_after_block(terminator->continue_from_block)); 137 nir_cf_reinsert(&continue_from_lst, 138 nir_after_cf_node(&terminator->nif->cf_node)); 139 140 nir_cf_node_remove(&terminator->nif->cf_node); 141 } 142 } 143 144 nir_block *first_break_block; 145 nir_block *first_continue_block; 146 get_first_blocks_in_terminator(limiting_term, &first_break_block, 147 &first_continue_block); 148 149 /* Pluck out the loop header */ 150 nir_block *header_blk = nir_loop_first_block(loop); 151 nir_cf_list lp_header; 152 nir_cf_extract(&lp_header, nir_before_block(header_blk), 153 nir_before_cf_node(&limiting_term->nif->cf_node)); 154 155 /* Add the continue from block of the limiting terminator to the loop body 156 */ 157 nir_cf_list continue_from_lst; 158 nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block), 159 nir_after_block(limiting_term->continue_from_block)); 160 nir_cf_reinsert(&continue_from_lst, 161 nir_after_cf_node(&limiting_term->nif->cf_node)); 162 163 /* Pluck out the loop body */ 164 nir_cf_list loop_body; 165 nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node), 166 nir_after_block(nir_loop_last_block(loop))); 167 168 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL); 169 170 /* Clone the loop header and insert before the loop */ 171 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 172 nir_before_cf_node(&loop->cf_node), 173 remap_table); 174 175 for (unsigned i = 0; i < loop->info->max_trip_count; i++) { 176 /* Clone loop body and insert before the loop */ 177 nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent, 178 nir_before_cf_node(&loop->cf_node), 179 remap_table); 180 181 /* Clone loop header and insert after loop body */ 182 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 183 nir_before_cf_node(&loop->cf_node), 184 remap_table); 185 } 186 187 /* Remove the break from the loop terminator and add instructions from 188 * the break block after the unrolled loop. 189 */ 190 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block); 191 nir_instr_remove(break_instr); 192 nir_cf_list break_list; 193 nir_cf_extract(&break_list, nir_before_block(first_break_block), 194 nir_after_block(limiting_term->break_block)); 195 196 /* Clone so things get properly remapped */ 197 nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent, 198 nir_before_cf_node(&loop->cf_node), 199 remap_table); 200 201 /* Remove the loop */ 202 nir_cf_node_remove(&loop->cf_node); 203 204 /* Delete the original loop body, break block & header */ 205 nir_cf_delete(&lp_header); 206 nir_cf_delete(&loop_body); 207 nir_cf_delete(&break_list); 208 209 _mesa_hash_table_destroy(remap_table, NULL); 210} 211 212static void 213move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term) 214{ 215 /* Move the rest of the loop inside the continue-from-block */ 216 nir_cf_reinsert(lst, nir_after_block(term->continue_from_block)); 217 218 /* Remove the break */ 219 nir_instr_remove(nir_block_last_instr(term->break_block)); 220} 221 222static nir_cursor 223get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then) 224{ 225 if (node->type == nir_cf_node_loop) { 226 return nir_before_cf_node(node); 227 } else { 228 nir_if *if_stmt = nir_cf_node_as_if(node); 229 if (continue_from_then) { 230 return nir_after_block(nir_if_last_then_block(if_stmt)); 231 } else { 232 return nir_after_block(nir_if_last_else_block(if_stmt)); 233 } 234 } 235} 236 237static nir_cf_node * 238complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term, 239 nir_cf_list *lp_header, nir_cf_list *lp_body, 240 struct hash_table *remap_table, 241 unsigned num_times_to_clone) 242{ 243 /* In the terminator that we have no trip count for move everything after 244 * the terminator into the continue from branch. 245 */ 246 nir_cf_list loop_end; 247 nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node), 248 nir_after_block(nir_loop_last_block(loop))); 249 move_cf_list_into_loop_term(&loop_end, unlimit_term); 250 251 /* Pluck out the loop body. */ 252 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)), 253 nir_after_block(nir_loop_last_block(loop))); 254 255 /* Set unroll_loc to the loop as we will insert the unrolled loop before it 256 */ 257 nir_cf_node *unroll_loc = &loop->cf_node; 258 259 /* Temp list to store the cloned loop as we unroll */ 260 nir_cf_list unrolled_lp_body; 261 262 for (unsigned i = 0; i < num_times_to_clone; i++) { 263 264 nir_cursor cursor = 265 get_complex_unroll_insert_location(unroll_loc, 266 unlimit_term->continue_from_then); 267 268 /* Clone loop header and insert in if branch */ 269 nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent, 270 cursor, remap_table); 271 272 cursor = 273 get_complex_unroll_insert_location(unroll_loc, 274 unlimit_term->continue_from_then); 275 276 /* Clone loop body */ 277 nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent, 278 remap_table); 279 280 unroll_loc = exec_node_data(nir_cf_node, 281 exec_list_get_tail(&unrolled_lp_body.list), 282 node); 283 assert(unroll_loc->type == nir_cf_node_block && 284 exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list)); 285 286 /* Get the unrolled if node */ 287 unroll_loc = nir_cf_node_prev(unroll_loc); 288 289 /* Insert unrolled loop body */ 290 nir_cf_reinsert(&unrolled_lp_body, cursor); 291 } 292 293 return unroll_loc; 294} 295 296/** 297 * Unroll a loop with two exists when the trip count of one of the exits is 298 * unknown. If continue_from_then is true, the loop is repeated only when the 299 * "then" branch of the if is taken; otherwise it is repeated only 300 * when the "else" branch of the if is taken. 301 * 302 * For example, if the input is: 303 * 304 * loop { 305 * ...phis/condition... 306 * if condition { 307 * ...then instructions... 308 * } else { 309 * ...continue instructions... 310 * break 311 * } 312 * ...body... 313 * } 314 * 315 * And the iteration count is 3, and unlimit_term->continue_from_then is true, 316 * then the output will be: 317 * 318 * ...condition... 319 * if condition { 320 * ...then instructions... 321 * ...body... 322 * if condition { 323 * ...then instructions... 324 * ...body... 325 * if condition { 326 * ...then instructions... 327 * ...body... 328 * } else { 329 * ...continue instructions... 330 * } 331 * } else { 332 * ...continue instructions... 333 * } 334 * } else { 335 * ...continue instructions... 336 * } 337 */ 338static void 339complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term, 340 bool limiting_term_second) 341{ 342 assert(nir_is_trivial_loop_if(unlimit_term->nif, 343 unlimit_term->break_block)); 344 345 nir_loop_terminator *limiting_term = loop->info->limiting_terminator; 346 assert(nir_is_trivial_loop_if(limiting_term->nif, 347 limiting_term->break_block)); 348 349 loop_prepare_for_unroll(loop); 350 351 nir_block *header_blk = nir_loop_first_block(loop); 352 353 nir_cf_list lp_header; 354 nir_cf_list limit_break_list; 355 unsigned num_times_to_clone; 356 if (limiting_term_second) { 357 /* Pluck out the loop header */ 358 nir_cf_extract(&lp_header, nir_before_block(header_blk), 359 nir_before_cf_node(&unlimit_term->nif->cf_node)); 360 361 /* We need some special handling when its the second terminator causing 362 * us to exit the loop for example: 363 * 364 * for (int i = 0; i < uniform_lp_count; i++) { 365 * colour = vec4(0.0, 1.0, 0.0, 1.0); 366 * 367 * if (i == 1) { 368 * break; 369 * } 370 * ... any further code is unreachable after i == 1 ... 371 * } 372 */ 373 nir_cf_list after_lt; 374 nir_if *limit_if = limiting_term->nif; 375 nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node), 376 nir_after_block(nir_loop_last_block(loop))); 377 move_cf_list_into_loop_term(&after_lt, limiting_term); 378 379 /* Because the trip count is the number of times we pass over the entire 380 * loop before hitting a break when the second terminator is the 381 * limiting terminator we can actually execute code inside the loop when 382 * trip count == 0 e.g. the code above the break. So we need to bump 383 * the trip_count in order for the code below to clone anything. When 384 * trip count == 1 we execute the code above the break twice and the 385 * code below it once so we need clone things twice and so on. 386 */ 387 num_times_to_clone = loop->info->max_trip_count + 1; 388 } else { 389 /* Pluck out the loop header */ 390 nir_cf_extract(&lp_header, nir_before_block(header_blk), 391 nir_before_cf_node(&limiting_term->nif->cf_node)); 392 393 nir_block *first_break_block; 394 nir_block *first_continue_block; 395 get_first_blocks_in_terminator(limiting_term, &first_break_block, 396 &first_continue_block); 397 398 /* Remove the break then extract instructions from the break block so we 399 * can insert them in the innermost else of the unrolled loop. 400 */ 401 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block); 402 nir_instr_remove(break_instr); 403 nir_cf_extract(&limit_break_list, nir_before_block(first_break_block), 404 nir_after_block(limiting_term->break_block)); 405 406 nir_cf_list continue_list; 407 nir_cf_extract(&continue_list, nir_before_block(first_continue_block), 408 nir_after_block(limiting_term->continue_from_block)); 409 410 nir_cf_reinsert(&continue_list, 411 nir_after_cf_node(&limiting_term->nif->cf_node)); 412 413 nir_cf_node_remove(&limiting_term->nif->cf_node); 414 415 num_times_to_clone = loop->info->max_trip_count; 416 } 417 418 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL); 419 420 nir_cf_list lp_body; 421 nir_cf_node *unroll_loc = 422 complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body, 423 remap_table, num_times_to_clone); 424 425 if (!limiting_term_second) { 426 assert(unroll_loc->type == nir_cf_node_if); 427 428 nir_cursor cursor = 429 get_complex_unroll_insert_location(unroll_loc, 430 unlimit_term->continue_from_then); 431 432 /* Clone loop header and insert in if branch */ 433 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 434 cursor, remap_table); 435 436 cursor = 437 get_complex_unroll_insert_location(unroll_loc, 438 unlimit_term->continue_from_then); 439 440 /* Clone so things get properly remapped, and insert break block from 441 * the limiting terminator. 442 */ 443 nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent, 444 cursor, remap_table); 445 446 nir_cf_delete(&limit_break_list); 447 } 448 449 /* The loop has been unrolled so remove it. */ 450 nir_cf_node_remove(&loop->cf_node); 451 452 /* Delete the original loop header and body */ 453 nir_cf_delete(&lp_header); 454 nir_cf_delete(&lp_body); 455 456 _mesa_hash_table_destroy(remap_table, NULL); 457} 458 459/** 460 * Unroll loops where we only have a single terminator but the exact trip 461 * count is unknown. For example: 462 * 463 * for (int i = 0; i < imin(x, 4); i++) 464 * ... 465 */ 466static void 467complex_unroll_single_terminator(nir_loop *loop) 468{ 469 assert(list_length(&loop->info->loop_terminator_list) == 1); 470 assert(loop->info->limiting_terminator); 471 assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif, 472 loop->info->limiting_terminator->break_block)); 473 474 nir_loop_terminator *terminator = loop->info->limiting_terminator; 475 476 loop_prepare_for_unroll(loop); 477 478 /* Pluck out the loop header */ 479 nir_cf_list lp_header; 480 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)), 481 nir_before_cf_node(&terminator->nif->cf_node)); 482 483 struct hash_table *remap_table = 484 _mesa_hash_table_create(NULL, _mesa_hash_pointer, 485 _mesa_key_pointer_equal); 486 487 /* We need to clone the loop one extra time in order to clone the lcssa 488 * vars for the last iteration (they are inside the following ifs break 489 * branch). We leave other passes to clean up this redundant if. 490 */ 491 unsigned num_times_to_clone = loop->info->max_trip_count + 1; 492 493 nir_cf_list lp_body; 494 UNUSED nir_cf_node *unroll_loc = 495 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body, 496 remap_table, num_times_to_clone); 497 498 assert(unroll_loc->type == nir_cf_node_if); 499 500 /* We need to clone the lcssa vars in order to insert them on both sides 501 * of the if in the last iteration/if-statement. Otherwise the optimisation 502 * passes will have trouble optimising the unrolled if ladder. 503 */ 504 nir_cursor cursor = 505 get_complex_unroll_insert_location(unroll_loc, 506 terminator->continue_from_then); 507 508 nir_if *if_stmt = nir_cf_node_as_if(unroll_loc); 509 nir_cursor start_cursor; 510 nir_cursor end_cursor; 511 if (terminator->continue_from_then) { 512 start_cursor = nir_before_block(nir_if_first_else_block(if_stmt)); 513 end_cursor = nir_after_block(nir_if_last_else_block(if_stmt)); 514 } else { 515 start_cursor = nir_before_block(nir_if_first_then_block(if_stmt)); 516 end_cursor = nir_after_block(nir_if_last_then_block(if_stmt)); 517 } 518 519 nir_cf_list lcssa_list; 520 nir_cf_extract(&lcssa_list, start_cursor, end_cursor); 521 522 /* Insert the cloned vars in the last continue branch */ 523 nir_cf_list_clone_and_reinsert(&lcssa_list, loop->cf_node.parent, 524 cursor, remap_table); 525 526 start_cursor = terminator->continue_from_then ? 527 nir_before_block(nir_if_first_else_block(if_stmt)) : 528 nir_before_block(nir_if_first_then_block(if_stmt)); 529 530 /* Reinsert the cloned vars back where they came from */ 531 nir_cf_reinsert(&lcssa_list, start_cursor); 532 533 /* Delete the original loop header and body */ 534 nir_cf_delete(&lp_header); 535 nir_cf_delete(&lp_body); 536 537 /* The original loop has been replaced so remove it. */ 538 nir_cf_node_remove(&loop->cf_node); 539 540 _mesa_hash_table_destroy(remap_table, NULL); 541} 542 543/* Unrolls the classic wrapper loops e.g 544 * 545 * do { 546 * // ... 547 * } while (false) 548 */ 549static bool 550wrapper_unroll(nir_loop *loop) 551{ 552 if (!list_is_empty(&loop->info->loop_terminator_list)) { 553 554 /* Unrolling a loop with a large number of exits can result in a 555 * large inrease in register pressure. For now we just skip 556 * unrolling if we have more than 3 exits (not including the break 557 * at the end of the loop). 558 * 559 * TODO: Most loops that fit this pattern are simply switch 560 * statements that are converted to a loop to take advantage of 561 * exiting jump instruction handling. In this case we could make 562 * use of a binary seach pattern like we do in 563 * nir_lower_indirect_derefs(), this should allow us to unroll the 564 * loops in an optimal way and should also avoid some of the 565 * register pressure that comes from simply nesting the 566 * terminators one after the other. 567 */ 568 if (list_length(&loop->info->loop_terminator_list) > 3) 569 return false; 570 571 loop_prepare_for_unroll(loop); 572 573 nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop)); 574 list_for_each_entry(nir_loop_terminator, terminator, 575 &loop->info->loop_terminator_list, 576 loop_terminator_link) { 577 578 /* Remove break from the terminator */ 579 nir_instr *break_instr = 580 nir_block_last_instr(terminator->break_block); 581 nir_instr_remove(break_instr); 582 583 /* Pluck out the loop body. */ 584 nir_cf_list loop_body; 585 nir_cf_extract(&loop_body, 586 nir_after_cf_node(&terminator->nif->cf_node), 587 loop_end); 588 589 /* Reinsert loop body into continue from block */ 590 nir_cf_reinsert(&loop_body, 591 nir_after_block(terminator->continue_from_block)); 592 593 loop_end = terminator->continue_from_then ? 594 nir_after_block(nir_if_last_then_block(terminator->nif)) : 595 nir_after_block(nir_if_last_else_block(terminator->nif)); 596 } 597 } else { 598 loop_prepare_for_unroll(loop); 599 } 600 601 /* Pluck out the loop body. */ 602 nir_cf_list loop_body; 603 nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)), 604 nir_after_block(nir_loop_last_block(loop))); 605 606 /* Reinsert loop body after the loop */ 607 nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node)); 608 609 /* The loop has been unrolled so remove it. */ 610 nir_cf_node_remove(&loop->cf_node); 611 612 return true; 613} 614 615static bool 616is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref, 617 unsigned trip_count) 618{ 619 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 620 if (d->deref_type != nir_deref_type_array) 621 continue; 622 623 nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr); 624 nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src; 625 if (!nir_srcs_equal(d->arr.index, src)) 626 continue; 627 628 nir_deref_instr *parent = nir_deref_instr_parent(d); 629 assert(glsl_type_is_array(parent->type) || 630 glsl_type_is_matrix(parent->type) || 631 glsl_type_is_vector(parent->type)); 632 633 /* We have already unrolled the loop and the new one will be imbedded in 634 * the innermost continue branch. So unless the array is greater than 635 * the trip count any iteration over the loop will be an out of bounds 636 * access of the array. 637 */ 638 unsigned length = glsl_type_is_vector(parent->type) ? 639 glsl_get_vector_elements(parent->type) : 640 glsl_get_length(parent->type); 641 return length <= trip_count; 642 } 643 644 return false; 645} 646 647/* If we know an array access is going to be out of bounds remove or replace 648 * the access with an undef. This can later result in the entire loop being 649 * removed by nir_opt_dead_cf(). 650 */ 651static void 652remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop, 653 nir_loop_terminator *term, 654 nir_cf_list *lp_header, 655 nir_cf_list *lp_body, 656 unsigned trip_count) 657{ 658 if (!loop->info->guessed_trip_count) 659 return; 660 661 /* Temporarily recreate the original loop so we can alter it */ 662 nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop))); 663 nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop))); 664 665 nir_builder b; 666 nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node)); 667 668 nir_foreach_block_in_cf_node(block, &loop->cf_node) { 669 nir_foreach_instr_safe(instr, block) { 670 if (instr->type != nir_instr_type_intrinsic) 671 continue; 672 673 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 674 675 /* Check for arrays variably-indexed by a loop induction variable. 676 * If this access is out of bounds remove the instruction or replace 677 * its use with an undefined instruction. 678 * If the loop is no longer useful we leave it for the appropriate 679 * pass to clean it up for us. 680 */ 681 if (intrin->intrinsic == nir_intrinsic_load_deref || 682 intrin->intrinsic == nir_intrinsic_store_deref || 683 intrin->intrinsic == nir_intrinsic_copy_deref) { 684 685 if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]), 686 trip_count)) { 687 if (intrin->intrinsic == nir_intrinsic_load_deref) { 688 nir_ssa_def *undef = 689 nir_ssa_undef(&b, intrin->dest.ssa.num_components, 690 intrin->dest.ssa.bit_size); 691 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 692 undef); 693 } else { 694 nir_instr_remove(instr); 695 continue; 696 } 697 } 698 699 if (intrin->intrinsic == nir_intrinsic_copy_deref && 700 is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]), 701 trip_count)) { 702 nir_instr_remove(instr); 703 } 704 } 705 } 706 } 707 708 /* Now that we are done extract the loop header and body again */ 709 nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)), 710 nir_before_cf_node(&term->nif->cf_node)); 711 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)), 712 nir_after_block(nir_loop_last_block(loop))); 713} 714 715/* Partially unrolls loops that don't have a known trip count. 716 */ 717static void 718partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count) 719{ 720 assert(list_length(&loop->info->loop_terminator_list) == 1); 721 722 nir_loop_terminator *terminator = 723 list_first_entry(&loop->info->loop_terminator_list, 724 nir_loop_terminator, loop_terminator_link); 725 726 assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block)); 727 728 loop_prepare_for_unroll(loop); 729 730 /* Pluck out the loop header */ 731 nir_cf_list lp_header; 732 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)), 733 nir_before_cf_node(&terminator->nif->cf_node)); 734 735 struct hash_table *remap_table = 736 _mesa_hash_table_create(NULL, _mesa_hash_pointer, 737 _mesa_key_pointer_equal); 738 739 nir_cf_list lp_body; 740 nir_cf_node *unroll_loc = 741 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body, 742 remap_table, trip_count); 743 744 /* Attempt to remove out of bounds array access */ 745 remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header, 746 &lp_body, trip_count); 747 748 nir_cursor cursor = 749 get_complex_unroll_insert_location(unroll_loc, 750 terminator->continue_from_then); 751 752 /* Reinsert the loop in the innermost nested continue branch of the unrolled 753 * loop. 754 */ 755 nir_loop *new_loop = nir_loop_create(shader); 756 nir_cf_node_insert(cursor, &new_loop->cf_node); 757 new_loop->partially_unrolled = true; 758 759 /* Clone loop header and insert into new loop */ 760 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 761 nir_after_cf_list(&new_loop->body), 762 remap_table); 763 764 /* Clone loop body and insert into new loop */ 765 nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent, 766 nir_after_cf_list(&new_loop->body), 767 remap_table); 768 769 /* Insert break back into terminator */ 770 nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break); 771 nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop)); 772 if (terminator->continue_from_then) { 773 nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr); 774 } else { 775 nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr); 776 } 777 778 /* Delete the original loop header and body */ 779 nir_cf_delete(&lp_header); 780 nir_cf_delete(&lp_body); 781 782 /* The original loop has been replaced so remove it. */ 783 nir_cf_node_remove(&loop->cf_node); 784 785 _mesa_hash_table_destroy(remap_table, NULL); 786} 787 788static bool 789is_indirect_load(nir_instr *instr) 790{ 791 if (instr->type == nir_instr_type_intrinsic) { 792 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 793 794 if ((intrin->intrinsic == nir_intrinsic_load_ubo || 795 intrin->intrinsic == nir_intrinsic_load_ssbo) && 796 !nir_src_is_const(intrin->src[1])) { 797 return true; 798 } 799 800 if (intrin->intrinsic == nir_intrinsic_load_global) 801 return true; 802 803 if (intrin->intrinsic == nir_intrinsic_load_deref || 804 intrin->intrinsic == nir_intrinsic_store_deref) { 805 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 806 nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global; 807 if (!nir_deref_mode_may_be(deref, mem_modes)) 808 return false; 809 while (deref) { 810 if ((deref->deref_type == nir_deref_type_array || 811 deref->deref_type == nir_deref_type_ptr_as_array) && 812 !nir_src_is_const(deref->arr.index)) { 813 return true; 814 } 815 deref = nir_deref_instr_parent(deref); 816 } 817 } 818 } else if (instr->type == nir_instr_type_tex) { 819 nir_tex_instr *tex = nir_instr_as_tex(instr); 820 821 for (unsigned i = 0; i < tex->num_srcs; i++) { 822 if (!nir_src_is_const(tex->src[i].src)) 823 return true; 824 } 825 } 826 827 return false; 828} 829 830static bool 831can_pipeline_loads(nir_loop *loop) 832{ 833 if (!loop->info->exact_trip_count_known) 834 return false; 835 836 bool interesting_loads = false; 837 838 foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { 839 if (cf_node == &loop->info->limiting_terminator->nif->cf_node) 840 continue; 841 842 /* Control flow usually prevents useful scheduling */ 843 if (cf_node->type != nir_cf_node_block) 844 return false; 845 846 if (interesting_loads) 847 continue; 848 849 nir_block *block = nir_cf_node_as_block(cf_node); 850 nir_foreach_instr(instr, block) { 851 if (is_indirect_load(instr)) { 852 interesting_loads = true; 853 break; 854 } 855 } 856 } 857 858 return interesting_loads; 859} 860 861/* 862 * Returns true if we should unroll the loop, otherwise false. 863 */ 864static bool 865check_unrolling_restrictions(nir_shader *shader, nir_loop *loop) 866{ 867 if (loop->control == nir_loop_control_unroll) 868 return true; 869 870 if (loop->control == nir_loop_control_dont_unroll) 871 return false; 872 873 nir_loop_info *li = loop->info; 874 unsigned max_iter = shader->options->max_unroll_iterations; 875 /* Unroll much more aggressively if it can hide load latency. */ 876 if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop)) 877 max_iter = shader->options->max_unroll_iterations_aggressive; 878 unsigned trip_count = 879 li->max_trip_count ? li->max_trip_count : li->guessed_trip_count; 880 881 if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter) 882 return true; 883 884 unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT; 885 unsigned cost = li->instr_cost * trip_count; 886 887 if (cost <= cost_limit && trip_count <= max_iter) 888 return true; 889 890 return false; 891} 892 893static bool 894process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out, 895 bool *unrolled_this_block); 896 897static bool 898process_loops_in_block(nir_shader *sh, struct exec_list *block, 899 bool *has_nested_loop_out) 900{ 901 /* We try to unroll as many loops in one pass as possible. 902 * E.g. we can safely unroll both loops in this block: 903 * 904 * if (...) { 905 * loop {...} 906 * } 907 * 908 * if (...) { 909 * loop {...} 910 * } 911 * 912 * Unrolling one loop doesn't affect the other one. 913 * 914 * On the other hand for block with: 915 * 916 * loop {...} 917 * ... 918 * loop {...} 919 * 920 * It is unsafe to unroll both loops in one pass without taking 921 * complicating precautions, since the structure of the block would 922 * change after unrolling the first loop. So in such a case we leave 923 * the second loop for the next iteration of unrolling to handle. 924 */ 925 926 bool progress = false; 927 bool unrolled_this_block = false; 928 929 foreach_list_typed(nir_cf_node, nested_node, node, block) { 930 if (process_loops(sh, nested_node, 931 has_nested_loop_out, &unrolled_this_block)) { 932 progress = true; 933 934 /* If current node is unrolled we could not safely continue 935 * our iteration since we don't know the next node 936 * and it's hard to guarantee that we won't end up unrolling 937 * inner loop of the currently unrolled one, if such exists. 938 */ 939 if (unrolled_this_block) { 940 break; 941 } 942 } 943 } 944 945 return progress; 946} 947 948static bool 949process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out, 950 bool *unrolled_this_block) 951{ 952 bool progress = false; 953 bool has_nested_loop = false; 954 nir_loop *loop; 955 956 switch (cf_node->type) { 957 case nir_cf_node_block: 958 return progress; 959 case nir_cf_node_if: { 960 nir_if *if_stmt = nir_cf_node_as_if(cf_node); 961 progress |= process_loops_in_block(sh, &if_stmt->then_list, 962 has_nested_loop_out); 963 progress |= process_loops_in_block(sh, &if_stmt->else_list, 964 has_nested_loop_out); 965 return progress; 966 } 967 case nir_cf_node_loop: { 968 loop = nir_cf_node_as_loop(cf_node); 969 progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop); 970 971 break; 972 } 973 default: 974 unreachable("unknown cf node type"); 975 } 976 977 const bool unrolled_child_block = progress; 978 979 /* Don't attempt to unroll a second inner loop in this pass, wait until the 980 * next pass as we have altered the cf. 981 */ 982 if (!progress && loop->control != nir_loop_control_dont_unroll) { 983 984 /* Remove the conditional break statements associated with all terminators 985 * that are associated with a fixed iteration count, except for the one 986 * associated with the limiting terminator--that one needs to stay, since 987 * it terminates the loop. 988 */ 989 if (loop->info->limiting_terminator) { 990 list_for_each_entry_safe(nir_loop_terminator, t, 991 &loop->info->loop_terminator_list, 992 loop_terminator_link) { 993 if (t->exact_trip_count_unknown) 994 continue; 995 996 if (t != loop->info->limiting_terminator) { 997 998 /* Only delete the if-statement if the continue block is empty. 999 * We trust that nir_opt_if() does its job well enough to 1000 * remove all instructions from the continue block when possible. 1001 */ 1002 nir_block *first_continue_from_blk = t->continue_from_then ? 1003 nir_if_first_then_block(t->nif) : 1004 nir_if_first_else_block(t->nif); 1005 1006 if (!(nir_cf_node_is_last(&first_continue_from_blk->cf_node) && 1007 exec_list_is_empty(&first_continue_from_blk->instr_list))) 1008 continue; 1009 1010 /* Now delete the if */ 1011 nir_cf_node_remove(&t->nif->cf_node); 1012 1013 /* Also remove it from the terminator list */ 1014 list_del(&t->loop_terminator_link); 1015 1016 progress = true; 1017 } 1018 } 1019 } 1020 1021 /* Check for the classic 1022 * 1023 * do { 1024 * // ... 1025 * } while (false) 1026 * 1027 * that is used to wrap multi-line macros. GLSL IR also wraps switch 1028 * statements in a loop like this. 1029 */ 1030 if (loop->info->limiting_terminator == NULL && 1031 !loop->info->complex_loop) { 1032 1033 nir_block *last_loop_blk = nir_loop_last_block(loop); 1034 if (nir_block_ends_in_break(last_loop_blk)) { 1035 progress = wrapper_unroll(loop); 1036 goto exit; 1037 } 1038 1039 /* If we were able to guess the loop iteration based on array access 1040 * then do a partial unroll. 1041 */ 1042 unsigned num_lt = list_length(&loop->info->loop_terminator_list); 1043 if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled && 1044 loop->info->guessed_trip_count && 1045 check_unrolling_restrictions(sh, loop)) { 1046 partial_unroll(sh, loop, loop->info->guessed_trip_count); 1047 progress = true; 1048 } 1049 } 1050 1051 /* Intentionally don't consider exact_trip_count_known here. When 1052 * max_trip_count is non-zero, it is the upper bound on the number of 1053 * times the loop will iterate, but the loop may iterate less. For 1054 * example, the following loop will iterate 0 or 1 time: 1055 * 1056 * for (i = 0; i < min(x, 1); i++) { ... } 1057 * 1058 * Trivial single-interation loops (e.g., do { ... } while (false)) and 1059 * trivial zero-iteration loops (e.g., while (false) { ... }) will have 1060 * already been handled. 1061 * 1062 * If the loop is known to execute at most once and meets the other 1063 * unrolling criteria, unroll it even if it has nested loops. 1064 * 1065 * It is unlikely that such loops exist in real shaders. GraphicsFuzz is 1066 * known to generate spurious loops that iterate exactly once. It is 1067 * plausible that it could eventually start generating loops like the 1068 * example above, so it seems logical to defend against it now. 1069 */ 1070 if (!loop->info->limiting_terminator || 1071 (loop->info->max_trip_count != 1 && has_nested_loop)) 1072 goto exit; 1073 1074 if (!check_unrolling_restrictions(sh, loop)) 1075 goto exit; 1076 1077 if (loop->info->exact_trip_count_known) { 1078 simple_unroll(loop); 1079 progress = true; 1080 } else { 1081 /* Attempt to unroll loops with two terminators. */ 1082 unsigned num_lt = list_length(&loop->info->loop_terminator_list); 1083 if (num_lt == 2 && 1084 !loop->info->limiting_terminator->exact_trip_count_unknown) { 1085 bool limiting_term_second = true; 1086 nir_loop_terminator *terminator = 1087 list_first_entry(&loop->info->loop_terminator_list, 1088 nir_loop_terminator, loop_terminator_link); 1089 1090 1091 if (terminator->nif == loop->info->limiting_terminator->nif) { 1092 limiting_term_second = false; 1093 terminator = 1094 list_last_entry(&loop->info->loop_terminator_list, 1095 nir_loop_terminator, loop_terminator_link); 1096 } 1097 1098 /* If the first terminator has a trip count of zero and is the 1099 * limiting terminator just do a simple unroll as the second 1100 * terminator can never be reached. 1101 */ 1102 if (loop->info->max_trip_count == 0 && !limiting_term_second) { 1103 simple_unroll(loop); 1104 } else { 1105 complex_unroll(loop, terminator, limiting_term_second); 1106 } 1107 progress = true; 1108 } 1109 1110 if (num_lt == 1) { 1111 assert(loop->info->limiting_terminator->exact_trip_count_unknown); 1112 complex_unroll_single_terminator(loop); 1113 progress = true; 1114 } 1115 } 1116 } 1117 1118exit: 1119 *has_nested_loop_out = true; 1120 if (progress && !unrolled_child_block) 1121 *unrolled_this_block = true; 1122 1123 return progress; 1124} 1125 1126static bool 1127nir_opt_loop_unroll_impl(nir_function_impl *impl, 1128 nir_variable_mode indirect_mask, 1129 bool force_unroll_sampler_indirect) 1130{ 1131 bool progress = false; 1132 nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask, 1133 (int) force_unroll_sampler_indirect); 1134 nir_metadata_require(impl, nir_metadata_block_index); 1135 1136 bool has_nested_loop = false; 1137 progress |= process_loops_in_block(impl->function->shader, &impl->body, 1138 &has_nested_loop); 1139 1140 if (progress) { 1141 nir_metadata_preserve(impl, nir_metadata_none); 1142 nir_lower_regs_to_ssa_impl(impl); 1143 } else { 1144 nir_metadata_preserve(impl, nir_metadata_all); 1145 } 1146 1147 return progress; 1148} 1149 1150/** 1151 * indirect_mask specifies which type of indirectly accessed variables 1152 * should force loop unrolling. 1153 */ 1154bool 1155nir_opt_loop_unroll(nir_shader *shader) 1156{ 1157 bool progress = false; 1158 1159 bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler; 1160 nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling; 1161 nir_foreach_function(function, shader) { 1162 if (function->impl) { 1163 progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask, 1164 force_unroll_sampler_indirect); 1165 } 1166 } 1167 return progress; 1168} 1169