/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir/nir_builder.h"
#include "nir_constant_expressions.h"
#include "nir_control_flow.h"
#include "nir_loop_analyze.h"

static nir_ssa_def *clone_alu_and_replace_src_defs(nir_builder *b,
                                                   const nir_alu_instr *alu,
                                                   nir_ssa_def **src_defs);

/**
 * Gets the single block that jumps back to the loop header.  Assumes there
 * is exactly one such block.
 */
static nir_block *
find_continue_block(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   assert(header_block->predecessors->entries == 2);

   set_foreach(header_block->predecessors, pred_entry) {
      if (pred_entry->key != prev_block)
         return (nir_block *)pred_entry->key;
   }

   unreachable("Continue block not found!");
}

/**
 * Does a phi have one constant value from outside a loop and one from inside?
 */
static bool
phi_has_constant_from_outside_and_one_from_inside_loop(nir_phi_instr *phi,
                                                       const nir_block *entry_block,
                                                       bool *entry_val,
                                                       bool *continue_val)
{
   /* We already know we have exactly one continue */
   assert(exec_list_length(&phi->srcs) == 2);

   *entry_val = false;
   *continue_val = false;

   nir_foreach_phi_src(src, phi) {
      if (!nir_src_is_const(src->src))
         return false;

      if (src->pred != entry_block) {
         *continue_val = nir_src_as_bool(src->src);
      } else {
         *entry_val = nir_src_as_bool(src->src);
      }
   }

   return true;
}
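/* For illustration (a hypothetical fragment in the style of the IR examples
 * below, not taken from a real shader): the helper above matches a condition
 * phi such as
 *
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *
 * where block_0 is the block before the loop, ssa_0 is load_const
 * (0x00000000), and ssa_1 is load_const (0xffffffff).  For that phi it
 * returns true with *entry_val = false and *continue_val = true.
 */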
/**
 * This optimization detects if statements at the tops of loops where the
 * condition is a phi node of two constants and moves half of the if to above
 * the loop and the other half of the if to the end of the loop.  A simple for
 * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
 * ends up looking something like this:
 *
 * vec1 32 ssa_0 = load_const (0x00000000)
 * vec1 32 ssa_1 = load_const (0xffffffff)
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *    if ssa_3 {
 *       block block_2:
 *       vec1 32 ssa_4 = load_const (0x00000001)
 *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
 *    } else {
 *       block block_3:
 *    }
 *    block block_4:
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 * }
 *
 * This turns it into something like this:
 *
 * // Stuff from block 1
 * // Stuff from block 3
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_2, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 *    // Stuff from block 1
 *    // Stuff from block 2
 *    vec1 32 ssa_4 = load_const (0x00000001)
 *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
 * }
 */
static bool
opt_peel_loop_initial_if(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block, which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
   if (!if_node || if_node->type != nir_cf_node_if)
      return false;

   nir_if *nif = nir_cf_node_as_if(if_node);
   if (!nif->condition.is_ssa)
      return false;

   nir_ssa_def *cond = nif->condition.ssa;
   if (cond->parent_instr->type != nir_instr_type_phi)
      return false;

   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
   if (cond->parent_instr->block != header_block)
      return false;

   bool entry_val = false, continue_val = false;
   if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                               prev_block,
                                                               &entry_val,
                                                               &continue_val))
      return false;

   /* If they both execute or both don't execute, this is a job for
    * nir_dead_cf, not this pass.
    */
   if ((entry_val && continue_val) || (!entry_val && !continue_val))
      return false;

   struct exec_list *continue_list, *entry_list;
   if (continue_val) {
      continue_list = &nif->then_list;
      entry_list = &nif->else_list;
   } else {
      continue_list = &nif->else_list;
      entry_list = &nif->then_list;
   }

   /* We want to move the contents of entry_list to above the loop, so it
    * can't contain any break or continue instructions.
    */
   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
      nir_foreach_block_in_cf_node(block, cf_node) {
         nir_instr *last_instr = nir_block_last_instr(block);
         if (last_instr && last_instr->type == nir_instr_type_jump)
            return false;
      }
   }

   /* We're about to re-arrange a bunch of blocks so make sure that we don't
    * have deref uses which cross block boundaries.  We don't want a deref
    * accidentally ending up in a phi.
    */
   nir_rematerialize_derefs_in_use_blocks_impl(
      nir_cf_node_get_function(&loop->cf_node));

   /* Before we do anything, convert the loop to LCSSA.  We're about to
    * replace a bunch of SSA defs with registers and this will prevent any of
    * it from leaking outside the loop.
    */
   nir_convert_loop_to_lcssa(loop);

   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   /* Get rid of phis in the header block since we will be duplicating it */
   nir_lower_phis_to_regs_block(header_block);
   /* Get rid of phis after the if since dominance will change */
   nir_lower_phis_to_regs_block(after_if_block);

   /* Get rid of SSA defs in the pieces we're about to move around */
   nir_lower_ssa_defs_to_regs_block(header_block);
   nir_foreach_block_in_cf_node(block, &nif->cf_node)
      nir_lower_ssa_defs_to_regs_block(block);

   nir_cf_list header, tmp;
   nir_cf_extract(&header, nir_before_block(header_block),
                  nir_after_block(header_block));

   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
                  nir_after_cf_list(entry_list));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));

   nir_cf_reinsert(&header,
                   nir_after_block_before_jump(find_continue_block(loop)));

   bool continue_list_jumps =
      nir_block_ends_in_jump(exec_node_data(nir_block,
                                            exec_list_get_tail(continue_list),
                                            cf_node.node));

   nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
                  nir_after_cf_list(continue_list));

   /* Get the continue block again, as the previous reinsert might have
    * removed the block.  Also, if both the continue list and the continue
    * block end in jump instructions, remove the jump from the latter, as it
    * will not be executed if we insert the continue list before it.
    */
   nir_block *continue_block = find_continue_block(loop);

   if (continue_list_jumps) {
      nir_instr *last_instr = nir_block_last_instr(continue_block);
      if (last_instr && last_instr->type == nir_instr_type_jump)
         nir_instr_remove(last_instr);
   }

   nir_cf_reinsert(&tmp,
                   nir_after_block_before_jump(continue_block));

   nir_cf_node_remove(&nif->cf_node);

   return true;
}

static bool
alu_instr_is_comparison(const nir_alu_instr *alu)
{
   switch (alu->op) {
   case nir_op_flt32:
   case nir_op_fge32:
   case nir_op_feq32:
   case nir_op_fneu32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_ieq32:
   case nir_op_ine32:
      return true;
   default:
      return nir_alu_instr_is_comparison(alu);
   }
}

static bool
alu_instr_is_type_conversion(const nir_alu_instr *alu)
{
   return nir_op_infos[alu->op].num_inputs == 1 &&
          nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0];
}

static bool
is_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
   if (!nir_op_is_selection(bcsel->op))
      return false;

   for (unsigned i = 0; i < 3; i++) {
      if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
          bcsel->src[i].src.ssa->parent_instr->block != instr->block)
         return false;

      if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) {
         /* opt_split_alu_of_phi() is able to peel that src from the loop */
         if (i == 0 || !allow_non_phi_src)
            return false;
         allow_non_phi_src = false;
      }
   }

   nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) {
      if (!nir_src_is_const(src->src))
         return false;
   }

   return true;
}
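/* As an illustrative sketch (hypothetical IR, mirroring the examples in the
 * doc comments below): is_trivial_bcsel() accepts a selection such as
 *
 *    vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
 *    vec1 32 ssa_15 = b32csel ssa_13, ssa_14, ssa_12
 *
 * where the condition ssa_13 is a phi of this block whose sources are all
 * constants, and every source of the bcsel is a trivial SSA value defined in
 * the same block (with at most one non-phi source when allow_non_phi_src is
 * set).
 */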
/**
 * Splits ALU instructions that have a source that is a phi node
 *
 * ALU instructions in the header block of a loop that meet the following
 * criteria can be split.
 *
 * - The loop has no continue instructions other than the "natural" continue
 *   at the bottom of the loop.
 *
 * - At least one source of the instruction is a phi node from the header
 *   block.
 *
 * - Any non-phi sources of the ALU instruction come from a block that
 *   dominates the block before the loop.  The most common failure mode for
 *   this check is sources that are generated in the loop header block.
 *
 * - The phi node selects a constant or undef from the block before the loop,
 *   or the only ALU user is a trivial bcsel that gets removed by peeling the
 *   ALU.
 *
 * The split process splits the original ALU instruction into two, one at the
 * bottom of the loop and one at the block before the loop.  The instruction
 * before the loop computes the value on the first iteration, and the
 * instruction at the bottom computes the value on the second, third, and so
 * on.  A new phi node is added to the header block that selects either the
 * instruction before the loop or the one at the end, and uses of the original
 * instruction are replaced by this phi.
 *
 * The splitting transforms a loop like:
 *
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
 *    vec1 32 ssa_14 = iadd ssa_11, ssa_8
 *    vec1 32 ssa_15 = b32csel ssa_13, ssa_14, ssa_12
 *    ...
 *    // succs: block_1
 * }
 *
 * into:
 *
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * vec1 32 ssa_22 = iadd ssa_10, ssa_8
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
 *    vec1 32 ssa_21 = phi block_0: ssa_22, block_4: ssa_20
 *    vec1 32 ssa_15 = b32csel ssa_13, ssa_21, ssa_12
 *    ...
 *    vec1 32 ssa_20 = iadd ssa_15, ssa_8
 *    // succs: block_1
 * }
 */
static bool
opt_split_alu_of_phi(nir_builder *b, nir_loop *loop)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block, which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_block *continue_block = find_continue_block(loop);
   if (continue_block == header_block)
      return false;

   nir_foreach_instr_safe(instr, header_block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *const alu = nir_instr_as_alu(instr);

      /* nir_op_vec{2,3,4} and nir_op_mov are excluded because they can easily
       * lead to infinite optimization loops.  Splitting comparisons can lead
       * to loop unrolling not recognizing loop terminators, and type
       * conversions also lead to regressions.
       */
      if (nir_op_is_vec(alu->op) ||
          alu_instr_is_comparison(alu) ||
          alu_instr_is_type_conversion(alu))
         continue;

      bool has_phi_src_from_prev_block = false;
      bool all_non_phi_exist_in_prev_block = true;
      bool is_prev_result_undef = true;
      bool is_prev_result_const = true;
      nir_ssa_def *prev_srcs[8];     // FINISHME: Array size?
      nir_ssa_def *continue_srcs[8]; // FINISHME: Array size?

      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         nir_instr *const src_instr = alu->src[i].src.ssa->parent_instr;

         /* If the source is a phi in the loop header block, then the
          * prev_srcs and continue_srcs will come from the different sources
          * of the phi.
          */
         if (src_instr->type == nir_instr_type_phi &&
             src_instr->block == header_block) {
            nir_phi_instr *const phi = nir_instr_as_phi(src_instr);

            /* Only strictly need to NULL out the pointers when the assertions
             * (below) are compiled in.  Debugging a NULL pointer deref in the
             * wild is easier than debugging a random pointer deref, so set
             * NULL unconditionally just to be safe.
             */
            prev_srcs[i] = NULL;
            continue_srcs[i] = NULL;

            nir_foreach_phi_src(src_of_phi, phi) {
               if (src_of_phi->pred == prev_block) {
                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_ssa_undef) {
                     is_prev_result_undef = false;
                  }

                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_load_const) {
                     is_prev_result_const = false;
                  }

                  prev_srcs[i] = src_of_phi->src.ssa;
                  has_phi_src_from_prev_block = true;
               } else {
                  continue_srcs[i] = src_of_phi->src.ssa;
               }
            }

            assert(prev_srcs[i] != NULL);
            assert(continue_srcs[i] != NULL);
         } else {
            /* If the source is not a phi (or is a phi in a block other than
             * the loop header), then the value must exist in prev_block.
             */
            if (!nir_block_dominates(src_instr->block, prev_block)) {
               all_non_phi_exist_in_prev_block = false;
               break;
            }

            prev_srcs[i] = alu->src[i].src.ssa;
            continue_srcs[i] = alu->src[i].src.ssa;
         }
      }

      if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block)
         continue;

      if (!is_prev_result_undef && !is_prev_result_const) {
         /* check if the only user is a trivial bcsel */
         if (!list_is_empty(&alu->dest.dest.ssa.if_uses) ||
             !list_is_singular(&alu->dest.dest.ssa.uses))
            continue;

         nir_src *use = list_first_entry(&alu->dest.dest.ssa.uses, nir_src, use_link);
         if (!is_trivial_bcsel(use->parent_instr, true))
            continue;
      }

      /* Split ALU of Phi */
      b->cursor = nir_after_block(prev_block);
      nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);

      /* Make a copy of the original ALU instruction.  Replace the sources
       * of the new instruction that read a phi with an undef source from
       * prev_block with the non-undef source of that phi.
       *
       * Insert the new instruction at the end of the continue block.
       */
      b->cursor = nir_after_block_before_jump(continue_block);

      nir_ssa_def *const alu_copy =
         clone_alu_and_replace_src_defs(b, alu, continue_srcs);

      /* Make a new phi node that selects a value from prev_block and the
       * result of the new instruction from continue_block.
       */
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block, nir_src_for_ssa(prev_value));
      nir_phi_instr_add_src(phi, continue_block, nir_src_for_ssa(alu_copy));

      nir_ssa_dest_init(&phi->instr, &phi->dest,
                        alu_copy->num_components, alu_copy->bit_size, NULL);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the original ALU instruction to read the
       * result of the phi.
       */
      nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
                               &phi->dest.ssa);

      /* Since the original ALU instruction no longer has any readers, just
       * remove it.
       */
      nir_instr_remove_v(&alu->instr);
      nir_instr_free(&alu->instr);

      progress = true;
   }

   return progress;
}
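/* In source-level terms (an illustrative sketch, not a literal shader), the
 * transformation above rewrites
 *
 *    i = 0;
 *    loop {
 *       ... use of (i + 1) ...
 *       i = next value of i;
 *    }
 *
 * into
 *
 *    i = 0;
 *    t = 0 + 1;
 *    loop {
 *       ... use of t ...
 *       i = next value of i;
 *       t = i + 1;
 *    }
 *
 * where t is the new phi added to the loop header.
 */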
/**
 * Simplify a bcsel whose sources are all phi nodes from the loop header block
 *
 * bcsel instructions in a loop that meet the following criteria can be
 * converted to phi nodes:
 *
 * - The loop has no continue instructions other than the "natural" continue
 *   at the bottom of the loop.
 *
 * - All of the sources of the bcsel are phi nodes in the header block of the
 *   loop.
 *
 * - The phi node representing the condition of the bcsel instruction chooses
 *   only constant values.
 *
 * The constant value from the condition will select one of the other sources
 * when entered from outside the loop and the remaining source when entered
 * from the continue block.  Since each of these sources is also a phi node in
 * the header block, the value of the phi node can be "evaluated."  These
 * evaluated phi nodes provide the sources for a new phi node.  All users of
 * the bcsel result are updated to use the phi node result.
 *
 * The replacement transforms loops like:
 *
 * vec1 32 ssa_7 = undefined
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_9 = load_const (0x000000c8)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
 *    vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
 *    vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11
 *    vec1 32 ssa_16 = ige32 ssa_14, ssa_9
 *    ...
 *    vec1 32 ssa_15 = load_const (0xffffffff)
 *    ...
 *    vec1 32 ssa_25 = iadd ssa_14, ssa_8
 *    // succs: block_1
 * }
 *
 * into:
 *
 * vec1 32 ssa_7 = undefined
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_9 = load_const (0x000000c8)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
 *    vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
 *    vec1 32 ssa_26 = phi block_0: ssa_1, block_4: ssa_25
 *    vec1 32 ssa_16 = ige32 ssa_26, ssa_9
 *    ...
 *    vec1 32 ssa_15 = load_const (0xffffffff)
 *    ...
 *    vec1 32 ssa_25 = iadd ssa_26, ssa_8
 *    // succs: block_1
 * }
 *
 * \note
 * It may be possible to modify this function to not require a phi node as the
 * source of the bcsel that is selected when entering from outside the loop.
 * The only restriction is that the source must be generated outside the loop
 * (since it will become the source of a phi node in the header block of the
 * loop).
 */
static bool
opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block, which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   /* We can move any bcsel that is guaranteed to execute on every iteration
    * of a loop.  For now this is accomplished by only taking bcsels from the
    * header_block.  In the future, this could be expanded to include any
    * bcsel that must come before any break.
    *
    * For more details, see
    * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
    */
   nir_foreach_instr_safe(instr, header_block) {
      if (!is_trivial_bcsel(instr, false))
         continue;

      nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
      nir_phi_instr *const cond_phi =
         nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);

      bool entry_val = false, continue_val = false;
      if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                                  prev_block,
                                                                  &entry_val,
                                                                  &continue_val))
         continue;

      /* If they both execute or both don't execute, this is a job for
       * nir_dead_cf, not this pass.
       */
      if ((entry_val && continue_val) || (!entry_val && !continue_val))
         continue;

      const unsigned entry_src = entry_val ? 1 : 2;
      const unsigned continue_src = entry_val ? 2 : 1;

      /* Create a new phi node that selects the value for prev_block from
       * the bcsel source that is selected by entry_val and the value for
       * continue_block from the other bcsel source.  Both sources have
       * already been verified to be phi nodes.
       */
      nir_block *continue_block = find_continue_block(loop);
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr),
                                                       prev_block)->src);

      nir_phi_instr_add_src(phi, continue_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr),
                                                       continue_block)->src);

      nir_ssa_dest_init(&phi->instr,
                        &phi->dest,
                        nir_dest_num_components(bcsel->dest.dest),
                        nir_dest_bit_size(bcsel->dest.dest),
                        NULL);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the bcsel instruction to read the result of
       * the phi.
       */
      nir_ssa_def_rewrite_uses(&bcsel->dest.dest.ssa,
                               &phi->dest.ssa);

      /* Since the original bcsel instruction no longer has any readers,
       * just remove it.
       */
      nir_instr_remove_v(&bcsel->instr);
      nir_instr_free(&bcsel->instr);

      progress = true;
   }

   return progress;
}

static bool
is_block_empty(nir_block *block)
{
   return nir_cf_node_is_last(&block->cf_node) &&
          exec_list_is_empty(&block->instr_list);
}

static bool
nir_block_ends_in_continue(nir_block *block)
{
   if (exec_list_is_empty(&block->instr_list))
      return false;

   nir_instr *instr = nir_block_last_instr(block);
   return instr->type == nir_instr_type_jump &&
          nir_instr_as_jump(instr)->type == nir_jump_continue;
}

/**
 * This optimization turns:
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *       continue;
 *    } else {
 *    }
 *    do_work_2();
 * }
 *
 * into:
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *       continue;
 *    } else {
 *       do_work_2();
 *    }
 * }
 *
 * The continue should then be removed by nir_opt_trivial_continues() and the
 * loop can potentially be unrolled.
 *
 * Note: Unless the function param aggressive_last_continue == true,
 * do_work_2() is only ever blocks and nested loops.  We avoid nesting other
 * if-statements in the branch as this can result in increased register
 * pressure, and in the i965 driver it causes a large amount of spilling in
 * shader-db.
 * For RADV, however, nesting these if-statements allows further continues to
 * be removed and provides a significant FPS boost in Doom, which is why we
 * have opted for this special bool to enable more aggressive optimisations.
 * TODO: The GCM pass solves most of the spilling regressions in i965; if it
 * is ever enabled we should consider removing the aggressive_last_continue
 * param.
 */
static bool
opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
{
   nir_if *nif = NULL;
   bool then_ends_in_continue = false;
   bool else_ends_in_continue = false;

   /* Scan the control flow of the loop from the last to the first node
    * looking for an if-statement we can optimise.
    */
   nir_block *last_block = nir_loop_last_block(loop);
   nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
   while (if_node) {
      if (if_node->type == nir_cf_node_if) {
         nif = nir_cf_node_as_if(if_node);
         nir_block *then_block = nir_if_last_then_block(nif);
         nir_block *else_block = nir_if_last_else_block(nif);

         then_ends_in_continue = nir_block_ends_in_continue(then_block);
         else_ends_in_continue = nir_block_ends_in_continue(else_block);

         /* If both branches end in a jump do nothing, this should be handled
          * by nir_opt_dead_cf().
          */
         if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
             (else_ends_in_continue || nir_block_ends_in_break(else_block)))
            return false;

         /* If a continue was found, stop scanning and attempt the
          * optimisation.
          */
         if (then_ends_in_continue || else_ends_in_continue ||
             !aggressive_last_continue)
            break;
      }

      if_node = nir_cf_node_prev(if_node);
   }

   /* If we didn't find an if to optimise, return. */
   if (!nif || (!then_ends_in_continue && !else_ends_in_continue))
      return false;

   /* If there is nothing after the if-statement we bail */
   if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
       exec_list_is_empty(&last_block->instr_list))
      return false;

   /* If there are single-source phis in the last block,
    * get rid of them first.
    */
   nir_opt_remove_phis_block(last_block);

   /* Move the last block of the loop inside the last if-statement */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_after_cf_node(if_node),
                  nir_after_block(last_block));
   if (then_ends_in_continue)
      nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->else_list));
   else
      nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->then_list));

   /* In order to avoid running nir_lower_regs_to_ssa_impl() every time an if
    * opt makes progress, we leave nir_opt_trivial_continues() to remove the
    * continue now that the end of the loop has been simplified.
    */

   return true;
}

/* Walk all the phis in the block immediately following the if statement and
 * swap the blocks.
 */
static void
rewrite_phi_predecessor_blocks(nir_if *nif,
                               nir_block *old_then_block,
                               nir_block *old_else_block,
                               nir_block *new_then_block,
                               nir_block *new_else_block)
{
   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   nir_foreach_instr(instr, after_if_block) {
      if (instr->type != nir_instr_type_phi)
         continue;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      nir_foreach_phi_src(src, phi) {
         if (src->pred == old_then_block) {
            src->pred = new_then_block;
         } else if (src->pred == old_else_block) {
            src->pred = new_else_block;
         }
      }
   }
}

/**
 * This optimization turns:
 *
 * if (cond) {
 * } else {
 *    do_work();
 * }
 *
 * into:
 *
 * if (!cond) {
 *    do_work();
 * } else {
 * }
 */
static bool
opt_if_simplification(nir_builder *b, nir_if *nif)
{
   /* Only simplify if the then block is empty and the else block is not. */
   if (!is_block_empty(nir_if_first_then_block(nif)) ||
       is_block_empty(nir_if_first_else_block(nif)))
      return false;

   /* Make sure the condition is a comparison operation. */
   nir_instr *src_instr = nif->condition.ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
   if (!nir_alu_instr_is_comparison(alu_instr))
      return false;

   /* Insert the inverted instruction and rewrite the condition. */
   b->cursor = nir_after_instr(&alu_instr->instr);

   nir_ssa_def *new_condition =
      nir_inot(b, &alu_instr->dest.dest.ssa);

   nir_if_rewrite_condition(nif, nir_src_for_ssa(new_condition));

   /* Grab pointers to the last then/else blocks for fixing up the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   nir_block *else_block = nir_if_last_else_block(nif);

   if (nir_block_ends_in_jump(else_block)) {
      /* Even though this if statement has a jump on one side, we may still
       * have phis afterwards.  Single-source phis can be produced by loop
       * unrolling or dead control-flow passes and are perfectly legal.  Run
       * a quick phi removal on the block after the if to clean up any such
       * phis.
       */
      nir_block *const next_block =
         nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
      nir_opt_remove_phis_block(next_block);
   }

   rewrite_phi_predecessor_blocks(nif, then_block, else_block, else_block,
                                  then_block);

   /* Finally, move the else block to the then block. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
                  nir_after_cf_list(&nif->else_list));
   nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));

   return true;
}

/* Find phi statements after an if that choose between true and false, and
 * replace them with the if statement's condition (or an inot of it).
 */
static bool
opt_if_phi_is_condition(nir_builder *b, nir_if *nif)
{
   /* Grab pointers to the last then/else blocks for looking in the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   nir_block *else_block = nir_if_last_else_block(nif);
   nir_ssa_def *cond = nif->condition.ssa;
   bool progress = false;

   nir_block *after_if_block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
   nir_foreach_instr_safe(instr, after_if_block) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (phi->dest.ssa.bit_size != cond->bit_size ||
          phi->dest.ssa.num_components != 1)
         continue;

      enum opt_bool {
         T, F, UNKNOWN
      } then_val = UNKNOWN, else_val = UNKNOWN;

      nir_foreach_phi_src(src, phi) {
         assert(src->pred == then_block || src->pred == else_block);
         enum opt_bool *pred_val = src->pred == then_block ? &then_val : &else_val;

         nir_ssa_scalar val = nir_ssa_scalar_resolved(src->src.ssa, 0);
         if (!nir_ssa_scalar_is_const(val))
            break;

         if (nir_ssa_scalar_as_int(val) == -1)
            *pred_val = T;
         else if (nir_ssa_scalar_as_uint(val) == 0)
            *pred_val = F;
         else
            break;
      }
      if (then_val == T && else_val == F) {
         nir_ssa_def_rewrite_uses(&phi->dest.ssa, cond);
         progress = true;
      } else if (then_val == F && else_val == T) {
         b->cursor = nir_before_cf_node(&nif->cf_node);
         nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_inot(b, cond));
         progress = true;
      }
   }

   return progress;
}
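/* For illustration (hypothetical IR in the style of the examples above), the
 * pass turns
 *
 *    if ssa_7 {
 *       block block_1:
 *    } else {
 *       block block_2:
 *    }
 *    block block_3:
 *    vec1 32 ssa_8 = phi block_1: ssa_t, block_2: ssa_f
 *
 * where ssa_t/ssa_f are the constants -1/0, into direct uses of ssa_7; with
 * the constants swapped, uses of ssa_8 become inot(ssa_7) instead.
 */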
/**
 * This optimization tries to merge two break statements into a single break.
 * For this purpose, it checks if both branch legs end in a break or if one
 * branch leg ends in a break and the other one does so after the branch.
 *
 * This optimization turns
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *       break;
 *    } else {
 *       do_work_2();
 *       break;
 *    }
 * }
 *
 * into:
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *    } else {
 *       do_work_2();
 *    }
 *    break;
 * }
 *
 * but also situations like
 *
 * loop {
 *    ...
 *    if (cond1) {
 *       if (cond2) {
 *          do_work_1();
 *          break;
 *       } else {
 *          do_work_2();
 *       }
 *       do_work_3();
 *       break;
 *    } else {
 *       ...
 *    }
 * }
 *
 * into:
 *
 * loop {
 *    ...
 *    if (cond1) {
 *       if (cond2) {
 *          do_work_1();
 *       } else {
 *          do_work_2();
 *          do_work_3();
 *       }
 *       break;
 *    } else {
 *       ...
 *    }
 * }
 */
static bool
opt_merge_breaks(nir_if *nif)
{
   nir_block *last_then = nir_if_last_then_block(nif);
   nir_block *last_else = nir_if_last_else_block(nif);
   bool then_break = nir_block_ends_in_break(last_then);
   bool else_break = nir_block_ends_in_break(last_else);

   /* If both branch legs end in a break, merge the break after the branch */
   if (then_break && else_break) {
      nir_block *after_if = nir_cf_node_cf_tree_next(&nif->cf_node);
      /* Make sure that the successor is empty.
       * If not, we let nir_opt_dead_cf() clean it up first.
       */
      if (!is_block_empty(after_if))
         return false;

      nir_lower_phis_to_regs_block(last_then->successors[0]);
      nir_instr_remove_v(nir_block_last_instr(last_then));
      nir_instr *jump = nir_block_last_instr(last_else);
      nir_instr_remove_v(jump);
      nir_instr_insert(nir_after_block(after_if), jump);
      return true;
   }

   /* Single break: If there's a break after the branch and the non-breaking
    * side of the if falls through to it, then hoist the code after the
    * branch up into the if and leave just a single break there.
    */
   if (then_break || else_break) {

      /* At least one branch leg must fall through */
      if (nir_block_ends_in_jump(last_then) && nir_block_ends_in_jump(last_else))
         return false;

      /* Check if there is a single break after the IF */
      nir_cf_node *first = nir_cf_node_next(&nif->cf_node);
      nir_cf_node *last = first;
      while (!nir_cf_node_is_last(last)) {
         if (contains_other_jump(last, NULL))
            return false;
         last = nir_cf_node_next(last);
      }

      assert(last->type == nir_cf_node_block);
      if (!nir_block_ends_in_break(nir_cf_node_as_block(last)))
         return false;

      /* Hoist the code from after the IF into the falling-through branch leg */
      nir_opt_remove_phis_block(nir_cf_node_as_block(first));
      nir_block *break_block = then_break ? last_then : last_else;
      nir_lower_phis_to_regs_block(break_block->successors[0]);

      nir_cf_list tmp;
      nir_cf_extract(&tmp, nir_before_cf_node(first),
                     nir_after_block_before_jump(nir_cf_node_as_block(last)));
      if (then_break)
         nir_cf_reinsert(&tmp, nir_after_block(last_else));
      else
         nir_cf_reinsert(&tmp, nir_after_block(last_then));

      nir_instr_remove_v(nir_block_last_instr(break_block));
      return true;
   }

   return false;
}

/**
 * This optimization simplifies potential loop terminators which then allows
 * other passes such as opt_if_simplification() and loop unrolling to progress
 * further:
 *
 * if (cond) {
 *    ... then block instructions ...
 * } else {
 *    ...
 *    break;
 * }
 *
 * into:
 *
 * if (cond) {
 * } else {
 *    ...
 *    break;
 * }
 * ... then block instructions ...
 */
static bool
opt_if_loop_terminator(nir_if *nif)
{
   nir_block *break_blk = NULL;
   nir_block *continue_from_blk = NULL;
   bool continue_from_then = true;

   nir_block *last_then = nir_if_last_then_block(nif);
   nir_block *last_else = nir_if_last_else_block(nif);

   if (nir_block_ends_in_break(last_then)) {
      break_blk = last_then;
      continue_from_blk = last_else;
      continue_from_then = false;
   } else if (nir_block_ends_in_break(last_else)) {
      break_blk = last_else;
      continue_from_blk = last_then;
   }

   /* Bail if the if-statement contained no break at all */
   if (!break_blk)
      return false;

   /* If the continue-from block is empty then return, as there is nothing to
    * move.
    */
   nir_block *first_continue_from_blk = continue_from_then ?
      nir_if_first_then_block(nif) :
      nir_if_first_else_block(nif);
   if (is_block_empty(first_continue_from_blk))
      return false;

   if (nir_block_ends_in_jump(continue_from_blk))
      return false;
   /* Even though this if statement has a jump on one side, we may still have
    * phis afterwards.  Single-source phis can be produced by loop unrolling
    * or dead control-flow passes and are perfectly legal.  Run a quick phi
    * removal on the block after the if to clean up any such phis.
    */
   nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));

   /* Finally, move the continue-from branch after the if-statement. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk),
                  nir_after_block(continue_from_blk));
   nir_cf_reinsert(&tmp, nir_after_cf_node(&nif->cf_node));

   return true;
}

static bool
evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
{
   nir_block *use_block = nir_cursor_current_block(cursor);
   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
      *value = true;
      return true;
   } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
      *value = false;
      return true;
   } else {
      return false;
   }
}
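/* Rationale sketch for the dominance test above: if the first block of the
 * then-branch dominates the use block, the use can only be reached after the
 * condition evaluated to true (and symmetrically for the else-branch and
 * false).  For example, any use of the condition inside its own then-branch
 * may be treated as the constant true.
 */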
static nir_ssa_def *
clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
                               nir_ssa_def **src_defs)
{
   nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
   nalu->exact = alu->exact;

   nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest,
                     alu->dest.dest.ssa.num_components,
                     alu->dest.dest.ssa.bit_size, NULL);

   nalu->dest.saturate = alu->dest.saturate;
   nalu->dest.write_mask = alu->dest.write_mask;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      assert(alu->src[i].src.is_ssa);
      nalu->src[i].src = nir_src_for_ssa(src_defs[i]);
      nalu->src[i].negate = alu->src[i].negate;
      nalu->src[i].abs = alu->src[i].abs;
      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
             sizeof(nalu->src[i].swizzle));
   }

   nir_builder_instr_insert(b, &nalu->instr);

   return &nalu->dest.dest.ssa;
}

/*
 * This propagates if condition evaluation down the chain of some alu
 * instructions.  For example, by checking the uses of some of the following
 * alu instructions, we can eventually replace ssa_107 with NIR_TRUE.
 *
 * loop {
 *    block block_1:
 *    vec1 32 ssa_85 = load_const (0x00000002)
 *    vec1 32 ssa_86 = ieq ssa_48, ssa_85
 *    vec1 32 ssa_87 = load_const (0x00000001)
 *    vec1 32 ssa_88 = ieq ssa_48, ssa_87
 *    vec1 32 ssa_89 = ior ssa_86, ssa_88
 *    vec1 32 ssa_90 = ieq ssa_48, ssa_0
 *    vec1 32 ssa_91 = ior ssa_89, ssa_90
 *    if ssa_86 {
 *       block block_2:
 *       ...
 *       break
 *    } else {
 *       block block_3:
 *    }
 *    block block_4:
 *    if ssa_88 {
 *       block block_5:
 *       ...
 *       break
 *    } else {
 *       block block_6:
 *    }
 *    block block_7:
 *    if ssa_90 {
 *       block block_8:
 *       ...
 *       break
 *    } else {
 *       block block_9:
 *    }
 *    block block_10:
 *    vec1 32 ssa_107 = inot ssa_91
 *    if ssa_107 {
 *       block block_11:
 *       break
 *    } else {
 *       block block_12:
 *    }
 * }
 */
static bool
propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
                         nir_src *alu_use, nir_alu_instr *alu,
                         bool is_if_condition)
{
   bool bool_value;
   b->cursor = nir_before_src(alu_use, is_if_condition);
   if (!evaluate_if_condition(nif, b->cursor, &bool_value))
      return false;

   nir_ssa_def *def[NIR_MAX_VEC_COMPONENTS] = {0};
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (alu->src[i].src.ssa == use_src->ssa) {
         def[i] = nir_imm_bool(b, bool_value);
      } else {
         def[i] = alu->src[i].src.ssa;
      }
   }

   nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def);

   /* Rewrite use to use new alu instruction */
   nir_src new_src = nir_src_for_ssa(nalu);

   if (is_if_condition)
      nir_if_rewrite_condition(alu_use->parent_if, new_src);
   else
      nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);

   return true;
}

static bool
can_propagate_through_alu(nir_src *src)
{
   if (src->parent_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
   switch (alu->op) {
   case nir_op_ior:
   case nir_op_iand:
   case nir_op_inot:
   case nir_op_b2i32:
      return true;
   case nir_op_bcsel:
      return src == &alu->src[0].src;
   default:
      return false;
   }
}

static bool
evaluate_condition_use(nir_builder *b, nir_if *nif, nir_src *use_src,
                       bool is_if_condition)
{
   bool progress = false;

   b->cursor = nir_before_src(use_src, is_if_condition);

   bool bool_value;
   if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
      /* Rewrite use to use const */
      nir_src imm_src = nir_src_for_ssa(nir_imm_bool(b, bool_value));
      if (is_if_condition)
         nir_if_rewrite_condition(use_src->parent_if, imm_src);
      else
         nir_instr_rewrite_src(use_src->parent_instr, use_src, imm_src);

      progress = true;
   }

   if (!is_if_condition && can_propagate_through_alu(use_src)) {
      nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);

      nir_foreach_use_safe(alu_use, &alu->dest.dest.ssa) {
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
                                              false);
      }

      nir_foreach_if_use_safe(alu_use, &alu->dest.dest.ssa) {
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
                                              true);
      }
   }

   return progress;
}

static bool
opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
{
   bool progress = false;

   /* Evaluate any uses of the if condition inside the if branches */
   assert(nif->condition.is_ssa);
   nir_foreach_use_safe(use_src, nif->condition.ssa) {
      progress |= evaluate_condition_use(b, nif, use_src, false);
   }

   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
      if (use_src->parent_if != nif)
         progress |= evaluate_condition_use(b, nif, use_src, true);
   }

   return progress;
}
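/* For illustration (hypothetical IR, not from a real shader), a use of the
 * if condition inside one of its own branches:
 *
 *    if ssa_7 {
 *       block block_1:
 *       vec1 32 ssa_9 = bcsel ssa_7, ssa_4, ssa_5
 *    } else {
 *       ...
 *    }
 *
 * is rewritten so the bcsel reads constant true instead of ssa_7, after
 * which other passes can fold ssa_9 to ssa_4.
 */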
static bool
rewrite_comp_uses_within_if(nir_builder *b, nir_if *nif, bool invert,
                            nir_ssa_scalar scalar, nir_ssa_scalar new_scalar)
{
   bool progress = false;

   nir_block *first = invert ? nir_if_first_else_block(nif) : nir_if_first_then_block(nif);
   nir_block *last = invert ? nir_if_last_else_block(nif) : nir_if_last_then_block(nif);

   nir_ssa_def *new_ssa = NULL;
   nir_foreach_use_safe(use, scalar.def) {
      if (use->parent_instr->block->index < first->index ||
          use->parent_instr->block->index > last->index)
         continue;

      /* Only rewrite users which use only the new component.  This is to
       * avoid a situation where copy propagation will undo the rewrite and
       * we risk an infinite loop.
       *
       * We could rewrite users which use a mix of the old and new
       * components, but if nir_src_components_read() is incomplete, then we
       * risk the new component actually being unused and some optimization
       * later undoing the rewrite.
       */
      if (nir_src_components_read(use) != BITFIELD64_BIT(scalar.comp))
         continue;

      if (!new_ssa) {
         b->cursor = nir_before_cf_node(&nif->cf_node);
         new_ssa = nir_channel(b, new_scalar.def, new_scalar.comp);
         if (scalar.def->num_components > 1) {
            nir_ssa_def *vec = nir_ssa_undef(b, scalar.def->num_components, scalar.def->bit_size);
            new_ssa = nir_vector_insert_imm(b, vec, new_ssa, scalar.comp);
         }
      }

      nir_instr_rewrite_src_ssa(use->parent_instr, use, new_ssa);
      progress = true;
   }

   return progress;
}

/*
 * This optimization turns:
 *
 *     if (a == (b = readfirstlane(a)))
 *        use(a)
 *     if (c == (d = load_const))
 *        use(c)
 *
 * into:
 *
 *     if (a == (b = readfirstlane(a)))
 *        use(b)
 *     if (c == (d = load_const))
 *        use(d)
 */
static bool
opt_if_rewrite_uniform_uses(nir_builder *b, nir_if *nif, nir_ssa_scalar cond, bool accept_ine)
{
   bool progress = false;

   if (!nir_ssa_scalar_is_alu(cond))
      return false;

   nir_op op = nir_ssa_scalar_alu_op(cond);
   if (op == nir_op_iand) {
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 0), false);
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 1), false);
      return progress;
   }

   if (op != nir_op_ieq && (op != nir_op_ine || !accept_ine))
      return false;

   for (unsigned i = 0; i < 2; i++) {
      nir_ssa_scalar src_uni = nir_ssa_scalar_chase_alu_src(cond, i);
      nir_ssa_scalar src_div = nir_ssa_scalar_chase_alu_src(cond, !i);

      if (src_uni.def->parent_instr->type == nir_instr_type_load_const && src_div.def != src_uni.def)
         return rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, src_div, src_uni);

      if (src_uni.def->parent_instr->type != nir_instr_type_intrinsic)
         continue;
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src_uni.def->parent_instr);
      if (intrin->intrinsic != nir_intrinsic_read_first_invocation &&
          (intrin->intrinsic != nir_intrinsic_reduce || nir_intrinsic_cluster_size(intrin)))
         continue;

      nir_ssa_scalar intrin_src = {intrin->src[0].ssa, src_uni.comp};
      nir_ssa_scalar resolved_intrin_src = nir_ssa_scalar_resolved(intrin_src.def, intrin_src.comp);

      if (resolved_intrin_src.comp != src_div.comp || resolved_intrin_src.def != src_div.def)
         continue;

      progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, resolved_intrin_src, src_uni);
      if (intrin_src.comp != resolved_intrin_src.comp ||
          intrin_src.def != resolved_intrin_src.def)
         progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine,
                                                 intrin_src, src_uni);

      return progress;
   }

   return false;
}

static void
simple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then,
                bool src_if_then)
{
   /* Now merge the if branch */
   nir_block *dest_blk = dest_if_then ? nir_if_last_then_block(dest_if)
                                      : nir_if_last_else_block(dest_if);

   struct exec_list *list = src_if_then ? &src_if->then_list
                                        : &src_if->else_list;

   nir_cf_list if_cf_list;
   nir_cf_extract(&if_cf_list, nir_before_cf_list(list),
                  nir_after_cf_list(list));
   nir_cf_reinsert(&if_cf_list, nir_after_block(dest_blk));
}

static bool
opt_if_merge(nir_if *nif)
{
   bool progress = false;

   nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node);
   if (!next_blk || !nif->condition.is_ssa)
      return false;

   nir_if *next_if = nir_block_get_following_if(next_blk);
   if (!next_if || !next_if->condition.is_ssa)
      return false;

   /* Here we merge two consecutive ifs that have the same condition e.g:
    *
    * if ssa_12 {
    *    ...
    * } else {
    *    ...
    * }
    * if ssa_12 {
    *    ...
    * } else {
    *    ...
    * }
    *
    * Note: This only merges if-statements when the block between them is
    * empty.  The reason we don't try to merge ifs that just have phis between
    * them is because this can result in increased register pressure, for
    * example when merging if ladders created by indirect indexing.
    */
   if (nif->condition.ssa == next_if->condition.ssa &&
       exec_list_is_empty(&next_blk->instr_list)) {

      /* This optimization isn't made to work in this case and
       * opt_if_evaluate_condition_use will optimize it later.
       */
      if (nir_block_ends_in_jump(nir_if_last_then_block(nif)) ||
          nir_block_ends_in_jump(nir_if_last_else_block(nif)))
         return false;

      simple_merge_if(nif, next_if, true, true);
      simple_merge_if(nif, next_if, false, false);

      nir_block *new_then_block = nir_if_last_then_block(nif);
      nir_block *new_else_block = nir_if_last_else_block(nif);

      nir_block *old_then_block = nir_if_last_then_block(next_if);
      nir_block *old_else_block = nir_if_last_else_block(next_if);

      /* Rewrite the predecessor block for any phis following the second
       * if-statement.
       */
      rewrite_phi_predecessor_blocks(next_if, old_then_block,
                                     old_else_block,
                                     new_then_block,
                                     new_else_block);

      /* Move phis after the merged if to avoid them being deleted when we
       * remove the merged if-statement.
       */
      nir_block *after_next_if_block =
         nir_cf_node_as_block(nir_cf_node_next(&next_if->cf_node));

      nir_foreach_instr_safe(instr, after_next_if_block) {
         if (instr->type != nir_instr_type_phi)
            break;

         exec_node_remove(&instr->node);
         exec_list_push_tail(&next_blk->instr_list, &instr->node);
         instr->block = next_blk;
      }

      nir_cf_node_remove(&next_if->cf_node);

      progress = true;
   }

   return progress;
}

static bool
opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
               nir_opt_if_options options)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_cf_list(b, &nif->then_list,
                                    options);
         progress |= opt_if_cf_list(b, &nif->else_list,
                                    options);
         progress |= opt_if_loop_terminator(nif);
         progress |= opt_if_merge(nif);
         progress |= opt_if_simplification(b, nif);
         if (options & nir_opt_if_optimize_phi_true_false)
            progress |= opt_if_phi_is_condition(b, nif);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_cf_list(b, &loop->body,
                                    options);
         progress |= opt_simplify_bcsel_of_phi(b, loop);
         progress |= opt_if_loop_last_continue(loop,
                                               options & nir_opt_if_aggressive_last_continue);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

/**
 * Optimizations which can create registers are done after other optimizations
 * which require SSA.
 */
static bool
opt_if_regs_cf_list(struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_regs_cf_list(&nif->then_list);
         progress |= opt_if_regs_cf_list(&nif->else_list);
         if (opt_merge_breaks(nif)) {
            /* This optimization might move blocks
             * from after the NIF into the NIF */
            progress = true;
            opt_if_regs_cf_list(&nif->then_list);
            opt_if_regs_cf_list(&nif->else_list);
         }
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_regs_cf_list(&loop->body);
         progress |= opt_peel_loop_initial_if(loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

/**
 * These optimisations depend on nir_metadata_block_index and therefore must
 * not do anything to cause the metadata to become invalid.
 */
static bool
opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_safe_cf_list(b, &nif->then_list);
         progress |= opt_if_safe_cf_list(b, &nif->else_list);
         progress |= opt_if_evaluate_condition_use(b, nif);
         nir_ssa_scalar cond = nir_ssa_scalar_resolved(nif->condition.ssa, 0);
         progress |= opt_if_rewrite_uniform_uses(b, nif, cond, true);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_safe_cf_list(b, &loop->body);
         progress |= opt_split_alu_of_phi(b, loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

bool
nir_opt_if(nir_shader *shader, nir_opt_if_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl == NULL)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_metadata_require(function->impl, nir_metadata_block_index |
                           nir_metadata_dominance);
      progress |= opt_if_safe_cf_list(&b, &function->impl->body);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                            nir_metadata_dominance);

      bool preserve = true;

      if (opt_if_cf_list(&b, &function->impl->body, options)) {
         preserve = false;
         progress = true;
      }

      if (opt_if_regs_cf_list(&function->impl->body)) {
         preserve = false;
         progress = true;

         /* If that made progress, we're no longer really in SSA form.  We
          * need to convert registers back into SSA defs and clean up SSA
          * defs that don't dominate their uses.
          */
         nir_lower_regs_to_ssa_impl(function->impl);
      }

      if (preserve) {
         nir_metadata_preserve(function->impl, nir_metadata_all);
      } else {
         nir_metadata_preserve(function->impl, nir_metadata_none);
      }
   }

   return progress;
}
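/* Usage sketch (illustrative; driver code, not part of this file): the pass
 * is typically run to a fixed point together with other cleanups, e.g.
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_if(shader, nir_opt_if_optimize_phi_true_false);
 *       progress |= nir_opt_dead_cf(shader);
 *    } while (progress);
 */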