/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci#include "nir_control_flow.h" 27bf215546Sopenharmony_ci#include "nir_loop_analyze.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci/* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32 31bf215546Sopenharmony_ci * instructions. (Multiply counting nodes and magic number 5.) But there is 32bf215546Sopenharmony_ci * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed 33bf215546Sopenharmony_ci * to give about the same results. Around 5 instructions per node. But some 34bf215546Sopenharmony_ci * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so 35bf215546Sopenharmony_ci * we set it to 26. 36bf215546Sopenharmony_ci */ 37bf215546Sopenharmony_ci#define LOOP_UNROLL_LIMIT 26 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci/* Prepare this loop for unrolling by first converting to lcssa and then 40bf215546Sopenharmony_ci * converting the phis from the top level of the loop body to regs. 41bf215546Sopenharmony_ci * Partially converting out of SSA allows us to unroll the loop without having 42bf215546Sopenharmony_ci * to keep track of and update phis along the way which gets tricky and 43bf215546Sopenharmony_ci * doesn't add much value over converting to regs. 44bf215546Sopenharmony_ci * 45bf215546Sopenharmony_ci * The loop may have a jump instruction at the end of the loop which does 46bf215546Sopenharmony_ci * nothing. Once we're out of SSA, we can safely delete it so we don't have 47bf215546Sopenharmony_ci * to deal with it later. 
48bf215546Sopenharmony_ci */ 49bf215546Sopenharmony_cistatic void 50bf215546Sopenharmony_ciloop_prepare_for_unroll(nir_loop *loop) 51bf215546Sopenharmony_ci{ 52bf215546Sopenharmony_ci nir_rematerialize_derefs_in_use_blocks_impl( 53bf215546Sopenharmony_ci nir_cf_node_get_function(&loop->cf_node)); 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci nir_convert_loop_to_lcssa(loop); 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci /* Lower phis at the top level of the loop body */ 58bf215546Sopenharmony_ci foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { 59bf215546Sopenharmony_ci if (nir_cf_node_block == node->type) { 60bf215546Sopenharmony_ci nir_lower_phis_to_regs_block(nir_cf_node_as_block(node)); 61bf215546Sopenharmony_ci } 62bf215546Sopenharmony_ci } 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci /* Lower phis after the loop */ 65bf215546Sopenharmony_ci nir_block *block_after_loop = 66bf215546Sopenharmony_ci nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci nir_lower_phis_to_regs_block(block_after_loop); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci /* Remove jump if it's the last instruction in the loop */ 71bf215546Sopenharmony_ci nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop)); 72bf215546Sopenharmony_ci if (last_instr && last_instr->type == nir_instr_type_jump) { 73bf215546Sopenharmony_ci nir_instr_remove(last_instr); 74bf215546Sopenharmony_ci } 75bf215546Sopenharmony_ci} 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_cistatic void 78bf215546Sopenharmony_ciget_first_blocks_in_terminator(nir_loop_terminator *term, 79bf215546Sopenharmony_ci nir_block **first_break_block, 80bf215546Sopenharmony_ci nir_block **first_continue_block) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci if (term->continue_from_then) { 83bf215546Sopenharmony_ci *first_continue_block = nir_if_first_then_block(term->nif); 84bf215546Sopenharmony_ci *first_break_block = 
nir_if_first_else_block(term->nif); 85bf215546Sopenharmony_ci } else { 86bf215546Sopenharmony_ci *first_continue_block = nir_if_first_else_block(term->nif); 87bf215546Sopenharmony_ci *first_break_block = nir_if_first_then_block(term->nif); 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci} 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci/** 92bf215546Sopenharmony_ci * Unroll a loop where we know exactly how many iterations there are and there 93bf215546Sopenharmony_ci * is only a single exit point. Note here we can unroll loops with multiple 94bf215546Sopenharmony_ci * theoretical exits that only have a single terminating exit that we always 95bf215546Sopenharmony_ci * know is the "real" exit. 96bf215546Sopenharmony_ci * 97bf215546Sopenharmony_ci * loop { 98bf215546Sopenharmony_ci * ...instrs... 99bf215546Sopenharmony_ci * } 100bf215546Sopenharmony_ci * 101bf215546Sopenharmony_ci * And the iteration count is 3, the output will be: 102bf215546Sopenharmony_ci * 103bf215546Sopenharmony_ci * ...instrs... ...instrs... ...instrs... 104bf215546Sopenharmony_ci */ 105bf215546Sopenharmony_cistatic void 106bf215546Sopenharmony_cisimple_unroll(nir_loop *loop) 107bf215546Sopenharmony_ci{ 108bf215546Sopenharmony_ci nir_loop_terminator *limiting_term = loop->info->limiting_terminator; 109bf215546Sopenharmony_ci assert(nir_is_trivial_loop_if(limiting_term->nif, 110bf215546Sopenharmony_ci limiting_term->break_block)); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci loop_prepare_for_unroll(loop); 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci /* Skip over loop terminator and get the loop body. */ 115bf215546Sopenharmony_ci list_for_each_entry(nir_loop_terminator, terminator, 116bf215546Sopenharmony_ci &loop->info->loop_terminator_list, 117bf215546Sopenharmony_ci loop_terminator_link) { 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci /* Remove all but the limiting terminator as we know the other exit 120bf215546Sopenharmony_ci * conditions can never be met. 
Note we need to extract any instructions 121bf215546Sopenharmony_ci * in the continue from branch and insert then into the loop body before 122bf215546Sopenharmony_ci * removing it. 123bf215546Sopenharmony_ci */ 124bf215546Sopenharmony_ci if (terminator->nif != limiting_term->nif) { 125bf215546Sopenharmony_ci nir_block *first_break_block; 126bf215546Sopenharmony_ci nir_block *first_continue_block; 127bf215546Sopenharmony_ci get_first_blocks_in_terminator(terminator, &first_break_block, 128bf215546Sopenharmony_ci &first_continue_block); 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci assert(nir_is_trivial_loop_if(terminator->nif, 131bf215546Sopenharmony_ci terminator->break_block)); 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci nir_cf_list continue_from_lst; 134bf215546Sopenharmony_ci nir_cf_extract(&continue_from_lst, 135bf215546Sopenharmony_ci nir_before_block(first_continue_block), 136bf215546Sopenharmony_ci nir_after_block(terminator->continue_from_block)); 137bf215546Sopenharmony_ci nir_cf_reinsert(&continue_from_lst, 138bf215546Sopenharmony_ci nir_after_cf_node(&terminator->nif->cf_node)); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci nir_cf_node_remove(&terminator->nif->cf_node); 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci } 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci nir_block *first_break_block; 145bf215546Sopenharmony_ci nir_block *first_continue_block; 146bf215546Sopenharmony_ci get_first_blocks_in_terminator(limiting_term, &first_break_block, 147bf215546Sopenharmony_ci &first_continue_block); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci /* Pluck out the loop header */ 150bf215546Sopenharmony_ci nir_block *header_blk = nir_loop_first_block(loop); 151bf215546Sopenharmony_ci nir_cf_list lp_header; 152bf215546Sopenharmony_ci nir_cf_extract(&lp_header, nir_before_block(header_blk), 153bf215546Sopenharmony_ci nir_before_cf_node(&limiting_term->nif->cf_node)); 154bf215546Sopenharmony_ci 
155bf215546Sopenharmony_ci /* Add the continue from block of the limiting terminator to the loop body 156bf215546Sopenharmony_ci */ 157bf215546Sopenharmony_ci nir_cf_list continue_from_lst; 158bf215546Sopenharmony_ci nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block), 159bf215546Sopenharmony_ci nir_after_block(limiting_term->continue_from_block)); 160bf215546Sopenharmony_ci nir_cf_reinsert(&continue_from_lst, 161bf215546Sopenharmony_ci nir_after_cf_node(&limiting_term->nif->cf_node)); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci /* Pluck out the loop body */ 164bf215546Sopenharmony_ci nir_cf_list loop_body; 165bf215546Sopenharmony_ci nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node), 166bf215546Sopenharmony_ci nir_after_block(nir_loop_last_block(loop))); 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci /* Clone the loop header and insert before the loop */ 171bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 172bf215546Sopenharmony_ci nir_before_cf_node(&loop->cf_node), 173bf215546Sopenharmony_ci remap_table); 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci for (unsigned i = 0; i < loop->info->max_trip_count; i++) { 176bf215546Sopenharmony_ci /* Clone loop body and insert before the loop */ 177bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent, 178bf215546Sopenharmony_ci nir_before_cf_node(&loop->cf_node), 179bf215546Sopenharmony_ci remap_table); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci /* Clone loop header and insert after loop body */ 182bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 183bf215546Sopenharmony_ci nir_before_cf_node(&loop->cf_node), 184bf215546Sopenharmony_ci remap_table); 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci 
187bf215546Sopenharmony_ci /* Remove the break from the loop terminator and add instructions from 188bf215546Sopenharmony_ci * the break block after the unrolled loop. 189bf215546Sopenharmony_ci */ 190bf215546Sopenharmony_ci nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block); 191bf215546Sopenharmony_ci nir_instr_remove(break_instr); 192bf215546Sopenharmony_ci nir_cf_list break_list; 193bf215546Sopenharmony_ci nir_cf_extract(&break_list, nir_before_block(first_break_block), 194bf215546Sopenharmony_ci nir_after_block(limiting_term->break_block)); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* Clone so things get properly remapped */ 197bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent, 198bf215546Sopenharmony_ci nir_before_cf_node(&loop->cf_node), 199bf215546Sopenharmony_ci remap_table); 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci /* Remove the loop */ 202bf215546Sopenharmony_ci nir_cf_node_remove(&loop->cf_node); 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci /* Delete the original loop body, break block & header */ 205bf215546Sopenharmony_ci nir_cf_delete(&lp_header); 206bf215546Sopenharmony_ci nir_cf_delete(&loop_body); 207bf215546Sopenharmony_ci nir_cf_delete(&break_list); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci _mesa_hash_table_destroy(remap_table, NULL); 210bf215546Sopenharmony_ci} 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_cistatic void 213bf215546Sopenharmony_cimove_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term) 214bf215546Sopenharmony_ci{ 215bf215546Sopenharmony_ci /* Move the rest of the loop inside the continue-from-block */ 216bf215546Sopenharmony_ci nir_cf_reinsert(lst, nir_after_block(term->continue_from_block)); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci /* Remove the break */ 219bf215546Sopenharmony_ci nir_instr_remove(nir_block_last_instr(term->break_block)); 220bf215546Sopenharmony_ci} 
221bf215546Sopenharmony_ci 222bf215546Sopenharmony_cistatic nir_cursor 223bf215546Sopenharmony_ciget_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then) 224bf215546Sopenharmony_ci{ 225bf215546Sopenharmony_ci if (node->type == nir_cf_node_loop) { 226bf215546Sopenharmony_ci return nir_before_cf_node(node); 227bf215546Sopenharmony_ci } else { 228bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(node); 229bf215546Sopenharmony_ci if (continue_from_then) { 230bf215546Sopenharmony_ci return nir_after_block(nir_if_last_then_block(if_stmt)); 231bf215546Sopenharmony_ci } else { 232bf215546Sopenharmony_ci return nir_after_block(nir_if_last_else_block(if_stmt)); 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci} 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_cistatic nir_cf_node * 238bf215546Sopenharmony_cicomplex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term, 239bf215546Sopenharmony_ci nir_cf_list *lp_header, nir_cf_list *lp_body, 240bf215546Sopenharmony_ci struct hash_table *remap_table, 241bf215546Sopenharmony_ci unsigned num_times_to_clone) 242bf215546Sopenharmony_ci{ 243bf215546Sopenharmony_ci /* In the terminator that we have no trip count for move everything after 244bf215546Sopenharmony_ci * the terminator into the continue from branch. 245bf215546Sopenharmony_ci */ 246bf215546Sopenharmony_ci nir_cf_list loop_end; 247bf215546Sopenharmony_ci nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node), 248bf215546Sopenharmony_ci nir_after_block(nir_loop_last_block(loop))); 249bf215546Sopenharmony_ci move_cf_list_into_loop_term(&loop_end, unlimit_term); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci /* Pluck out the loop body. 
*/ 252bf215546Sopenharmony_ci nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)), 253bf215546Sopenharmony_ci nir_after_block(nir_loop_last_block(loop))); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci /* Set unroll_loc to the loop as we will insert the unrolled loop before it 256bf215546Sopenharmony_ci */ 257bf215546Sopenharmony_ci nir_cf_node *unroll_loc = &loop->cf_node; 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci /* Temp list to store the cloned loop as we unroll */ 260bf215546Sopenharmony_ci nir_cf_list unrolled_lp_body; 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci for (unsigned i = 0; i < num_times_to_clone; i++) { 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci nir_cursor cursor = 265bf215546Sopenharmony_ci get_complex_unroll_insert_location(unroll_loc, 266bf215546Sopenharmony_ci unlimit_term->continue_from_then); 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci /* Clone loop header and insert in if branch */ 269bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent, 270bf215546Sopenharmony_ci cursor, remap_table); 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci cursor = 273bf215546Sopenharmony_ci get_complex_unroll_insert_location(unroll_loc, 274bf215546Sopenharmony_ci unlimit_term->continue_from_then); 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci /* Clone loop body */ 277bf215546Sopenharmony_ci nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent, 278bf215546Sopenharmony_ci remap_table); 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci unroll_loc = exec_node_data(nir_cf_node, 281bf215546Sopenharmony_ci exec_list_get_tail(&unrolled_lp_body.list), 282bf215546Sopenharmony_ci node); 283bf215546Sopenharmony_ci assert(unroll_loc->type == nir_cf_node_block && 284bf215546Sopenharmony_ci exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list)); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci /* Get the unrolled if node */ 
287bf215546Sopenharmony_ci unroll_loc = nir_cf_node_prev(unroll_loc); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci /* Insert unrolled loop body */ 290bf215546Sopenharmony_ci nir_cf_reinsert(&unrolled_lp_body, cursor); 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci return unroll_loc; 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci/** 297bf215546Sopenharmony_ci * Unroll a loop with two exists when the trip count of one of the exits is 298bf215546Sopenharmony_ci * unknown. If continue_from_then is true, the loop is repeated only when the 299bf215546Sopenharmony_ci * "then" branch of the if is taken; otherwise it is repeated only 300bf215546Sopenharmony_ci * when the "else" branch of the if is taken. 301bf215546Sopenharmony_ci * 302bf215546Sopenharmony_ci * For example, if the input is: 303bf215546Sopenharmony_ci * 304bf215546Sopenharmony_ci * loop { 305bf215546Sopenharmony_ci * ...phis/condition... 306bf215546Sopenharmony_ci * if condition { 307bf215546Sopenharmony_ci * ...then instructions... 308bf215546Sopenharmony_ci * } else { 309bf215546Sopenharmony_ci * ...continue instructions... 310bf215546Sopenharmony_ci * break 311bf215546Sopenharmony_ci * } 312bf215546Sopenharmony_ci * ...body... 313bf215546Sopenharmony_ci * } 314bf215546Sopenharmony_ci * 315bf215546Sopenharmony_ci * And the iteration count is 3, and unlimit_term->continue_from_then is true, 316bf215546Sopenharmony_ci * then the output will be: 317bf215546Sopenharmony_ci * 318bf215546Sopenharmony_ci * ...condition... 319bf215546Sopenharmony_ci * if condition { 320bf215546Sopenharmony_ci * ...then instructions... 321bf215546Sopenharmony_ci * ...body... 322bf215546Sopenharmony_ci * if condition { 323bf215546Sopenharmony_ci * ...then instructions... 324bf215546Sopenharmony_ci * ...body... 325bf215546Sopenharmony_ci * if condition { 326bf215546Sopenharmony_ci * ...then instructions... 327bf215546Sopenharmony_ci * ...body... 
328bf215546Sopenharmony_ci * } else { 329bf215546Sopenharmony_ci * ...continue instructions... 330bf215546Sopenharmony_ci * } 331bf215546Sopenharmony_ci * } else { 332bf215546Sopenharmony_ci * ...continue instructions... 333bf215546Sopenharmony_ci * } 334bf215546Sopenharmony_ci * } else { 335bf215546Sopenharmony_ci * ...continue instructions... 336bf215546Sopenharmony_ci * } 337bf215546Sopenharmony_ci */ 338bf215546Sopenharmony_cistatic void 339bf215546Sopenharmony_cicomplex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term, 340bf215546Sopenharmony_ci bool limiting_term_second) 341bf215546Sopenharmony_ci{ 342bf215546Sopenharmony_ci assert(nir_is_trivial_loop_if(unlimit_term->nif, 343bf215546Sopenharmony_ci unlimit_term->break_block)); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci nir_loop_terminator *limiting_term = loop->info->limiting_terminator; 346bf215546Sopenharmony_ci assert(nir_is_trivial_loop_if(limiting_term->nif, 347bf215546Sopenharmony_ci limiting_term->break_block)); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci loop_prepare_for_unroll(loop); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci nir_block *header_blk = nir_loop_first_block(loop); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci nir_cf_list lp_header; 354bf215546Sopenharmony_ci nir_cf_list limit_break_list; 355bf215546Sopenharmony_ci unsigned num_times_to_clone; 356bf215546Sopenharmony_ci if (limiting_term_second) { 357bf215546Sopenharmony_ci /* Pluck out the loop header */ 358bf215546Sopenharmony_ci nir_cf_extract(&lp_header, nir_before_block(header_blk), 359bf215546Sopenharmony_ci nir_before_cf_node(&unlimit_term->nif->cf_node)); 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci /* We need some special handling when its the second terminator causing 362bf215546Sopenharmony_ci * us to exit the loop for example: 363bf215546Sopenharmony_ci * 364bf215546Sopenharmony_ci * for (int i = 0; i < uniform_lp_count; i++) { 365bf215546Sopenharmony_ci * colour = 
vec4(0.0, 1.0, 0.0, 1.0); 366bf215546Sopenharmony_ci * 367bf215546Sopenharmony_ci * if (i == 1) { 368bf215546Sopenharmony_ci * break; 369bf215546Sopenharmony_ci * } 370bf215546Sopenharmony_ci * ... any further code is unreachable after i == 1 ... 371bf215546Sopenharmony_ci * } 372bf215546Sopenharmony_ci */ 373bf215546Sopenharmony_ci nir_cf_list after_lt; 374bf215546Sopenharmony_ci nir_if *limit_if = limiting_term->nif; 375bf215546Sopenharmony_ci nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node), 376bf215546Sopenharmony_ci nir_after_block(nir_loop_last_block(loop))); 377bf215546Sopenharmony_ci move_cf_list_into_loop_term(&after_lt, limiting_term); 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci /* Because the trip count is the number of times we pass over the entire 380bf215546Sopenharmony_ci * loop before hitting a break when the second terminator is the 381bf215546Sopenharmony_ci * limiting terminator we can actually execute code inside the loop when 382bf215546Sopenharmony_ci * trip count == 0 e.g. the code above the break. So we need to bump 383bf215546Sopenharmony_ci * the trip_count in order for the code below to clone anything. When 384bf215546Sopenharmony_ci * trip count == 1 we execute the code above the break twice and the 385bf215546Sopenharmony_ci * code below it once so we need clone things twice and so on. 
386bf215546Sopenharmony_ci */ 387bf215546Sopenharmony_ci num_times_to_clone = loop->info->max_trip_count + 1; 388bf215546Sopenharmony_ci } else { 389bf215546Sopenharmony_ci /* Pluck out the loop header */ 390bf215546Sopenharmony_ci nir_cf_extract(&lp_header, nir_before_block(header_blk), 391bf215546Sopenharmony_ci nir_before_cf_node(&limiting_term->nif->cf_node)); 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci nir_block *first_break_block; 394bf215546Sopenharmony_ci nir_block *first_continue_block; 395bf215546Sopenharmony_ci get_first_blocks_in_terminator(limiting_term, &first_break_block, 396bf215546Sopenharmony_ci &first_continue_block); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci /* Remove the break then extract instructions from the break block so we 399bf215546Sopenharmony_ci * can insert them in the innermost else of the unrolled loop. 400bf215546Sopenharmony_ci */ 401bf215546Sopenharmony_ci nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block); 402bf215546Sopenharmony_ci nir_instr_remove(break_instr); 403bf215546Sopenharmony_ci nir_cf_extract(&limit_break_list, nir_before_block(first_break_block), 404bf215546Sopenharmony_ci nir_after_block(limiting_term->break_block)); 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci nir_cf_list continue_list; 407bf215546Sopenharmony_ci nir_cf_extract(&continue_list, nir_before_block(first_continue_block), 408bf215546Sopenharmony_ci nir_after_block(limiting_term->continue_from_block)); 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci nir_cf_reinsert(&continue_list, 411bf215546Sopenharmony_ci nir_after_cf_node(&limiting_term->nif->cf_node)); 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci nir_cf_node_remove(&limiting_term->nif->cf_node); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci num_times_to_clone = loop->info->max_trip_count; 416bf215546Sopenharmony_ci } 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci struct hash_table *remap_table = 
_mesa_pointer_hash_table_create(NULL); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci nir_cf_list lp_body; 421bf215546Sopenharmony_ci nir_cf_node *unroll_loc = 422bf215546Sopenharmony_ci complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body, 423bf215546Sopenharmony_ci remap_table, num_times_to_clone); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci if (!limiting_term_second) { 426bf215546Sopenharmony_ci assert(unroll_loc->type == nir_cf_node_if); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci nir_cursor cursor = 429bf215546Sopenharmony_ci get_complex_unroll_insert_location(unroll_loc, 430bf215546Sopenharmony_ci unlimit_term->continue_from_then); 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci /* Clone loop header and insert in if branch */ 433bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 434bf215546Sopenharmony_ci cursor, remap_table); 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci cursor = 437bf215546Sopenharmony_ci get_complex_unroll_insert_location(unroll_loc, 438bf215546Sopenharmony_ci unlimit_term->continue_from_then); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci /* Clone so things get properly remapped, and insert break block from 441bf215546Sopenharmony_ci * the limiting terminator. 442bf215546Sopenharmony_ci */ 443bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent, 444bf215546Sopenharmony_ci cursor, remap_table); 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci nir_cf_delete(&limit_break_list); 447bf215546Sopenharmony_ci } 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci /* The loop has been unrolled so remove it. 
*/ 450bf215546Sopenharmony_ci nir_cf_node_remove(&loop->cf_node); 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci /* Delete the original loop header and body */ 453bf215546Sopenharmony_ci nir_cf_delete(&lp_header); 454bf215546Sopenharmony_ci nir_cf_delete(&lp_body); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci _mesa_hash_table_destroy(remap_table, NULL); 457bf215546Sopenharmony_ci} 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci/** 460bf215546Sopenharmony_ci * Unroll loops where we only have a single terminator but the exact trip 461bf215546Sopenharmony_ci * count is unknown. For example: 462bf215546Sopenharmony_ci * 463bf215546Sopenharmony_ci * for (int i = 0; i < imin(x, 4); i++) 464bf215546Sopenharmony_ci * ... 465bf215546Sopenharmony_ci */ 466bf215546Sopenharmony_cistatic void 467bf215546Sopenharmony_cicomplex_unroll_single_terminator(nir_loop *loop) 468bf215546Sopenharmony_ci{ 469bf215546Sopenharmony_ci assert(list_length(&loop->info->loop_terminator_list) == 1); 470bf215546Sopenharmony_ci assert(loop->info->limiting_terminator); 471bf215546Sopenharmony_ci assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif, 472bf215546Sopenharmony_ci loop->info->limiting_terminator->break_block)); 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci nir_loop_terminator *terminator = loop->info->limiting_terminator; 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci loop_prepare_for_unroll(loop); 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci /* Pluck out the loop header */ 479bf215546Sopenharmony_ci nir_cf_list lp_header; 480bf215546Sopenharmony_ci nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)), 481bf215546Sopenharmony_ci nir_before_cf_node(&terminator->nif->cf_node)); 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci struct hash_table *remap_table = 484bf215546Sopenharmony_ci _mesa_hash_table_create(NULL, _mesa_hash_pointer, 485bf215546Sopenharmony_ci _mesa_key_pointer_equal); 
486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci /* We need to clone the loop one extra time in order to clone the lcssa 488bf215546Sopenharmony_ci * vars for the last iteration (they are inside the following ifs break 489bf215546Sopenharmony_ci * branch). We leave other passes to clean up this redundant if. 490bf215546Sopenharmony_ci */ 491bf215546Sopenharmony_ci unsigned num_times_to_clone = loop->info->max_trip_count + 1; 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci nir_cf_list lp_body; 494bf215546Sopenharmony_ci UNUSED nir_cf_node *unroll_loc = 495bf215546Sopenharmony_ci complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body, 496bf215546Sopenharmony_ci remap_table, num_times_to_clone); 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci assert(unroll_loc->type == nir_cf_node_if); 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci /* We need to clone the lcssa vars in order to insert them on both sides 501bf215546Sopenharmony_ci * of the if in the last iteration/if-statement. Otherwise the optimisation 502bf215546Sopenharmony_ci * passes will have trouble optimising the unrolled if ladder. 
503bf215546Sopenharmony_ci */ 504bf215546Sopenharmony_ci nir_cursor cursor = 505bf215546Sopenharmony_ci get_complex_unroll_insert_location(unroll_loc, 506bf215546Sopenharmony_ci terminator->continue_from_then); 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(unroll_loc); 509bf215546Sopenharmony_ci nir_cursor start_cursor; 510bf215546Sopenharmony_ci nir_cursor end_cursor; 511bf215546Sopenharmony_ci if (terminator->continue_from_then) { 512bf215546Sopenharmony_ci start_cursor = nir_before_block(nir_if_first_else_block(if_stmt)); 513bf215546Sopenharmony_ci end_cursor = nir_after_block(nir_if_last_else_block(if_stmt)); 514bf215546Sopenharmony_ci } else { 515bf215546Sopenharmony_ci start_cursor = nir_before_block(nir_if_first_then_block(if_stmt)); 516bf215546Sopenharmony_ci end_cursor = nir_after_block(nir_if_last_then_block(if_stmt)); 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci nir_cf_list lcssa_list; 520bf215546Sopenharmony_ci nir_cf_extract(&lcssa_list, start_cursor, end_cursor); 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci /* Insert the cloned vars in the last continue branch */ 523bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&lcssa_list, loop->cf_node.parent, 524bf215546Sopenharmony_ci cursor, remap_table); 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci start_cursor = terminator->continue_from_then ? 
527bf215546Sopenharmony_ci nir_before_block(nir_if_first_else_block(if_stmt)) : 528bf215546Sopenharmony_ci nir_before_block(nir_if_first_then_block(if_stmt)); 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci /* Reinsert the cloned vars back where they came from */ 531bf215546Sopenharmony_ci nir_cf_reinsert(&lcssa_list, start_cursor); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci /* Delete the original loop header and body */ 534bf215546Sopenharmony_ci nir_cf_delete(&lp_header); 535bf215546Sopenharmony_ci nir_cf_delete(&lp_body); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci /* The original loop has been replaced so remove it. */ 538bf215546Sopenharmony_ci nir_cf_node_remove(&loop->cf_node); 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci _mesa_hash_table_destroy(remap_table, NULL); 541bf215546Sopenharmony_ci} 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci/* Unrolls the classic wrapper loops e.g 544bf215546Sopenharmony_ci * 545bf215546Sopenharmony_ci * do { 546bf215546Sopenharmony_ci * // ... 547bf215546Sopenharmony_ci * } while (false) 548bf215546Sopenharmony_ci */ 549bf215546Sopenharmony_cistatic bool 550bf215546Sopenharmony_ciwrapper_unroll(nir_loop *loop) 551bf215546Sopenharmony_ci{ 552bf215546Sopenharmony_ci if (!list_is_empty(&loop->info->loop_terminator_list)) { 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci /* Unrolling a loop with a large number of exits can result in a 555bf215546Sopenharmony_ci * large inrease in register pressure. For now we just skip 556bf215546Sopenharmony_ci * unrolling if we have more than 3 exits (not including the break 557bf215546Sopenharmony_ci * at the end of the loop). 558bf215546Sopenharmony_ci * 559bf215546Sopenharmony_ci * TODO: Most loops that fit this pattern are simply switch 560bf215546Sopenharmony_ci * statements that are converted to a loop to take advantage of 561bf215546Sopenharmony_ci * exiting jump instruction handling. 
In this case we could make 562bf215546Sopenharmony_ci * use of a binary seach pattern like we do in 563bf215546Sopenharmony_ci * nir_lower_indirect_derefs(), this should allow us to unroll the 564bf215546Sopenharmony_ci * loops in an optimal way and should also avoid some of the 565bf215546Sopenharmony_ci * register pressure that comes from simply nesting the 566bf215546Sopenharmony_ci * terminators one after the other. 567bf215546Sopenharmony_ci */ 568bf215546Sopenharmony_ci if (list_length(&loop->info->loop_terminator_list) > 3) 569bf215546Sopenharmony_ci return false; 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci loop_prepare_for_unroll(loop); 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop)); 574bf215546Sopenharmony_ci list_for_each_entry(nir_loop_terminator, terminator, 575bf215546Sopenharmony_ci &loop->info->loop_terminator_list, 576bf215546Sopenharmony_ci loop_terminator_link) { 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci /* Remove break from the terminator */ 579bf215546Sopenharmony_ci nir_instr *break_instr = 580bf215546Sopenharmony_ci nir_block_last_instr(terminator->break_block); 581bf215546Sopenharmony_ci nir_instr_remove(break_instr); 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci /* Pluck out the loop body. */ 584bf215546Sopenharmony_ci nir_cf_list loop_body; 585bf215546Sopenharmony_ci nir_cf_extract(&loop_body, 586bf215546Sopenharmony_ci nir_after_cf_node(&terminator->nif->cf_node), 587bf215546Sopenharmony_ci loop_end); 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci /* Reinsert loop body into continue from block */ 590bf215546Sopenharmony_ci nir_cf_reinsert(&loop_body, 591bf215546Sopenharmony_ci nir_after_block(terminator->continue_from_block)); 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci loop_end = terminator->continue_from_then ? 
594bf215546Sopenharmony_ci nir_after_block(nir_if_last_then_block(terminator->nif)) : 595bf215546Sopenharmony_ci nir_after_block(nir_if_last_else_block(terminator->nif)); 596bf215546Sopenharmony_ci } 597bf215546Sopenharmony_ci } else { 598bf215546Sopenharmony_ci loop_prepare_for_unroll(loop); 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci /* Pluck out the loop body. */ 602bf215546Sopenharmony_ci nir_cf_list loop_body; 603bf215546Sopenharmony_ci nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)), 604bf215546Sopenharmony_ci nir_after_block(nir_loop_last_block(loop))); 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci /* Reinsert loop body after the loop */ 607bf215546Sopenharmony_ci nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node)); 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci /* The loop has been unrolled so remove it. */ 610bf215546Sopenharmony_ci nir_cf_node_remove(&loop->cf_node); 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci return true; 613bf215546Sopenharmony_ci} 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_cistatic bool 616bf215546Sopenharmony_ciis_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref, 617bf215546Sopenharmony_ci unsigned trip_count) 618bf215546Sopenharmony_ci{ 619bf215546Sopenharmony_ci for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 620bf215546Sopenharmony_ci if (d->deref_type != nir_deref_type_array) 621bf215546Sopenharmony_ci continue; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr); 624bf215546Sopenharmony_ci nir_src src = term->induction_rhs ? 
alu->src[1].src : alu->src[0].src; 625bf215546Sopenharmony_ci if (!nir_srcs_equal(d->arr.index, src)) 626bf215546Sopenharmony_ci continue; 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(d); 629bf215546Sopenharmony_ci assert(glsl_type_is_array(parent->type) || 630bf215546Sopenharmony_ci glsl_type_is_matrix(parent->type) || 631bf215546Sopenharmony_ci glsl_type_is_vector(parent->type)); 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci /* We have already unrolled the loop and the new one will be imbedded in 634bf215546Sopenharmony_ci * the innermost continue branch. So unless the array is greater than 635bf215546Sopenharmony_ci * the trip count any iteration over the loop will be an out of bounds 636bf215546Sopenharmony_ci * access of the array. 637bf215546Sopenharmony_ci */ 638bf215546Sopenharmony_ci unsigned length = glsl_type_is_vector(parent->type) ? 639bf215546Sopenharmony_ci glsl_get_vector_elements(parent->type) : 640bf215546Sopenharmony_ci glsl_get_length(parent->type); 641bf215546Sopenharmony_ci return length <= trip_count; 642bf215546Sopenharmony_ci } 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci return false; 645bf215546Sopenharmony_ci} 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci/* If we know an array access is going to be out of bounds remove or replace 648bf215546Sopenharmony_ci * the access with an undef. This can later result in the entire loop being 649bf215546Sopenharmony_ci * removed by nir_opt_dead_cf(). 
650bf215546Sopenharmony_ci */ 651bf215546Sopenharmony_cistatic void 652bf215546Sopenharmony_ciremove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop, 653bf215546Sopenharmony_ci nir_loop_terminator *term, 654bf215546Sopenharmony_ci nir_cf_list *lp_header, 655bf215546Sopenharmony_ci nir_cf_list *lp_body, 656bf215546Sopenharmony_ci unsigned trip_count) 657bf215546Sopenharmony_ci{ 658bf215546Sopenharmony_ci if (!loop->info->guessed_trip_count) 659bf215546Sopenharmony_ci return; 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci /* Temporarily recreate the original loop so we can alter it */ 662bf215546Sopenharmony_ci nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop))); 663bf215546Sopenharmony_ci nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop))); 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci nir_builder b; 666bf215546Sopenharmony_ci nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node)); 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci nir_foreach_block_in_cf_node(block, &loop->cf_node) { 669bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 670bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 671bf215546Sopenharmony_ci continue; 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci /* Check for arrays variably-indexed by a loop induction variable. 676bf215546Sopenharmony_ci * If this access is out of bounds remove the instruction or replace 677bf215546Sopenharmony_ci * its use with an undefined instruction. 678bf215546Sopenharmony_ci * If the loop is no longer useful we leave it for the appropriate 679bf215546Sopenharmony_ci * pass to clean it up for us. 
680bf215546Sopenharmony_ci */ 681bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref || 682bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_deref || 683bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_copy_deref) { 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]), 686bf215546Sopenharmony_ci trip_count)) { 687bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref) { 688bf215546Sopenharmony_ci nir_ssa_def *undef = 689bf215546Sopenharmony_ci nir_ssa_undef(&b, intrin->dest.ssa.num_components, 690bf215546Sopenharmony_ci intrin->dest.ssa.bit_size); 691bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 692bf215546Sopenharmony_ci undef); 693bf215546Sopenharmony_ci } else { 694bf215546Sopenharmony_ci nir_instr_remove(instr); 695bf215546Sopenharmony_ci continue; 696bf215546Sopenharmony_ci } 697bf215546Sopenharmony_ci } 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_copy_deref && 700bf215546Sopenharmony_ci is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]), 701bf215546Sopenharmony_ci trip_count)) { 702bf215546Sopenharmony_ci nir_instr_remove(instr); 703bf215546Sopenharmony_ci } 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci } 706bf215546Sopenharmony_ci } 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci /* Now that we are done extract the loop header and body again */ 709bf215546Sopenharmony_ci nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)), 710bf215546Sopenharmony_ci nir_before_cf_node(&term->nif->cf_node)); 711bf215546Sopenharmony_ci nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)), 712bf215546Sopenharmony_ci nir_after_block(nir_loop_last_block(loop))); 713bf215546Sopenharmony_ci} 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci/* Partially unrolls loops that don't have a known trip count. 
716bf215546Sopenharmony_ci */ 717bf215546Sopenharmony_cistatic void 718bf215546Sopenharmony_cipartial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count) 719bf215546Sopenharmony_ci{ 720bf215546Sopenharmony_ci assert(list_length(&loop->info->loop_terminator_list) == 1); 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci nir_loop_terminator *terminator = 723bf215546Sopenharmony_ci list_first_entry(&loop->info->loop_terminator_list, 724bf215546Sopenharmony_ci nir_loop_terminator, loop_terminator_link); 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block)); 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci loop_prepare_for_unroll(loop); 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci /* Pluck out the loop header */ 731bf215546Sopenharmony_ci nir_cf_list lp_header; 732bf215546Sopenharmony_ci nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)), 733bf215546Sopenharmony_ci nir_before_cf_node(&terminator->nif->cf_node)); 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci struct hash_table *remap_table = 736bf215546Sopenharmony_ci _mesa_hash_table_create(NULL, _mesa_hash_pointer, 737bf215546Sopenharmony_ci _mesa_key_pointer_equal); 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci nir_cf_list lp_body; 740bf215546Sopenharmony_ci nir_cf_node *unroll_loc = 741bf215546Sopenharmony_ci complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body, 742bf215546Sopenharmony_ci remap_table, trip_count); 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci /* Attempt to remove out of bounds array access */ 745bf215546Sopenharmony_ci remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header, 746bf215546Sopenharmony_ci &lp_body, trip_count); 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci nir_cursor cursor = 749bf215546Sopenharmony_ci get_complex_unroll_insert_location(unroll_loc, 750bf215546Sopenharmony_ci terminator->continue_from_then); 
751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci /* Reinsert the loop in the innermost nested continue branch of the unrolled 753bf215546Sopenharmony_ci * loop. 754bf215546Sopenharmony_ci */ 755bf215546Sopenharmony_ci nir_loop *new_loop = nir_loop_create(shader); 756bf215546Sopenharmony_ci nir_cf_node_insert(cursor, &new_loop->cf_node); 757bf215546Sopenharmony_ci new_loop->partially_unrolled = true; 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci /* Clone loop header and insert into new loop */ 760bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 761bf215546Sopenharmony_ci nir_after_cf_list(&new_loop->body), 762bf215546Sopenharmony_ci remap_table); 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci /* Clone loop body and insert into new loop */ 765bf215546Sopenharmony_ci nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent, 766bf215546Sopenharmony_ci nir_after_cf_list(&new_loop->body), 767bf215546Sopenharmony_ci remap_table); 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci /* Insert break back into terminator */ 770bf215546Sopenharmony_ci nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break); 771bf215546Sopenharmony_ci nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop)); 772bf215546Sopenharmony_ci if (terminator->continue_from_then) { 773bf215546Sopenharmony_ci nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr); 774bf215546Sopenharmony_ci } else { 775bf215546Sopenharmony_ci nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr); 776bf215546Sopenharmony_ci } 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci /* Delete the original loop header and body */ 779bf215546Sopenharmony_ci nir_cf_delete(&lp_header); 780bf215546Sopenharmony_ci nir_cf_delete(&lp_body); 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci /* The original loop has been replaced so remove it. 
*/ 783bf215546Sopenharmony_ci nir_cf_node_remove(&loop->cf_node); 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci _mesa_hash_table_destroy(remap_table, NULL); 786bf215546Sopenharmony_ci} 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_cistatic bool 789bf215546Sopenharmony_ciis_indirect_load(nir_instr *instr) 790bf215546Sopenharmony_ci{ 791bf215546Sopenharmony_ci if (instr->type == nir_instr_type_intrinsic) { 792bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci if ((intrin->intrinsic == nir_intrinsic_load_ubo || 795bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ssbo) && 796bf215546Sopenharmony_ci !nir_src_is_const(intrin->src[1])) { 797bf215546Sopenharmony_ci return true; 798bf215546Sopenharmony_ci } 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_global) 801bf215546Sopenharmony_ci return true; 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref || 804bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_deref) { 805bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 806bf215546Sopenharmony_ci nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global; 807bf215546Sopenharmony_ci if (!nir_deref_mode_may_be(deref, mem_modes)) 808bf215546Sopenharmony_ci return false; 809bf215546Sopenharmony_ci while (deref) { 810bf215546Sopenharmony_ci if ((deref->deref_type == nir_deref_type_array || 811bf215546Sopenharmony_ci deref->deref_type == nir_deref_type_ptr_as_array) && 812bf215546Sopenharmony_ci !nir_src_is_const(deref->arr.index)) { 813bf215546Sopenharmony_ci return true; 814bf215546Sopenharmony_ci } 815bf215546Sopenharmony_ci deref = nir_deref_instr_parent(deref); 816bf215546Sopenharmony_ci } 817bf215546Sopenharmony_ci } 818bf215546Sopenharmony_ci } else if (instr->type == nir_instr_type_tex) 
{ 819bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(instr); 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 822bf215546Sopenharmony_ci if (!nir_src_is_const(tex->src[i].src)) 823bf215546Sopenharmony_ci return true; 824bf215546Sopenharmony_ci } 825bf215546Sopenharmony_ci } 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci return false; 828bf215546Sopenharmony_ci} 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_cistatic bool 831bf215546Sopenharmony_cican_pipeline_loads(nir_loop *loop) 832bf215546Sopenharmony_ci{ 833bf215546Sopenharmony_ci if (!loop->info->exact_trip_count_known) 834bf215546Sopenharmony_ci return false; 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_ci bool interesting_loads = false; 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { 839bf215546Sopenharmony_ci if (cf_node == &loop->info->limiting_terminator->nif->cf_node) 840bf215546Sopenharmony_ci continue; 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci /* Control flow usually prevents useful scheduling */ 843bf215546Sopenharmony_ci if (cf_node->type != nir_cf_node_block) 844bf215546Sopenharmony_ci return false; 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci if (interesting_loads) 847bf215546Sopenharmony_ci continue; 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci nir_block *block = nir_cf_node_as_block(cf_node); 850bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 851bf215546Sopenharmony_ci if (is_indirect_load(instr)) { 852bf215546Sopenharmony_ci interesting_loads = true; 853bf215546Sopenharmony_ci break; 854bf215546Sopenharmony_ci } 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci } 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci return interesting_loads; 859bf215546Sopenharmony_ci} 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_ci/* 862bf215546Sopenharmony_ci * Returns true if we should unroll the loop, 
otherwise false. 863bf215546Sopenharmony_ci */ 864bf215546Sopenharmony_cistatic bool 865bf215546Sopenharmony_cicheck_unrolling_restrictions(nir_shader *shader, nir_loop *loop) 866bf215546Sopenharmony_ci{ 867bf215546Sopenharmony_ci if (loop->control == nir_loop_control_unroll) 868bf215546Sopenharmony_ci return true; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci if (loop->control == nir_loop_control_dont_unroll) 871bf215546Sopenharmony_ci return false; 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci nir_loop_info *li = loop->info; 874bf215546Sopenharmony_ci unsigned max_iter = shader->options->max_unroll_iterations; 875bf215546Sopenharmony_ci /* Unroll much more aggressively if it can hide load latency. */ 876bf215546Sopenharmony_ci if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop)) 877bf215546Sopenharmony_ci max_iter = shader->options->max_unroll_iterations_aggressive; 878bf215546Sopenharmony_ci unsigned trip_count = 879bf215546Sopenharmony_ci li->max_trip_count ? 
li->max_trip_count : li->guessed_trip_count; 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter) 882bf215546Sopenharmony_ci return true; 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT; 885bf215546Sopenharmony_ci unsigned cost = li->instr_cost * trip_count; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci if (cost <= cost_limit && trip_count <= max_iter) 888bf215546Sopenharmony_ci return true; 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci return false; 891bf215546Sopenharmony_ci} 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_cistatic bool 894bf215546Sopenharmony_ciprocess_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out, 895bf215546Sopenharmony_ci bool *unrolled_this_block); 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_cistatic bool 898bf215546Sopenharmony_ciprocess_loops_in_block(nir_shader *sh, struct exec_list *block, 899bf215546Sopenharmony_ci bool *has_nested_loop_out) 900bf215546Sopenharmony_ci{ 901bf215546Sopenharmony_ci /* We try to unroll as many loops in one pass as possible. 902bf215546Sopenharmony_ci * E.g. we can safely unroll both loops in this block: 903bf215546Sopenharmony_ci * 904bf215546Sopenharmony_ci * if (...) { 905bf215546Sopenharmony_ci * loop {...} 906bf215546Sopenharmony_ci * } 907bf215546Sopenharmony_ci * 908bf215546Sopenharmony_ci * if (...) { 909bf215546Sopenharmony_ci * loop {...} 910bf215546Sopenharmony_ci * } 911bf215546Sopenharmony_ci * 912bf215546Sopenharmony_ci * Unrolling one loop doesn't affect the other one. 913bf215546Sopenharmony_ci * 914bf215546Sopenharmony_ci * On the other hand for block with: 915bf215546Sopenharmony_ci * 916bf215546Sopenharmony_ci * loop {...} 917bf215546Sopenharmony_ci * ... 
918bf215546Sopenharmony_ci * loop {...} 919bf215546Sopenharmony_ci * 920bf215546Sopenharmony_ci * It is unsafe to unroll both loops in one pass without taking 921bf215546Sopenharmony_ci * complicating precautions, since the structure of the block would 922bf215546Sopenharmony_ci * change after unrolling the first loop. So in such a case we leave 923bf215546Sopenharmony_ci * the second loop for the next iteration of unrolling to handle. 924bf215546Sopenharmony_ci */ 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci bool progress = false; 927bf215546Sopenharmony_ci bool unrolled_this_block = false; 928bf215546Sopenharmony_ci 929bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, nested_node, node, block) { 930bf215546Sopenharmony_ci if (process_loops(sh, nested_node, 931bf215546Sopenharmony_ci has_nested_loop_out, &unrolled_this_block)) { 932bf215546Sopenharmony_ci progress = true; 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci /* If current node is unrolled we could not safely continue 935bf215546Sopenharmony_ci * our iteration since we don't know the next node 936bf215546Sopenharmony_ci * and it's hard to guarantee that we won't end up unrolling 937bf215546Sopenharmony_ci * inner loop of the currently unrolled one, if such exists. 
938bf215546Sopenharmony_ci */ 939bf215546Sopenharmony_ci if (unrolled_this_block) { 940bf215546Sopenharmony_ci break; 941bf215546Sopenharmony_ci } 942bf215546Sopenharmony_ci } 943bf215546Sopenharmony_ci } 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci return progress; 946bf215546Sopenharmony_ci} 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_cistatic bool 949bf215546Sopenharmony_ciprocess_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out, 950bf215546Sopenharmony_ci bool *unrolled_this_block) 951bf215546Sopenharmony_ci{ 952bf215546Sopenharmony_ci bool progress = false; 953bf215546Sopenharmony_ci bool has_nested_loop = false; 954bf215546Sopenharmony_ci nir_loop *loop; 955bf215546Sopenharmony_ci 956bf215546Sopenharmony_ci switch (cf_node->type) { 957bf215546Sopenharmony_ci case nir_cf_node_block: 958bf215546Sopenharmony_ci return progress; 959bf215546Sopenharmony_ci case nir_cf_node_if: { 960bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(cf_node); 961bf215546Sopenharmony_ci progress |= process_loops_in_block(sh, &if_stmt->then_list, 962bf215546Sopenharmony_ci has_nested_loop_out); 963bf215546Sopenharmony_ci progress |= process_loops_in_block(sh, &if_stmt->else_list, 964bf215546Sopenharmony_ci has_nested_loop_out); 965bf215546Sopenharmony_ci return progress; 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci case nir_cf_node_loop: { 968bf215546Sopenharmony_ci loop = nir_cf_node_as_loop(cf_node); 969bf215546Sopenharmony_ci progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop); 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_ci break; 972bf215546Sopenharmony_ci } 973bf215546Sopenharmony_ci default: 974bf215546Sopenharmony_ci unreachable("unknown cf node type"); 975bf215546Sopenharmony_ci } 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci const bool unrolled_child_block = progress; 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci /* Don't attempt to unroll a second inner loop in this pass, wait 
until the 980bf215546Sopenharmony_ci * next pass as we have altered the cf. 981bf215546Sopenharmony_ci */ 982bf215546Sopenharmony_ci if (!progress && loop->control != nir_loop_control_dont_unroll) { 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci /* Remove the conditional break statements associated with all terminators 985bf215546Sopenharmony_ci * that are associated with a fixed iteration count, except for the one 986bf215546Sopenharmony_ci * associated with the limiting terminator--that one needs to stay, since 987bf215546Sopenharmony_ci * it terminates the loop. 988bf215546Sopenharmony_ci */ 989bf215546Sopenharmony_ci if (loop->info->limiting_terminator) { 990bf215546Sopenharmony_ci list_for_each_entry_safe(nir_loop_terminator, t, 991bf215546Sopenharmony_ci &loop->info->loop_terminator_list, 992bf215546Sopenharmony_ci loop_terminator_link) { 993bf215546Sopenharmony_ci if (t->exact_trip_count_unknown) 994bf215546Sopenharmony_ci continue; 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci if (t != loop->info->limiting_terminator) { 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_ci /* Only delete the if-statement if the continue block is empty. 999bf215546Sopenharmony_ci * We trust that nir_opt_if() does its job well enough to 1000bf215546Sopenharmony_ci * remove all instructions from the continue block when possible. 1001bf215546Sopenharmony_ci */ 1002bf215546Sopenharmony_ci nir_block *first_continue_from_blk = t->continue_from_then ? 
1003bf215546Sopenharmony_ci nir_if_first_then_block(t->nif) : 1004bf215546Sopenharmony_ci nir_if_first_else_block(t->nif); 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci if (!(nir_cf_node_is_last(&first_continue_from_blk->cf_node) && 1007bf215546Sopenharmony_ci exec_list_is_empty(&first_continue_from_blk->instr_list))) 1008bf215546Sopenharmony_ci continue; 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_ci /* Now delete the if */ 1011bf215546Sopenharmony_ci nir_cf_node_remove(&t->nif->cf_node); 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci /* Also remove it from the terminator list */ 1014bf215546Sopenharmony_ci list_del(&t->loop_terminator_link); 1015bf215546Sopenharmony_ci 1016bf215546Sopenharmony_ci progress = true; 1017bf215546Sopenharmony_ci } 1018bf215546Sopenharmony_ci } 1019bf215546Sopenharmony_ci } 1020bf215546Sopenharmony_ci 1021bf215546Sopenharmony_ci /* Check for the classic 1022bf215546Sopenharmony_ci * 1023bf215546Sopenharmony_ci * do { 1024bf215546Sopenharmony_ci * // ... 1025bf215546Sopenharmony_ci * } while (false) 1026bf215546Sopenharmony_ci * 1027bf215546Sopenharmony_ci * that is used to wrap multi-line macros. GLSL IR also wraps switch 1028bf215546Sopenharmony_ci * statements in a loop like this. 1029bf215546Sopenharmony_ci */ 1030bf215546Sopenharmony_ci if (loop->info->limiting_terminator == NULL && 1031bf215546Sopenharmony_ci !loop->info->complex_loop) { 1032bf215546Sopenharmony_ci 1033bf215546Sopenharmony_ci nir_block *last_loop_blk = nir_loop_last_block(loop); 1034bf215546Sopenharmony_ci if (nir_block_ends_in_break(last_loop_blk)) { 1035bf215546Sopenharmony_ci progress = wrapper_unroll(loop); 1036bf215546Sopenharmony_ci goto exit; 1037bf215546Sopenharmony_ci } 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci /* If we were able to guess the loop iteration based on array access 1040bf215546Sopenharmony_ci * then do a partial unroll. 
1041bf215546Sopenharmony_ci */ 1042bf215546Sopenharmony_ci unsigned num_lt = list_length(&loop->info->loop_terminator_list); 1043bf215546Sopenharmony_ci if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled && 1044bf215546Sopenharmony_ci loop->info->guessed_trip_count && 1045bf215546Sopenharmony_ci check_unrolling_restrictions(sh, loop)) { 1046bf215546Sopenharmony_ci partial_unroll(sh, loop, loop->info->guessed_trip_count); 1047bf215546Sopenharmony_ci progress = true; 1048bf215546Sopenharmony_ci } 1049bf215546Sopenharmony_ci } 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci /* Intentionally don't consider exact_trip_count_known here. When 1052bf215546Sopenharmony_ci * max_trip_count is non-zero, it is the upper bound on the number of 1053bf215546Sopenharmony_ci * times the loop will iterate, but the loop may iterate less. For 1054bf215546Sopenharmony_ci * example, the following loop will iterate 0 or 1 time: 1055bf215546Sopenharmony_ci * 1056bf215546Sopenharmony_ci * for (i = 0; i < min(x, 1); i++) { ... } 1057bf215546Sopenharmony_ci * 1058bf215546Sopenharmony_ci * Trivial single-interation loops (e.g., do { ... } while (false)) and 1059bf215546Sopenharmony_ci * trivial zero-iteration loops (e.g., while (false) { ... }) will have 1060bf215546Sopenharmony_ci * already been handled. 1061bf215546Sopenharmony_ci * 1062bf215546Sopenharmony_ci * If the loop is known to execute at most once and meets the other 1063bf215546Sopenharmony_ci * unrolling criteria, unroll it even if it has nested loops. 1064bf215546Sopenharmony_ci * 1065bf215546Sopenharmony_ci * It is unlikely that such loops exist in real shaders. GraphicsFuzz is 1066bf215546Sopenharmony_ci * known to generate spurious loops that iterate exactly once. It is 1067bf215546Sopenharmony_ci * plausible that it could eventually start generating loops like the 1068bf215546Sopenharmony_ci * example above, so it seems logical to defend against it now. 
1069bf215546Sopenharmony_ci */ 1070bf215546Sopenharmony_ci if (!loop->info->limiting_terminator || 1071bf215546Sopenharmony_ci (loop->info->max_trip_count != 1 && has_nested_loop)) 1072bf215546Sopenharmony_ci goto exit; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci if (!check_unrolling_restrictions(sh, loop)) 1075bf215546Sopenharmony_ci goto exit; 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci if (loop->info->exact_trip_count_known) { 1078bf215546Sopenharmony_ci simple_unroll(loop); 1079bf215546Sopenharmony_ci progress = true; 1080bf215546Sopenharmony_ci } else { 1081bf215546Sopenharmony_ci /* Attempt to unroll loops with two terminators. */ 1082bf215546Sopenharmony_ci unsigned num_lt = list_length(&loop->info->loop_terminator_list); 1083bf215546Sopenharmony_ci if (num_lt == 2 && 1084bf215546Sopenharmony_ci !loop->info->limiting_terminator->exact_trip_count_unknown) { 1085bf215546Sopenharmony_ci bool limiting_term_second = true; 1086bf215546Sopenharmony_ci nir_loop_terminator *terminator = 1087bf215546Sopenharmony_ci list_first_entry(&loop->info->loop_terminator_list, 1088bf215546Sopenharmony_ci nir_loop_terminator, loop_terminator_link); 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci if (terminator->nif == loop->info->limiting_terminator->nif) { 1092bf215546Sopenharmony_ci limiting_term_second = false; 1093bf215546Sopenharmony_ci terminator = 1094bf215546Sopenharmony_ci list_last_entry(&loop->info->loop_terminator_list, 1095bf215546Sopenharmony_ci nir_loop_terminator, loop_terminator_link); 1096bf215546Sopenharmony_ci } 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci /* If the first terminator has a trip count of zero and is the 1099bf215546Sopenharmony_ci * limiting terminator just do a simple unroll as the second 1100bf215546Sopenharmony_ci * terminator can never be reached. 
1101bf215546Sopenharmony_ci */ 1102bf215546Sopenharmony_ci if (loop->info->max_trip_count == 0 && !limiting_term_second) { 1103bf215546Sopenharmony_ci simple_unroll(loop); 1104bf215546Sopenharmony_ci } else { 1105bf215546Sopenharmony_ci complex_unroll(loop, terminator, limiting_term_second); 1106bf215546Sopenharmony_ci } 1107bf215546Sopenharmony_ci progress = true; 1108bf215546Sopenharmony_ci } 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_ci if (num_lt == 1) { 1111bf215546Sopenharmony_ci assert(loop->info->limiting_terminator->exact_trip_count_unknown); 1112bf215546Sopenharmony_ci complex_unroll_single_terminator(loop); 1113bf215546Sopenharmony_ci progress = true; 1114bf215546Sopenharmony_ci } 1115bf215546Sopenharmony_ci } 1116bf215546Sopenharmony_ci } 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_ciexit: 1119bf215546Sopenharmony_ci *has_nested_loop_out = true; 1120bf215546Sopenharmony_ci if (progress && !unrolled_child_block) 1121bf215546Sopenharmony_ci *unrolled_this_block = true; 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci return progress; 1124bf215546Sopenharmony_ci} 1125bf215546Sopenharmony_ci 1126bf215546Sopenharmony_cistatic bool 1127bf215546Sopenharmony_cinir_opt_loop_unroll_impl(nir_function_impl *impl, 1128bf215546Sopenharmony_ci nir_variable_mode indirect_mask, 1129bf215546Sopenharmony_ci bool force_unroll_sampler_indirect) 1130bf215546Sopenharmony_ci{ 1131bf215546Sopenharmony_ci bool progress = false; 1132bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask, 1133bf215546Sopenharmony_ci (int) force_unroll_sampler_indirect); 1134bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_block_index); 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci bool has_nested_loop = false; 1137bf215546Sopenharmony_ci progress |= process_loops_in_block(impl->function->shader, &impl->body, 1138bf215546Sopenharmony_ci &has_nested_loop); 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci 
if (progress) { 1141bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_none); 1142bf215546Sopenharmony_ci nir_lower_regs_to_ssa_impl(impl); 1143bf215546Sopenharmony_ci } else { 1144bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 1145bf215546Sopenharmony_ci } 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci return progress; 1148bf215546Sopenharmony_ci} 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_ci/** 1151bf215546Sopenharmony_ci * indirect_mask specifies which type of indirectly accessed variables 1152bf215546Sopenharmony_ci * should force loop unrolling. 1153bf215546Sopenharmony_ci */ 1154bf215546Sopenharmony_cibool 1155bf215546Sopenharmony_cinir_opt_loop_unroll(nir_shader *shader) 1156bf215546Sopenharmony_ci{ 1157bf215546Sopenharmony_ci bool progress = false; 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler; 1160bf215546Sopenharmony_ci nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling; 1161bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 1162bf215546Sopenharmony_ci if (function->impl) { 1163bf215546Sopenharmony_ci progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask, 1164bf215546Sopenharmony_ci force_unroll_sampler_indirect); 1165bf215546Sopenharmony_ci } 1166bf215546Sopenharmony_ci } 1167bf215546Sopenharmony_ci return progress; 1168bf215546Sopenharmony_ci} 1169