1/* 2 * Copyright © 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_shader.h" 25 26using namespace brw; 27 28/** @file brw_predicated_break.cpp 29 * 30 * Loops are often structured as 31 * 32 * loop: 33 * CMP.f0 34 * (+f0) IF 35 * BREAK 36 * ENDIF 37 * ... 38 * WHILE loop 39 * 40 * This peephole pass removes the IF and ENDIF instructions and predicates the 41 * BREAK, dropping two instructions from the loop body. 42 * 43 * If the loop was a DO { ... } WHILE loop, it looks like 44 * 45 * loop: 46 * ... 47 * CMP.f0 48 * (+f0) IF 49 * BREAK 50 * ENDIF 51 * WHILE loop 52 * 53 * and we can remove the BREAK instruction and predicate the WHILE. 54 */ 55 56#define MAX_NESTING 128 57 58struct loop_continue_tracking { 59 BITSET_WORD has_continue[BITSET_WORDS(MAX_NESTING)]; 60 unsigned depth; 61}; 62 63static void 64enter_loop(struct loop_continue_tracking *s) 65{ 66 s->depth++; 67 68 /* Any loops deeper than that maximum nesting will just re-use the last 69 * flag. This simplifies most of the code. MAX_NESTING is chosen to be 70 * large enough that it is unlikely to occur. Even if it does, the 71 * optimization that uses this tracking is unlikely to make much 72 * difference. 73 */ 74 if (s->depth < MAX_NESTING) 75 BITSET_CLEAR(s->has_continue, s->depth); 76} 77 78static void 79exit_loop(struct loop_continue_tracking *s) 80{ 81 assert(s->depth > 0); 82 s->depth--; 83} 84 85static void 86set_continue(struct loop_continue_tracking *s) 87{ 88 const unsigned i = MIN2(s->depth, MAX_NESTING - 1); 89 90 BITSET_SET(s->has_continue, i); 91} 92 93static bool 94has_continue(const struct loop_continue_tracking *s) 95{ 96 const unsigned i = MIN2(s->depth, MAX_NESTING - 1); 97 98 return BITSET_TEST(s->has_continue, i); 99} 100 101bool 102opt_predicated_break(backend_shader *s) 103{ 104 bool progress = false; 105 struct loop_continue_tracking state = { {0, }, 0 }; 106 107 foreach_block (block, s->cfg) { 108 /* DO instructions, by definition, can only be found at the beginning of 109 * basic blocks. 110 */ 111 backend_instruction *const do_inst = block->start(); 112 113 /* BREAK, CONTINUE, and WHILE instructions, by definition, can only be 114 * found at the ends of basic blocks. 115 */ 116 backend_instruction *jump_inst = block->end(); 117 118 if (do_inst->opcode == BRW_OPCODE_DO) 119 enter_loop(&state); 120 121 if (jump_inst->opcode == BRW_OPCODE_CONTINUE) 122 set_continue(&state); 123 else if (jump_inst->opcode == BRW_OPCODE_WHILE) 124 exit_loop(&state); 125 126 if (block->start_ip != block->end_ip) 127 continue; 128 129 if (jump_inst->opcode != BRW_OPCODE_BREAK && 130 jump_inst->opcode != BRW_OPCODE_CONTINUE) 131 continue; 132 133 backend_instruction *if_inst = block->prev()->end(); 134 if (if_inst->opcode != BRW_OPCODE_IF) 135 continue; 136 137 backend_instruction *endif_inst = block->next()->start(); 138 if (endif_inst->opcode != BRW_OPCODE_ENDIF) 139 continue; 140 141 bblock_t *jump_block = block; 142 bblock_t *if_block = jump_block->prev(); 143 bblock_t *endif_block = jump_block->next(); 144 145 jump_inst->predicate = if_inst->predicate; 146 jump_inst->predicate_inverse = if_inst->predicate_inverse; 147 148 bblock_t *earlier_block = if_block; 149 if (if_block->start_ip == if_block->end_ip) { 150 earlier_block = if_block->prev(); 151 } 152 153 if_inst->remove(if_block); 154 155 bblock_t *later_block = endif_block; 156 if (endif_block->start_ip == endif_block->end_ip) { 157 later_block = endif_block->next(); 158 } 159 endif_inst->remove(endif_block); 160 161 if (!earlier_block->ends_with_control_flow()) { 162 earlier_block->children.make_empty(); 163 earlier_block->add_successor(s->cfg->mem_ctx, jump_block, 164 bblock_link_logical); 165 } 166 167 if (!later_block->starts_with_control_flow()) { 168 later_block->parents.make_empty(); 169 } 170 jump_block->add_successor(s->cfg->mem_ctx, later_block, 171 bblock_link_logical); 172 173 if (earlier_block->can_combine_with(jump_block)) { 174 earlier_block->combine_with(jump_block); 175 176 block = earlier_block; 177 } 178 179 /* Now look at the first instruction of the block following the BREAK. If 180 * it's a WHILE, we can delete the break, predicate the WHILE, and join 181 * the two basic blocks. 182 * 183 * This optimization can only be applied if the only instruction that 184 * can transfer control to the WHILE is the BREAK. If other paths can 185 * lead to the while, the flags may be in an unknown state, and the loop 186 * could terminate prematurely. This can occur if the loop contains a 187 * CONT instruction. 188 */ 189 bblock_t *while_block = earlier_block->next(); 190 backend_instruction *while_inst = while_block->start(); 191 192 if (jump_inst->opcode == BRW_OPCODE_BREAK && 193 while_inst->opcode == BRW_OPCODE_WHILE && 194 while_inst->predicate == BRW_PREDICATE_NONE && 195 !has_continue(&state)) { 196 jump_inst->remove(earlier_block); 197 while_inst->predicate = jump_inst->predicate; 198 while_inst->predicate_inverse = !jump_inst->predicate_inverse; 199 200 assert(earlier_block->can_combine_with(while_block)); 201 earlier_block->combine_with(while_block); 202 } 203 204 progress = true; 205 } 206 207 if (progress) 208 s->invalidate_analysis(DEPENDENCY_BLOCKS | DEPENDENCY_INSTRUCTIONS); 209 210 return progress; 211} 212