1/*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_shader.h"
25
26using namespace brw;
27
28/** @file brw_predicated_break.cpp
29 *
30 * Loops are often structured as
31 *
32 * loop:
33 *    CMP.f0
34 *    (+f0) IF
35 *    BREAK
36 *    ENDIF
37 *    ...
38 *    WHILE loop
39 *
 * This peephole pass removes the IF and ENDIF instructions and predicates the
 * BREAK, dropping two instructions from the loop body.  The same rewrite is
 * applied when the IF guards a lone CONTINUE instead of a BREAK.
42 *
43 * If the loop was a DO { ... } WHILE loop, it looks like
44 *
45 * loop:
46 *    ...
47 *    CMP.f0
48 *    (+f0) IF
49 *    BREAK
50 *    ENDIF
51 *    WHILE loop
52 *
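 * and we can remove the BREAK instruction and predicate the WHILE with the
 * inverse of the IF's predicate.  This second step is skipped if the loop
 * contains a CONTINUE or if the WHILE is already predicated.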
54 */
55
56#define MAX_NESTING 128
57
58struct loop_continue_tracking {
59   BITSET_WORD has_continue[BITSET_WORDS(MAX_NESTING)];
60   unsigned depth;
61};
62
63static void
64enter_loop(struct loop_continue_tracking *s)
65{
66   s->depth++;
67
68   /* Any loops deeper than that maximum nesting will just re-use the last
69    * flag.  This simplifies most of the code.  MAX_NESTING is chosen to be
70    * large enough that it is unlikely to occur.  Even if it does, the
71    * optimization that uses this tracking is unlikely to make much
72    * difference.
73    */
74   if (s->depth < MAX_NESTING)
75      BITSET_CLEAR(s->has_continue, s->depth);
76}
77
78static void
79exit_loop(struct loop_continue_tracking *s)
80{
81   assert(s->depth > 0);
82   s->depth--;
83}
84
85static void
86set_continue(struct loop_continue_tracking *s)
87{
88   const unsigned i = MIN2(s->depth, MAX_NESTING - 1);
89
90   BITSET_SET(s->has_continue, i);
91}
92
93static bool
94has_continue(const struct loop_continue_tracking *s)
95{
96   const unsigned i = MIN2(s->depth, MAX_NESTING - 1);
97
98   return BITSET_TEST(s->has_continue, i);
99}
100
101bool
102opt_predicated_break(backend_shader *s)
103{
104   bool progress = false;
105   struct loop_continue_tracking state = { {0, }, 0 };
106
107   foreach_block (block, s->cfg) {
108      /* DO instructions, by definition, can only be found at the beginning of
109       * basic blocks.
110       */
111      backend_instruction *const do_inst = block->start();
112
113      /* BREAK, CONTINUE, and WHILE instructions, by definition, can only be
114       * found at the ends of basic blocks.
115       */
116      backend_instruction *jump_inst = block->end();
117
118      if (do_inst->opcode == BRW_OPCODE_DO)
119         enter_loop(&state);
120
121      if (jump_inst->opcode == BRW_OPCODE_CONTINUE)
122         set_continue(&state);
123      else if (jump_inst->opcode == BRW_OPCODE_WHILE)
124         exit_loop(&state);
125
126      if (block->start_ip != block->end_ip)
127         continue;
128
129      if (jump_inst->opcode != BRW_OPCODE_BREAK &&
130          jump_inst->opcode != BRW_OPCODE_CONTINUE)
131         continue;
132
133      backend_instruction *if_inst = block->prev()->end();
134      if (if_inst->opcode != BRW_OPCODE_IF)
135         continue;
136
137      backend_instruction *endif_inst = block->next()->start();
138      if (endif_inst->opcode != BRW_OPCODE_ENDIF)
139         continue;
140
141      bblock_t *jump_block = block;
142      bblock_t *if_block = jump_block->prev();
143      bblock_t *endif_block = jump_block->next();
144
145      jump_inst->predicate = if_inst->predicate;
146      jump_inst->predicate_inverse = if_inst->predicate_inverse;
147
148      bblock_t *earlier_block = if_block;
149      if (if_block->start_ip == if_block->end_ip) {
150         earlier_block = if_block->prev();
151      }
152
153      if_inst->remove(if_block);
154
155      bblock_t *later_block = endif_block;
156      if (endif_block->start_ip == endif_block->end_ip) {
157         later_block = endif_block->next();
158      }
159      endif_inst->remove(endif_block);
160
161      if (!earlier_block->ends_with_control_flow()) {
162         earlier_block->children.make_empty();
163         earlier_block->add_successor(s->cfg->mem_ctx, jump_block,
164                                      bblock_link_logical);
165      }
166
167      if (!later_block->starts_with_control_flow()) {
168         later_block->parents.make_empty();
169      }
170      jump_block->add_successor(s->cfg->mem_ctx, later_block,
171                                bblock_link_logical);
172
173      if (earlier_block->can_combine_with(jump_block)) {
174         earlier_block->combine_with(jump_block);
175
176         block = earlier_block;
177      }
178
179      /* Now look at the first instruction of the block following the BREAK. If
180       * it's a WHILE, we can delete the break, predicate the WHILE, and join
181       * the two basic blocks.
182       *
183       * This optimization can only be applied if the only instruction that
184       * can transfer control to the WHILE is the BREAK.  If other paths can
185       * lead to the while, the flags may be in an unknown state, and the loop
186       * could terminate prematurely.  This can occur if the loop contains a
187       * CONT instruction.
188       */
189      bblock_t *while_block = earlier_block->next();
190      backend_instruction *while_inst = while_block->start();
191
192      if (jump_inst->opcode == BRW_OPCODE_BREAK &&
193          while_inst->opcode == BRW_OPCODE_WHILE &&
194          while_inst->predicate == BRW_PREDICATE_NONE &&
195          !has_continue(&state)) {
196         jump_inst->remove(earlier_block);
197         while_inst->predicate = jump_inst->predicate;
198         while_inst->predicate_inverse = !jump_inst->predicate_inverse;
199
200         assert(earlier_block->can_combine_with(while_block));
201         earlier_block->combine_with(while_block);
202      }
203
204      progress = true;
205   }
206
207   if (progress)
208      s->invalidate_analysis(DEPENDENCY_BLOCKS | DEPENDENCY_INSTRUCTIONS);
209
210   return progress;
211}
212