1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir.h"
25bf215546Sopenharmony_ci#include "nir_builder.h"
26bf215546Sopenharmony_ci#include "nir_control_flow.h"
27bf215546Sopenharmony_ci#include "nir_loop_analyze.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci
/* Size limit for loop unrolling.
 *
 * NOTE(review): the code that compares against this macro is not in this
 * part of the file; the text below suggests the unit is NIR instructions —
 * confirm against the user of the macro.
 *
 * This limit is chosen fairly arbitrarily.  GLSL IR max iteration is 32
 * instructions. (Multiply counting nodes and magic number 5.)  But there is
 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
 * to give about the same results. Around 5 instructions per node.  But some
 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
 * we set it to 26.
 */
#define LOOP_UNROLL_LIMIT 26
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci/* Prepare this loop for unrolling by first converting to lcssa and then
40bf215546Sopenharmony_ci * converting the phis from the top level of the loop body to regs.
41bf215546Sopenharmony_ci * Partially converting out of SSA allows us to unroll the loop without having
42bf215546Sopenharmony_ci * to keep track of and update phis along the way which gets tricky and
43bf215546Sopenharmony_ci * doesn't add much value over converting to regs.
44bf215546Sopenharmony_ci *
45bf215546Sopenharmony_ci * The loop may have a jump instruction at the end of the loop which does
46bf215546Sopenharmony_ci * nothing.  Once we're out of SSA, we can safely delete it so we don't have
47bf215546Sopenharmony_ci * to deal with it later.
48bf215546Sopenharmony_ci */
49bf215546Sopenharmony_cistatic void
50bf215546Sopenharmony_ciloop_prepare_for_unroll(nir_loop *loop)
51bf215546Sopenharmony_ci{
52bf215546Sopenharmony_ci   nir_rematerialize_derefs_in_use_blocks_impl(
53bf215546Sopenharmony_ci      nir_cf_node_get_function(&loop->cf_node));
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   nir_convert_loop_to_lcssa(loop);
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   /* Lower phis at the top level of the loop body */
58bf215546Sopenharmony_ci   foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
59bf215546Sopenharmony_ci      if (nir_cf_node_block == node->type) {
60bf215546Sopenharmony_ci         nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
61bf215546Sopenharmony_ci      }
62bf215546Sopenharmony_ci   }
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   /* Lower phis after the loop */
65bf215546Sopenharmony_ci   nir_block *block_after_loop =
66bf215546Sopenharmony_ci      nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   nir_lower_phis_to_regs_block(block_after_loop);
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci   /* Remove jump if it's the last instruction in the loop */
71bf215546Sopenharmony_ci   nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
72bf215546Sopenharmony_ci   if (last_instr && last_instr->type == nir_instr_type_jump) {
73bf215546Sopenharmony_ci      nir_instr_remove(last_instr);
74bf215546Sopenharmony_ci   }
75bf215546Sopenharmony_ci}
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_cistatic void
78bf215546Sopenharmony_ciget_first_blocks_in_terminator(nir_loop_terminator *term,
79bf215546Sopenharmony_ci                               nir_block **first_break_block,
80bf215546Sopenharmony_ci                               nir_block **first_continue_block)
81bf215546Sopenharmony_ci{
82bf215546Sopenharmony_ci   if (term->continue_from_then) {
83bf215546Sopenharmony_ci      *first_continue_block = nir_if_first_then_block(term->nif);
84bf215546Sopenharmony_ci      *first_break_block = nir_if_first_else_block(term->nif);
85bf215546Sopenharmony_ci   } else {
86bf215546Sopenharmony_ci      *first_continue_block = nir_if_first_else_block(term->nif);
87bf215546Sopenharmony_ci      *first_break_block = nir_if_first_then_block(term->nif);
88bf215546Sopenharmony_ci   }
89bf215546Sopenharmony_ci}
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci/**
92bf215546Sopenharmony_ci * Unroll a loop where we know exactly how many iterations there are and there
93bf215546Sopenharmony_ci * is only a single exit point.  Note here we can unroll loops with multiple
94bf215546Sopenharmony_ci * theoretical exits that only have a single terminating exit that we always
95bf215546Sopenharmony_ci * know is the "real" exit.
96bf215546Sopenharmony_ci *
97bf215546Sopenharmony_ci *     loop {
98bf215546Sopenharmony_ci *         ...instrs...
99bf215546Sopenharmony_ci *     }
100bf215546Sopenharmony_ci *
101bf215546Sopenharmony_ci * And the iteration count is 3, the output will be:
102bf215546Sopenharmony_ci *
103bf215546Sopenharmony_ci *     ...instrs... ...instrs... ...instrs...
104bf215546Sopenharmony_ci */
105bf215546Sopenharmony_cistatic void
106bf215546Sopenharmony_cisimple_unroll(nir_loop *loop)
107bf215546Sopenharmony_ci{
108bf215546Sopenharmony_ci   nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
109bf215546Sopenharmony_ci   assert(nir_is_trivial_loop_if(limiting_term->nif,
110bf215546Sopenharmony_ci                                 limiting_term->break_block));
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   loop_prepare_for_unroll(loop);
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci   /* Skip over loop terminator and get the loop body. */
115bf215546Sopenharmony_ci   list_for_each_entry(nir_loop_terminator, terminator,
116bf215546Sopenharmony_ci                       &loop->info->loop_terminator_list,
117bf215546Sopenharmony_ci                       loop_terminator_link) {
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci      /* Remove all but the limiting terminator as we know the other exit
120bf215546Sopenharmony_ci       * conditions can never be met. Note we need to extract any instructions
121bf215546Sopenharmony_ci       * in the continue from branch and insert then into the loop body before
122bf215546Sopenharmony_ci       * removing it.
123bf215546Sopenharmony_ci       */
124bf215546Sopenharmony_ci      if (terminator->nif != limiting_term->nif) {
125bf215546Sopenharmony_ci         nir_block *first_break_block;
126bf215546Sopenharmony_ci         nir_block *first_continue_block;
127bf215546Sopenharmony_ci         get_first_blocks_in_terminator(terminator, &first_break_block,
128bf215546Sopenharmony_ci                                        &first_continue_block);
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci         assert(nir_is_trivial_loop_if(terminator->nif,
131bf215546Sopenharmony_ci                                       terminator->break_block));
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci         nir_cf_list continue_from_lst;
134bf215546Sopenharmony_ci         nir_cf_extract(&continue_from_lst,
135bf215546Sopenharmony_ci                        nir_before_block(first_continue_block),
136bf215546Sopenharmony_ci                        nir_after_block(terminator->continue_from_block));
137bf215546Sopenharmony_ci         nir_cf_reinsert(&continue_from_lst,
138bf215546Sopenharmony_ci                         nir_after_cf_node(&terminator->nif->cf_node));
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci         nir_cf_node_remove(&terminator->nif->cf_node);
141bf215546Sopenharmony_ci      }
142bf215546Sopenharmony_ci   }
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci   nir_block *first_break_block;
145bf215546Sopenharmony_ci   nir_block *first_continue_block;
146bf215546Sopenharmony_ci   get_first_blocks_in_terminator(limiting_term, &first_break_block,
147bf215546Sopenharmony_ci                                  &first_continue_block);
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci   /* Pluck out the loop header */
150bf215546Sopenharmony_ci   nir_block *header_blk = nir_loop_first_block(loop);
151bf215546Sopenharmony_ci   nir_cf_list lp_header;
152bf215546Sopenharmony_ci   nir_cf_extract(&lp_header, nir_before_block(header_blk),
153bf215546Sopenharmony_ci                  nir_before_cf_node(&limiting_term->nif->cf_node));
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   /* Add the continue from block of the limiting terminator to the loop body
156bf215546Sopenharmony_ci    */
157bf215546Sopenharmony_ci   nir_cf_list continue_from_lst;
158bf215546Sopenharmony_ci   nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
159bf215546Sopenharmony_ci                  nir_after_block(limiting_term->continue_from_block));
160bf215546Sopenharmony_ci   nir_cf_reinsert(&continue_from_lst,
161bf215546Sopenharmony_ci                   nir_after_cf_node(&limiting_term->nif->cf_node));
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   /* Pluck out the loop body */
164bf215546Sopenharmony_ci   nir_cf_list loop_body;
165bf215546Sopenharmony_ci   nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
166bf215546Sopenharmony_ci                  nir_after_block(nir_loop_last_block(loop)));
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci   /* Clone the loop header and insert before the loop */
171bf215546Sopenharmony_ci   nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
172bf215546Sopenharmony_ci                                  nir_before_cf_node(&loop->cf_node),
173bf215546Sopenharmony_ci                                  remap_table);
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
176bf215546Sopenharmony_ci      /* Clone loop body and insert before the loop */
177bf215546Sopenharmony_ci      nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
178bf215546Sopenharmony_ci                                     nir_before_cf_node(&loop->cf_node),
179bf215546Sopenharmony_ci                                     remap_table);
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci      /* Clone loop header and insert after loop body */
182bf215546Sopenharmony_ci      nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
183bf215546Sopenharmony_ci                                     nir_before_cf_node(&loop->cf_node),
184bf215546Sopenharmony_ci                                     remap_table);
185bf215546Sopenharmony_ci   }
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   /* Remove the break from the loop terminator and add instructions from
188bf215546Sopenharmony_ci    * the break block after the unrolled loop.
189bf215546Sopenharmony_ci    */
190bf215546Sopenharmony_ci   nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
191bf215546Sopenharmony_ci   nir_instr_remove(break_instr);
192bf215546Sopenharmony_ci   nir_cf_list break_list;
193bf215546Sopenharmony_ci   nir_cf_extract(&break_list, nir_before_block(first_break_block),
194bf215546Sopenharmony_ci                  nir_after_block(limiting_term->break_block));
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   /* Clone so things get properly remapped */
197bf215546Sopenharmony_ci   nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
198bf215546Sopenharmony_ci                                  nir_before_cf_node(&loop->cf_node),
199bf215546Sopenharmony_ci                                  remap_table);
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   /* Remove the loop */
202bf215546Sopenharmony_ci   nir_cf_node_remove(&loop->cf_node);
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci   /* Delete the original loop body, break block & header */
205bf215546Sopenharmony_ci   nir_cf_delete(&lp_header);
206bf215546Sopenharmony_ci   nir_cf_delete(&loop_body);
207bf215546Sopenharmony_ci   nir_cf_delete(&break_list);
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   _mesa_hash_table_destroy(remap_table, NULL);
210bf215546Sopenharmony_ci}
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_cistatic void
213bf215546Sopenharmony_cimove_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
214bf215546Sopenharmony_ci{
215bf215546Sopenharmony_ci   /* Move the rest of the loop inside the continue-from-block */
216bf215546Sopenharmony_ci   nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci   /* Remove the break */
219bf215546Sopenharmony_ci   nir_instr_remove(nir_block_last_instr(term->break_block));
220bf215546Sopenharmony_ci}
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_cistatic nir_cursor
223bf215546Sopenharmony_ciget_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
224bf215546Sopenharmony_ci{
225bf215546Sopenharmony_ci   if (node->type == nir_cf_node_loop) {
226bf215546Sopenharmony_ci      return nir_before_cf_node(node);
227bf215546Sopenharmony_ci   } else {
228bf215546Sopenharmony_ci      nir_if *if_stmt = nir_cf_node_as_if(node);
229bf215546Sopenharmony_ci      if (continue_from_then) {
230bf215546Sopenharmony_ci         return nir_after_block(nir_if_last_then_block(if_stmt));
231bf215546Sopenharmony_ci      } else {
232bf215546Sopenharmony_ci         return nir_after_block(nir_if_last_else_block(if_stmt));
233bf215546Sopenharmony_ci      }
234bf215546Sopenharmony_ci   }
235bf215546Sopenharmony_ci}
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_cistatic nir_cf_node *
238bf215546Sopenharmony_cicomplex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
239bf215546Sopenharmony_ci                         nir_cf_list *lp_header, nir_cf_list *lp_body,
240bf215546Sopenharmony_ci                         struct hash_table *remap_table,
241bf215546Sopenharmony_ci                         unsigned num_times_to_clone)
242bf215546Sopenharmony_ci{
243bf215546Sopenharmony_ci   /* In the terminator that we have no trip count for move everything after
244bf215546Sopenharmony_ci    * the terminator into the continue from branch.
245bf215546Sopenharmony_ci    */
246bf215546Sopenharmony_ci   nir_cf_list loop_end;
247bf215546Sopenharmony_ci   nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
248bf215546Sopenharmony_ci                  nir_after_block(nir_loop_last_block(loop)));
249bf215546Sopenharmony_ci   move_cf_list_into_loop_term(&loop_end, unlimit_term);
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   /* Pluck out the loop body. */
252bf215546Sopenharmony_ci   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
253bf215546Sopenharmony_ci                  nir_after_block(nir_loop_last_block(loop)));
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci   /* Set unroll_loc to the loop as we will insert the unrolled loop before it
256bf215546Sopenharmony_ci    */
257bf215546Sopenharmony_ci   nir_cf_node *unroll_loc = &loop->cf_node;
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   /* Temp list to store the cloned loop as we unroll */
260bf215546Sopenharmony_ci   nir_cf_list unrolled_lp_body;
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_times_to_clone; i++) {
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci      nir_cursor cursor =
265bf215546Sopenharmony_ci         get_complex_unroll_insert_location(unroll_loc,
266bf215546Sopenharmony_ci                                            unlimit_term->continue_from_then);
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci      /* Clone loop header and insert in if branch */
269bf215546Sopenharmony_ci      nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
270bf215546Sopenharmony_ci                                     cursor, remap_table);
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci      cursor =
273bf215546Sopenharmony_ci         get_complex_unroll_insert_location(unroll_loc,
274bf215546Sopenharmony_ci                                            unlimit_term->continue_from_then);
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci      /* Clone loop body */
277bf215546Sopenharmony_ci      nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
278bf215546Sopenharmony_ci                        remap_table);
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci      unroll_loc = exec_node_data(nir_cf_node,
281bf215546Sopenharmony_ci                                  exec_list_get_tail(&unrolled_lp_body.list),
282bf215546Sopenharmony_ci                                  node);
283bf215546Sopenharmony_ci      assert(unroll_loc->type == nir_cf_node_block &&
284bf215546Sopenharmony_ci             exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci      /* Get the unrolled if node */
287bf215546Sopenharmony_ci      unroll_loc = nir_cf_node_prev(unroll_loc);
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci      /* Insert unrolled loop body */
290bf215546Sopenharmony_ci      nir_cf_reinsert(&unrolled_lp_body, cursor);
291bf215546Sopenharmony_ci   }
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   return unroll_loc;
294bf215546Sopenharmony_ci}
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci/**
297bf215546Sopenharmony_ci * Unroll a loop with two exists when the trip count of one of the exits is
298bf215546Sopenharmony_ci * unknown.  If continue_from_then is true, the loop is repeated only when the
299bf215546Sopenharmony_ci * "then" branch of the if is taken; otherwise it is repeated only
300bf215546Sopenharmony_ci * when the "else" branch of the if is taken.
301bf215546Sopenharmony_ci *
302bf215546Sopenharmony_ci * For example, if the input is:
303bf215546Sopenharmony_ci *
304bf215546Sopenharmony_ci *      loop {
305bf215546Sopenharmony_ci *         ...phis/condition...
306bf215546Sopenharmony_ci *         if condition {
307bf215546Sopenharmony_ci *            ...then instructions...
308bf215546Sopenharmony_ci *         } else {
309bf215546Sopenharmony_ci *            ...continue instructions...
310bf215546Sopenharmony_ci *            break
311bf215546Sopenharmony_ci *         }
312bf215546Sopenharmony_ci *         ...body...
313bf215546Sopenharmony_ci *      }
314bf215546Sopenharmony_ci *
315bf215546Sopenharmony_ci * And the iteration count is 3, and unlimit_term->continue_from_then is true,
316bf215546Sopenharmony_ci * then the output will be:
317bf215546Sopenharmony_ci *
318bf215546Sopenharmony_ci *      ...condition...
319bf215546Sopenharmony_ci *      if condition {
320bf215546Sopenharmony_ci *         ...then instructions...
321bf215546Sopenharmony_ci *         ...body...
322bf215546Sopenharmony_ci *         if condition {
323bf215546Sopenharmony_ci *            ...then instructions...
324bf215546Sopenharmony_ci *            ...body...
325bf215546Sopenharmony_ci *            if condition {
326bf215546Sopenharmony_ci *               ...then instructions...
327bf215546Sopenharmony_ci *               ...body...
328bf215546Sopenharmony_ci *            } else {
329bf215546Sopenharmony_ci *               ...continue instructions...
330bf215546Sopenharmony_ci *            }
331bf215546Sopenharmony_ci *         } else {
332bf215546Sopenharmony_ci *            ...continue instructions...
333bf215546Sopenharmony_ci *         }
334bf215546Sopenharmony_ci *      } else {
335bf215546Sopenharmony_ci *         ...continue instructions...
336bf215546Sopenharmony_ci *      }
337bf215546Sopenharmony_ci */
338bf215546Sopenharmony_cistatic void
339bf215546Sopenharmony_cicomplex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
340bf215546Sopenharmony_ci               bool limiting_term_second)
341bf215546Sopenharmony_ci{
342bf215546Sopenharmony_ci   assert(nir_is_trivial_loop_if(unlimit_term->nif,
343bf215546Sopenharmony_ci                                 unlimit_term->break_block));
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
346bf215546Sopenharmony_ci   assert(nir_is_trivial_loop_if(limiting_term->nif,
347bf215546Sopenharmony_ci                                 limiting_term->break_block));
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   loop_prepare_for_unroll(loop);
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci   nir_block *header_blk = nir_loop_first_block(loop);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   nir_cf_list lp_header;
354bf215546Sopenharmony_ci   nir_cf_list limit_break_list;
355bf215546Sopenharmony_ci   unsigned num_times_to_clone;
356bf215546Sopenharmony_ci   if (limiting_term_second) {
357bf215546Sopenharmony_ci      /* Pluck out the loop header */
358bf215546Sopenharmony_ci      nir_cf_extract(&lp_header, nir_before_block(header_blk),
359bf215546Sopenharmony_ci                     nir_before_cf_node(&unlimit_term->nif->cf_node));
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci      /* We need some special handling when its the second terminator causing
362bf215546Sopenharmony_ci       * us to exit the loop for example:
363bf215546Sopenharmony_ci       *
364bf215546Sopenharmony_ci       *   for (int i = 0; i < uniform_lp_count; i++) {
365bf215546Sopenharmony_ci       *      colour = vec4(0.0, 1.0, 0.0, 1.0);
366bf215546Sopenharmony_ci       *
367bf215546Sopenharmony_ci       *      if (i == 1) {
368bf215546Sopenharmony_ci       *         break;
369bf215546Sopenharmony_ci       *      }
370bf215546Sopenharmony_ci       *      ... any further code is unreachable after i == 1 ...
371bf215546Sopenharmony_ci       *   }
372bf215546Sopenharmony_ci       */
373bf215546Sopenharmony_ci      nir_cf_list after_lt;
374bf215546Sopenharmony_ci      nir_if *limit_if = limiting_term->nif;
375bf215546Sopenharmony_ci      nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
376bf215546Sopenharmony_ci                     nir_after_block(nir_loop_last_block(loop)));
377bf215546Sopenharmony_ci      move_cf_list_into_loop_term(&after_lt, limiting_term);
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci      /* Because the trip count is the number of times we pass over the entire
380bf215546Sopenharmony_ci       * loop before hitting a break when the second terminator is the
381bf215546Sopenharmony_ci       * limiting terminator we can actually execute code inside the loop when
382bf215546Sopenharmony_ci       * trip count == 0 e.g. the code above the break.  So we need to bump
383bf215546Sopenharmony_ci       * the trip_count in order for the code below to clone anything.  When
384bf215546Sopenharmony_ci       * trip count == 1 we execute the code above the break twice and the
385bf215546Sopenharmony_ci       * code below it once so we need clone things twice and so on.
386bf215546Sopenharmony_ci       */
387bf215546Sopenharmony_ci      num_times_to_clone = loop->info->max_trip_count + 1;
388bf215546Sopenharmony_ci   } else {
389bf215546Sopenharmony_ci      /* Pluck out the loop header */
390bf215546Sopenharmony_ci      nir_cf_extract(&lp_header, nir_before_block(header_blk),
391bf215546Sopenharmony_ci                     nir_before_cf_node(&limiting_term->nif->cf_node));
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci      nir_block *first_break_block;
394bf215546Sopenharmony_ci      nir_block *first_continue_block;
395bf215546Sopenharmony_ci      get_first_blocks_in_terminator(limiting_term, &first_break_block,
396bf215546Sopenharmony_ci                                     &first_continue_block);
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci      /* Remove the break then extract instructions from the break block so we
399bf215546Sopenharmony_ci       * can insert them in the innermost else of the unrolled loop.
400bf215546Sopenharmony_ci       */
401bf215546Sopenharmony_ci      nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
402bf215546Sopenharmony_ci      nir_instr_remove(break_instr);
403bf215546Sopenharmony_ci      nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
404bf215546Sopenharmony_ci                     nir_after_block(limiting_term->break_block));
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci      nir_cf_list continue_list;
407bf215546Sopenharmony_ci      nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
408bf215546Sopenharmony_ci                     nir_after_block(limiting_term->continue_from_block));
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci      nir_cf_reinsert(&continue_list,
411bf215546Sopenharmony_ci                      nir_after_cf_node(&limiting_term->nif->cf_node));
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci      nir_cf_node_remove(&limiting_term->nif->cf_node);
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci      num_times_to_clone = loop->info->max_trip_count;
416bf215546Sopenharmony_ci   }
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   nir_cf_list lp_body;
421bf215546Sopenharmony_ci   nir_cf_node *unroll_loc =
422bf215546Sopenharmony_ci      complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
423bf215546Sopenharmony_ci                               remap_table, num_times_to_clone);
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   if (!limiting_term_second) {
426bf215546Sopenharmony_ci      assert(unroll_loc->type == nir_cf_node_if);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci      nir_cursor cursor =
429bf215546Sopenharmony_ci         get_complex_unroll_insert_location(unroll_loc,
430bf215546Sopenharmony_ci                                            unlimit_term->continue_from_then);
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci      /* Clone loop header and insert in if branch */
433bf215546Sopenharmony_ci      nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
434bf215546Sopenharmony_ci                                     cursor, remap_table);
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci      cursor =
437bf215546Sopenharmony_ci         get_complex_unroll_insert_location(unroll_loc,
438bf215546Sopenharmony_ci                                            unlimit_term->continue_from_then);
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci      /* Clone so things get properly remapped, and insert break block from
441bf215546Sopenharmony_ci       * the limiting terminator.
442bf215546Sopenharmony_ci       */
443bf215546Sopenharmony_ci      nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
444bf215546Sopenharmony_ci                                     cursor, remap_table);
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci      nir_cf_delete(&limit_break_list);
447bf215546Sopenharmony_ci   }
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   /* The loop has been unrolled so remove it. */
450bf215546Sopenharmony_ci   nir_cf_node_remove(&loop->cf_node);
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci   /* Delete the original loop header and body */
453bf215546Sopenharmony_ci   nir_cf_delete(&lp_header);
454bf215546Sopenharmony_ci   nir_cf_delete(&lp_body);
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci   _mesa_hash_table_destroy(remap_table, NULL);
457bf215546Sopenharmony_ci}
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci/**
460bf215546Sopenharmony_ci * Unroll loops where we only have a single terminator but the exact trip
461bf215546Sopenharmony_ci * count is unknown. For example:
462bf215546Sopenharmony_ci *
463bf215546Sopenharmony_ci *    for (int i = 0; i < imin(x, 4); i++)
464bf215546Sopenharmony_ci *       ...
465bf215546Sopenharmony_ci */
466bf215546Sopenharmony_cistatic void
467bf215546Sopenharmony_cicomplex_unroll_single_terminator(nir_loop *loop)
468bf215546Sopenharmony_ci{
469bf215546Sopenharmony_ci   assert(list_length(&loop->info->loop_terminator_list) == 1);
470bf215546Sopenharmony_ci   assert(loop->info->limiting_terminator);
471bf215546Sopenharmony_ci   assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
472bf215546Sopenharmony_ci                                 loop->info->limiting_terminator->break_block));
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   nir_loop_terminator *terminator = loop->info->limiting_terminator;
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci   loop_prepare_for_unroll(loop);
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   /* Pluck out the loop header */
479bf215546Sopenharmony_ci   nir_cf_list lp_header;
480bf215546Sopenharmony_ci   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
481bf215546Sopenharmony_ci                  nir_before_cf_node(&terminator->nif->cf_node));
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_ci   struct hash_table *remap_table =
484bf215546Sopenharmony_ci      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
485bf215546Sopenharmony_ci                              _mesa_key_pointer_equal);
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   /* We need to clone the loop one extra time in order to clone the lcssa
488bf215546Sopenharmony_ci    * vars for the last iteration (they are inside the following ifs break
489bf215546Sopenharmony_ci    * branch). We leave other passes to clean up this redundant if.
490bf215546Sopenharmony_ci    */
491bf215546Sopenharmony_ci   unsigned num_times_to_clone = loop->info->max_trip_count + 1;
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_ci   nir_cf_list lp_body;
494bf215546Sopenharmony_ci   UNUSED nir_cf_node *unroll_loc =
495bf215546Sopenharmony_ci      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
496bf215546Sopenharmony_ci                               remap_table, num_times_to_clone);
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   assert(unroll_loc->type == nir_cf_node_if);
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_ci   /* We need to clone the lcssa vars in order to insert them on both sides
501bf215546Sopenharmony_ci    * of the if in the last iteration/if-statement. Otherwise the optimisation
502bf215546Sopenharmony_ci    * passes will have trouble optimising the unrolled if ladder.
503bf215546Sopenharmony_ci    */
504bf215546Sopenharmony_ci   nir_cursor cursor =
505bf215546Sopenharmony_ci      get_complex_unroll_insert_location(unroll_loc,
506bf215546Sopenharmony_ci                                         terminator->continue_from_then);
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci   nir_if *if_stmt = nir_cf_node_as_if(unroll_loc);
509bf215546Sopenharmony_ci   nir_cursor start_cursor;
510bf215546Sopenharmony_ci   nir_cursor end_cursor;
511bf215546Sopenharmony_ci   if (terminator->continue_from_then) {
512bf215546Sopenharmony_ci      start_cursor = nir_before_block(nir_if_first_else_block(if_stmt));
513bf215546Sopenharmony_ci      end_cursor = nir_after_block(nir_if_last_else_block(if_stmt));
514bf215546Sopenharmony_ci   } else {
515bf215546Sopenharmony_ci      start_cursor = nir_before_block(nir_if_first_then_block(if_stmt));
516bf215546Sopenharmony_ci      end_cursor = nir_after_block(nir_if_last_then_block(if_stmt));
517bf215546Sopenharmony_ci   }
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci   nir_cf_list lcssa_list;
520bf215546Sopenharmony_ci   nir_cf_extract(&lcssa_list, start_cursor, end_cursor);
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci   /* Insert the cloned vars in the last continue branch */
523bf215546Sopenharmony_ci   nir_cf_list_clone_and_reinsert(&lcssa_list, loop->cf_node.parent,
524bf215546Sopenharmony_ci                                  cursor, remap_table);
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci   start_cursor = terminator->continue_from_then ?
527bf215546Sopenharmony_ci      nir_before_block(nir_if_first_else_block(if_stmt)) :
528bf215546Sopenharmony_ci      nir_before_block(nir_if_first_then_block(if_stmt));
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci   /* Reinsert the cloned vars back where they came from */
531bf215546Sopenharmony_ci   nir_cf_reinsert(&lcssa_list, start_cursor);
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci   /* Delete the original loop header and body */
534bf215546Sopenharmony_ci   nir_cf_delete(&lp_header);
535bf215546Sopenharmony_ci   nir_cf_delete(&lp_body);
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci   /* The original loop has been replaced so remove it. */
538bf215546Sopenharmony_ci   nir_cf_node_remove(&loop->cf_node);
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci   _mesa_hash_table_destroy(remap_table, NULL);
541bf215546Sopenharmony_ci}
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci/* Unrolls the classic wrapper loops e.g
544bf215546Sopenharmony_ci *
545bf215546Sopenharmony_ci *    do {
546bf215546Sopenharmony_ci *        // ...
547bf215546Sopenharmony_ci *    } while (false)
548bf215546Sopenharmony_ci */
549bf215546Sopenharmony_cistatic bool
550bf215546Sopenharmony_ciwrapper_unroll(nir_loop *loop)
551bf215546Sopenharmony_ci{
552bf215546Sopenharmony_ci   if (!list_is_empty(&loop->info->loop_terminator_list)) {
553bf215546Sopenharmony_ci
554bf215546Sopenharmony_ci      /* Unrolling a loop with a large number of exits can result in a
555bf215546Sopenharmony_ci       * large inrease in register pressure. For now we just skip
556bf215546Sopenharmony_ci       * unrolling if we have more than 3 exits (not including the break
557bf215546Sopenharmony_ci       * at the end of the loop).
558bf215546Sopenharmony_ci       *
559bf215546Sopenharmony_ci       * TODO: Most loops that fit this pattern are simply switch
560bf215546Sopenharmony_ci       * statements that are converted to a loop to take advantage of
561bf215546Sopenharmony_ci       * exiting jump instruction handling. In this case we could make
562bf215546Sopenharmony_ci       * use of a binary seach pattern like we do in
563bf215546Sopenharmony_ci       * nir_lower_indirect_derefs(), this should allow us to unroll the
564bf215546Sopenharmony_ci       * loops in an optimal way and should also avoid some of the
565bf215546Sopenharmony_ci       * register pressure that comes from simply nesting the
566bf215546Sopenharmony_ci       * terminators one after the other.
567bf215546Sopenharmony_ci       */
568bf215546Sopenharmony_ci      if (list_length(&loop->info->loop_terminator_list) > 3)
569bf215546Sopenharmony_ci         return false;
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci      loop_prepare_for_unroll(loop);
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci      nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
574bf215546Sopenharmony_ci      list_for_each_entry(nir_loop_terminator, terminator,
575bf215546Sopenharmony_ci                          &loop->info->loop_terminator_list,
576bf215546Sopenharmony_ci                          loop_terminator_link) {
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci         /* Remove break from the terminator */
579bf215546Sopenharmony_ci         nir_instr *break_instr =
580bf215546Sopenharmony_ci            nir_block_last_instr(terminator->break_block);
581bf215546Sopenharmony_ci         nir_instr_remove(break_instr);
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci         /* Pluck out the loop body. */
584bf215546Sopenharmony_ci         nir_cf_list loop_body;
585bf215546Sopenharmony_ci         nir_cf_extract(&loop_body,
586bf215546Sopenharmony_ci                        nir_after_cf_node(&terminator->nif->cf_node),
587bf215546Sopenharmony_ci                        loop_end);
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci         /* Reinsert loop body into continue from block */
590bf215546Sopenharmony_ci         nir_cf_reinsert(&loop_body,
591bf215546Sopenharmony_ci                         nir_after_block(terminator->continue_from_block));
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci         loop_end = terminator->continue_from_then ?
594bf215546Sopenharmony_ci           nir_after_block(nir_if_last_then_block(terminator->nif)) :
595bf215546Sopenharmony_ci           nir_after_block(nir_if_last_else_block(terminator->nif));
596bf215546Sopenharmony_ci      }
597bf215546Sopenharmony_ci   } else {
598bf215546Sopenharmony_ci      loop_prepare_for_unroll(loop);
599bf215546Sopenharmony_ci   }
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   /* Pluck out the loop body. */
602bf215546Sopenharmony_ci   nir_cf_list loop_body;
603bf215546Sopenharmony_ci   nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
604bf215546Sopenharmony_ci                  nir_after_block(nir_loop_last_block(loop)));
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_ci   /* Reinsert loop body after the loop */
607bf215546Sopenharmony_ci   nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci   /* The loop has been unrolled so remove it. */
610bf215546Sopenharmony_ci   nir_cf_node_remove(&loop->cf_node);
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci   return true;
613bf215546Sopenharmony_ci}
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_cistatic bool
616bf215546Sopenharmony_ciis_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
617bf215546Sopenharmony_ci                        unsigned trip_count)
618bf215546Sopenharmony_ci{
619bf215546Sopenharmony_ci   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
620bf215546Sopenharmony_ci      if (d->deref_type != nir_deref_type_array)
621bf215546Sopenharmony_ci         continue;
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci      nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
624bf215546Sopenharmony_ci      nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
625bf215546Sopenharmony_ci      if (!nir_srcs_equal(d->arr.index, src))
626bf215546Sopenharmony_ci         continue;
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci      nir_deref_instr *parent = nir_deref_instr_parent(d);
629bf215546Sopenharmony_ci      assert(glsl_type_is_array(parent->type) ||
630bf215546Sopenharmony_ci             glsl_type_is_matrix(parent->type) ||
631bf215546Sopenharmony_ci             glsl_type_is_vector(parent->type));
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci      /* We have already unrolled the loop and the new one will be imbedded in
634bf215546Sopenharmony_ci       * the innermost continue branch. So unless the array is greater than
635bf215546Sopenharmony_ci       * the trip count any iteration over the loop will be an out of bounds
636bf215546Sopenharmony_ci       * access of the array.
637bf215546Sopenharmony_ci       */
638bf215546Sopenharmony_ci      unsigned length = glsl_type_is_vector(parent->type) ?
639bf215546Sopenharmony_ci                        glsl_get_vector_elements(parent->type) :
640bf215546Sopenharmony_ci                        glsl_get_length(parent->type);
641bf215546Sopenharmony_ci      return length <= trip_count;
642bf215546Sopenharmony_ci   }
643bf215546Sopenharmony_ci
644bf215546Sopenharmony_ci   return false;
645bf215546Sopenharmony_ci}
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci/* If we know an array access is going to be out of bounds remove or replace
648bf215546Sopenharmony_ci * the access with an undef. This can later result in the entire loop being
649bf215546Sopenharmony_ci * removed by nir_opt_dead_cf().
650bf215546Sopenharmony_ci */
651bf215546Sopenharmony_cistatic void
652bf215546Sopenharmony_ciremove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
653bf215546Sopenharmony_ci                                   nir_loop_terminator *term,
654bf215546Sopenharmony_ci                                   nir_cf_list *lp_header,
655bf215546Sopenharmony_ci                                   nir_cf_list *lp_body,
656bf215546Sopenharmony_ci                                   unsigned trip_count)
657bf215546Sopenharmony_ci{
658bf215546Sopenharmony_ci   if (!loop->info->guessed_trip_count)
659bf215546Sopenharmony_ci      return;
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci   /* Temporarily recreate the original loop so we can alter it */
662bf215546Sopenharmony_ci   nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
663bf215546Sopenharmony_ci   nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   nir_builder b;
666bf215546Sopenharmony_ci   nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci   nir_foreach_block_in_cf_node(block, &loop->cf_node) {
669bf215546Sopenharmony_ci      nir_foreach_instr_safe(instr, block) {
670bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
671bf215546Sopenharmony_ci            continue;
672bf215546Sopenharmony_ci
673bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_ci         /* Check for arrays variably-indexed by a loop induction variable.
676bf215546Sopenharmony_ci          * If this access is out of bounds remove the instruction or replace
677bf215546Sopenharmony_ci          * its use with an undefined instruction.
678bf215546Sopenharmony_ci          * If the loop is no longer useful we leave it for the appropriate
679bf215546Sopenharmony_ci          * pass to clean it up for us.
680bf215546Sopenharmony_ci          */
681bf215546Sopenharmony_ci         if (intrin->intrinsic == nir_intrinsic_load_deref ||
682bf215546Sopenharmony_ci             intrin->intrinsic == nir_intrinsic_store_deref ||
683bf215546Sopenharmony_ci             intrin->intrinsic == nir_intrinsic_copy_deref) {
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci            if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
686bf215546Sopenharmony_ci                                        trip_count)) {
687bf215546Sopenharmony_ci               if (intrin->intrinsic == nir_intrinsic_load_deref) {
688bf215546Sopenharmony_ci                  nir_ssa_def *undef =
689bf215546Sopenharmony_ci                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
690bf215546Sopenharmony_ci                                   intrin->dest.ssa.bit_size);
691bf215546Sopenharmony_ci                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
692bf215546Sopenharmony_ci                                           undef);
693bf215546Sopenharmony_ci               } else {
694bf215546Sopenharmony_ci                  nir_instr_remove(instr);
695bf215546Sopenharmony_ci                  continue;
696bf215546Sopenharmony_ci               }
697bf215546Sopenharmony_ci            }
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci            if (intrin->intrinsic == nir_intrinsic_copy_deref &&
700bf215546Sopenharmony_ci                is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
701bf215546Sopenharmony_ci                                        trip_count)) {
702bf215546Sopenharmony_ci               nir_instr_remove(instr);
703bf215546Sopenharmony_ci            }
704bf215546Sopenharmony_ci         }
705bf215546Sopenharmony_ci      }
706bf215546Sopenharmony_ci   }
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci   /* Now that we are done extract the loop header and body again */
709bf215546Sopenharmony_ci   nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
710bf215546Sopenharmony_ci                  nir_before_cf_node(&term->nif->cf_node));
711bf215546Sopenharmony_ci   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
712bf215546Sopenharmony_ci                  nir_after_block(nir_loop_last_block(loop)));
713bf215546Sopenharmony_ci}
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci/* Partially unrolls loops that don't have a known trip count.
716bf215546Sopenharmony_ci */
717bf215546Sopenharmony_cistatic void
718bf215546Sopenharmony_cipartial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
719bf215546Sopenharmony_ci{
720bf215546Sopenharmony_ci   assert(list_length(&loop->info->loop_terminator_list) == 1);
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci   nir_loop_terminator *terminator =
723bf215546Sopenharmony_ci      list_first_entry(&loop->info->loop_terminator_list,
724bf215546Sopenharmony_ci                        nir_loop_terminator, loop_terminator_link);
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci   assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci   loop_prepare_for_unroll(loop);
729bf215546Sopenharmony_ci
730bf215546Sopenharmony_ci   /* Pluck out the loop header */
731bf215546Sopenharmony_ci   nir_cf_list lp_header;
732bf215546Sopenharmony_ci   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
733bf215546Sopenharmony_ci                  nir_before_cf_node(&terminator->nif->cf_node));
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci   struct hash_table *remap_table =
736bf215546Sopenharmony_ci      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
737bf215546Sopenharmony_ci                              _mesa_key_pointer_equal);
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci   nir_cf_list lp_body;
740bf215546Sopenharmony_ci   nir_cf_node *unroll_loc =
741bf215546Sopenharmony_ci      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
742bf215546Sopenharmony_ci                               remap_table, trip_count);
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci   /* Attempt to remove out of bounds array access */
745bf215546Sopenharmony_ci   remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
746bf215546Sopenharmony_ci                                      &lp_body, trip_count);
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci   nir_cursor cursor =
749bf215546Sopenharmony_ci      get_complex_unroll_insert_location(unroll_loc,
750bf215546Sopenharmony_ci                                         terminator->continue_from_then);
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci   /* Reinsert the loop in the innermost nested continue branch of the unrolled
753bf215546Sopenharmony_ci    * loop.
754bf215546Sopenharmony_ci    */
755bf215546Sopenharmony_ci   nir_loop *new_loop = nir_loop_create(shader);
756bf215546Sopenharmony_ci   nir_cf_node_insert(cursor, &new_loop->cf_node);
757bf215546Sopenharmony_ci   new_loop->partially_unrolled = true;
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci   /* Clone loop header and insert into new loop */
760bf215546Sopenharmony_ci   nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
761bf215546Sopenharmony_ci                                  nir_after_cf_list(&new_loop->body),
762bf215546Sopenharmony_ci                                  remap_table);
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci   /* Clone loop body and insert into new loop */
765bf215546Sopenharmony_ci   nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
766bf215546Sopenharmony_ci                                  nir_after_cf_list(&new_loop->body),
767bf215546Sopenharmony_ci                                  remap_table);
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   /* Insert break back into terminator */
770bf215546Sopenharmony_ci   nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
771bf215546Sopenharmony_ci   nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
772bf215546Sopenharmony_ci   if (terminator->continue_from_then) {
773bf215546Sopenharmony_ci      nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
774bf215546Sopenharmony_ci   } else {
775bf215546Sopenharmony_ci      nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
776bf215546Sopenharmony_ci   }
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   /* Delete the original loop header and body */
779bf215546Sopenharmony_ci   nir_cf_delete(&lp_header);
780bf215546Sopenharmony_ci   nir_cf_delete(&lp_body);
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci   /* The original loop has been replaced so remove it. */
783bf215546Sopenharmony_ci   nir_cf_node_remove(&loop->cf_node);
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci   _mesa_hash_table_destroy(remap_table, NULL);
786bf215546Sopenharmony_ci}
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_cistatic bool
789bf215546Sopenharmony_ciis_indirect_load(nir_instr *instr)
790bf215546Sopenharmony_ci{
791bf215546Sopenharmony_ci   if (instr->type == nir_instr_type_intrinsic) {
792bf215546Sopenharmony_ci      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci      if ((intrin->intrinsic == nir_intrinsic_load_ubo ||
795bf215546Sopenharmony_ci           intrin->intrinsic == nir_intrinsic_load_ssbo) &&
796bf215546Sopenharmony_ci          !nir_src_is_const(intrin->src[1])) {
797bf215546Sopenharmony_ci         return true;
798bf215546Sopenharmony_ci      }
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci      if (intrin->intrinsic == nir_intrinsic_load_global)
801bf215546Sopenharmony_ci         return true;
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci      if (intrin->intrinsic == nir_intrinsic_load_deref ||
804bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_store_deref) {
805bf215546Sopenharmony_ci         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
806bf215546Sopenharmony_ci         nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global;
807bf215546Sopenharmony_ci         if (!nir_deref_mode_may_be(deref, mem_modes))
808bf215546Sopenharmony_ci            return false;
809bf215546Sopenharmony_ci         while (deref) {
810bf215546Sopenharmony_ci            if ((deref->deref_type == nir_deref_type_array ||
811bf215546Sopenharmony_ci                 deref->deref_type == nir_deref_type_ptr_as_array) &&
812bf215546Sopenharmony_ci                !nir_src_is_const(deref->arr.index)) {
813bf215546Sopenharmony_ci               return true;
814bf215546Sopenharmony_ci            }
815bf215546Sopenharmony_ci            deref = nir_deref_instr_parent(deref);
816bf215546Sopenharmony_ci         }
817bf215546Sopenharmony_ci      }
818bf215546Sopenharmony_ci   } else if (instr->type == nir_instr_type_tex) {
819bf215546Sopenharmony_ci      nir_tex_instr *tex = nir_instr_as_tex(instr);
820bf215546Sopenharmony_ci
821bf215546Sopenharmony_ci      for (unsigned i = 0; i < tex->num_srcs; i++) {
822bf215546Sopenharmony_ci         if (!nir_src_is_const(tex->src[i].src))
823bf215546Sopenharmony_ci            return true;
824bf215546Sopenharmony_ci      }
825bf215546Sopenharmony_ci   }
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci   return false;
828bf215546Sopenharmony_ci}
829bf215546Sopenharmony_ci
830bf215546Sopenharmony_cistatic bool
831bf215546Sopenharmony_cican_pipeline_loads(nir_loop *loop)
832bf215546Sopenharmony_ci{
833bf215546Sopenharmony_ci   if (!loop->info->exact_trip_count_known)
834bf215546Sopenharmony_ci      return false;
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_ci   bool interesting_loads = false;
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
839bf215546Sopenharmony_ci      if (cf_node == &loop->info->limiting_terminator->nif->cf_node)
840bf215546Sopenharmony_ci         continue;
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci      /* Control flow usually prevents useful scheduling */
843bf215546Sopenharmony_ci      if (cf_node->type != nir_cf_node_block)
844bf215546Sopenharmony_ci         return false;
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_ci      if (interesting_loads)
847bf215546Sopenharmony_ci         continue;
848bf215546Sopenharmony_ci
849bf215546Sopenharmony_ci      nir_block *block = nir_cf_node_as_block(cf_node);
850bf215546Sopenharmony_ci      nir_foreach_instr(instr, block) {
851bf215546Sopenharmony_ci         if (is_indirect_load(instr)) {
852bf215546Sopenharmony_ci            interesting_loads = true;
853bf215546Sopenharmony_ci            break;
854bf215546Sopenharmony_ci         }
855bf215546Sopenharmony_ci      }
856bf215546Sopenharmony_ci   }
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci   return interesting_loads;
859bf215546Sopenharmony_ci}
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_ci/*
862bf215546Sopenharmony_ci * Returns true if we should unroll the loop, otherwise false.
863bf215546Sopenharmony_ci */
864bf215546Sopenharmony_cistatic bool
865bf215546Sopenharmony_cicheck_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
866bf215546Sopenharmony_ci{
867bf215546Sopenharmony_ci   if (loop->control == nir_loop_control_unroll)
868bf215546Sopenharmony_ci      return true;
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_ci   if (loop->control == nir_loop_control_dont_unroll)
871bf215546Sopenharmony_ci      return false;
872bf215546Sopenharmony_ci
873bf215546Sopenharmony_ci   nir_loop_info *li = loop->info;
874bf215546Sopenharmony_ci   unsigned max_iter = shader->options->max_unroll_iterations;
875bf215546Sopenharmony_ci   /* Unroll much more aggressively if it can hide load latency. */
876bf215546Sopenharmony_ci   if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop))
877bf215546Sopenharmony_ci      max_iter = shader->options->max_unroll_iterations_aggressive;
878bf215546Sopenharmony_ci   unsigned trip_count =
879bf215546Sopenharmony_ci      li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_ci   if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter)
882bf215546Sopenharmony_ci      return true;
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci   unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT;
885bf215546Sopenharmony_ci   unsigned cost = li->instr_cost * trip_count;
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci   if (cost <= cost_limit && trip_count <= max_iter)
888bf215546Sopenharmony_ci      return true;
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_ci   return false;
891bf215546Sopenharmony_ci}
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_cistatic bool
894bf215546Sopenharmony_ciprocess_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
895bf215546Sopenharmony_ci              bool *unrolled_this_block);
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_cistatic bool
898bf215546Sopenharmony_ciprocess_loops_in_block(nir_shader *sh, struct exec_list *block,
899bf215546Sopenharmony_ci                       bool *has_nested_loop_out)
900bf215546Sopenharmony_ci{
901bf215546Sopenharmony_ci   /* We try to unroll as many loops in one pass as possible.
902bf215546Sopenharmony_ci    * E.g. we can safely unroll both loops in this block:
903bf215546Sopenharmony_ci    *
904bf215546Sopenharmony_ci    *    if (...) {
905bf215546Sopenharmony_ci    *       loop {...}
906bf215546Sopenharmony_ci    *    }
907bf215546Sopenharmony_ci    *
908bf215546Sopenharmony_ci    *    if (...) {
909bf215546Sopenharmony_ci    *       loop {...}
910bf215546Sopenharmony_ci    *    }
911bf215546Sopenharmony_ci    *
912bf215546Sopenharmony_ci    * Unrolling one loop doesn't affect the other one.
913bf215546Sopenharmony_ci    *
914bf215546Sopenharmony_ci    * On the other hand for block with:
915bf215546Sopenharmony_ci    *
916bf215546Sopenharmony_ci    *    loop {...}
917bf215546Sopenharmony_ci    *    ...
918bf215546Sopenharmony_ci    *    loop {...}
919bf215546Sopenharmony_ci    *
920bf215546Sopenharmony_ci    * It is unsafe to unroll both loops in one pass without taking
921bf215546Sopenharmony_ci    * complicating precautions, since the structure of the block would
922bf215546Sopenharmony_ci    * change after unrolling the first loop. So in such a case we leave
923bf215546Sopenharmony_ci    * the second loop for the next iteration of unrolling to handle.
924bf215546Sopenharmony_ci    */
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci   bool progress = false;
927bf215546Sopenharmony_ci   bool unrolled_this_block = false;
928bf215546Sopenharmony_ci
929bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, nested_node, node, block) {
930bf215546Sopenharmony_ci      if (process_loops(sh, nested_node,
931bf215546Sopenharmony_ci                        has_nested_loop_out, &unrolled_this_block)) {
932bf215546Sopenharmony_ci         progress = true;
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci         /* If current node is unrolled we could not safely continue
935bf215546Sopenharmony_ci          * our iteration since we don't know the next node
936bf215546Sopenharmony_ci          * and it's hard to guarantee that we won't end up unrolling
937bf215546Sopenharmony_ci          * inner loop of the currently unrolled one, if such exists.
938bf215546Sopenharmony_ci          */
939bf215546Sopenharmony_ci         if (unrolled_this_block) {
940bf215546Sopenharmony_ci            break;
941bf215546Sopenharmony_ci         }
942bf215546Sopenharmony_ci      }
943bf215546Sopenharmony_ci   }
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci   return progress;
946bf215546Sopenharmony_ci}
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_cistatic bool
949bf215546Sopenharmony_ciprocess_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
950bf215546Sopenharmony_ci              bool *unrolled_this_block)
951bf215546Sopenharmony_ci{
952bf215546Sopenharmony_ci   bool progress = false;
953bf215546Sopenharmony_ci   bool has_nested_loop = false;
954bf215546Sopenharmony_ci   nir_loop *loop;
955bf215546Sopenharmony_ci
956bf215546Sopenharmony_ci   switch (cf_node->type) {
957bf215546Sopenharmony_ci   case nir_cf_node_block:
958bf215546Sopenharmony_ci      return progress;
959bf215546Sopenharmony_ci   case nir_cf_node_if: {
960bf215546Sopenharmony_ci      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
961bf215546Sopenharmony_ci      progress |= process_loops_in_block(sh, &if_stmt->then_list,
962bf215546Sopenharmony_ci                                         has_nested_loop_out);
963bf215546Sopenharmony_ci      progress |= process_loops_in_block(sh, &if_stmt->else_list,
964bf215546Sopenharmony_ci                                         has_nested_loop_out);
965bf215546Sopenharmony_ci      return progress;
966bf215546Sopenharmony_ci   }
967bf215546Sopenharmony_ci   case nir_cf_node_loop: {
968bf215546Sopenharmony_ci      loop = nir_cf_node_as_loop(cf_node);
969bf215546Sopenharmony_ci      progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop);
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_ci      break;
972bf215546Sopenharmony_ci   }
973bf215546Sopenharmony_ci   default:
974bf215546Sopenharmony_ci      unreachable("unknown cf node type");
975bf215546Sopenharmony_ci   }
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_ci   const bool unrolled_child_block = progress;
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci   /* Don't attempt to unroll a second inner loop in this pass, wait until the
980bf215546Sopenharmony_ci    * next pass as we have altered the cf.
981bf215546Sopenharmony_ci    */
982bf215546Sopenharmony_ci   if (!progress && loop->control != nir_loop_control_dont_unroll) {
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci      /* Remove the conditional break statements associated with all terminators
985bf215546Sopenharmony_ci       * that are associated with a fixed iteration count, except for the one
986bf215546Sopenharmony_ci       * associated with the limiting terminator--that one needs to stay, since
987bf215546Sopenharmony_ci       * it terminates the loop.
988bf215546Sopenharmony_ci       */
989bf215546Sopenharmony_ci      if (loop->info->limiting_terminator) {
990bf215546Sopenharmony_ci         list_for_each_entry_safe(nir_loop_terminator, t,
991bf215546Sopenharmony_ci                                  &loop->info->loop_terminator_list,
992bf215546Sopenharmony_ci                                  loop_terminator_link) {
993bf215546Sopenharmony_ci            if (t->exact_trip_count_unknown)
994bf215546Sopenharmony_ci               continue;
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci            if (t != loop->info->limiting_terminator) {
997bf215546Sopenharmony_ci
998bf215546Sopenharmony_ci               /* Only delete the if-statement if the continue block is empty.
999bf215546Sopenharmony_ci                * We trust that nir_opt_if() does its job well enough to
1000bf215546Sopenharmony_ci                * remove all instructions from the continue block when possible.
1001bf215546Sopenharmony_ci                */
1002bf215546Sopenharmony_ci               nir_block *first_continue_from_blk = t->continue_from_then ?
1003bf215546Sopenharmony_ci                  nir_if_first_then_block(t->nif) :
1004bf215546Sopenharmony_ci                  nir_if_first_else_block(t->nif);
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_ci               if (!(nir_cf_node_is_last(&first_continue_from_blk->cf_node) &&
1007bf215546Sopenharmony_ci                     exec_list_is_empty(&first_continue_from_blk->instr_list)))
1008bf215546Sopenharmony_ci                  continue;
1009bf215546Sopenharmony_ci
1010bf215546Sopenharmony_ci               /* Now delete the if */
1011bf215546Sopenharmony_ci               nir_cf_node_remove(&t->nif->cf_node);
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_ci               /* Also remove it from the terminator list */
1014bf215546Sopenharmony_ci               list_del(&t->loop_terminator_link);
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_ci               progress = true;
1017bf215546Sopenharmony_ci            }
1018bf215546Sopenharmony_ci         }
1019bf215546Sopenharmony_ci      }
1020bf215546Sopenharmony_ci
1021bf215546Sopenharmony_ci      /* Check for the classic
1022bf215546Sopenharmony_ci       *
1023bf215546Sopenharmony_ci       *    do {
1024bf215546Sopenharmony_ci       *        // ...
1025bf215546Sopenharmony_ci       *    } while (false)
1026bf215546Sopenharmony_ci       *
1027bf215546Sopenharmony_ci       * that is used to wrap multi-line macros. GLSL IR also wraps switch
1028bf215546Sopenharmony_ci       * statements in a loop like this.
1029bf215546Sopenharmony_ci       */
1030bf215546Sopenharmony_ci      if (loop->info->limiting_terminator == NULL &&
1031bf215546Sopenharmony_ci          !loop->info->complex_loop) {
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci         nir_block *last_loop_blk = nir_loop_last_block(loop);
1034bf215546Sopenharmony_ci         if (nir_block_ends_in_break(last_loop_blk)) {
1035bf215546Sopenharmony_ci            progress = wrapper_unroll(loop);
1036bf215546Sopenharmony_ci            goto exit;
1037bf215546Sopenharmony_ci         }
1038bf215546Sopenharmony_ci
1039bf215546Sopenharmony_ci         /* If we were able to guess the loop iteration based on array access
1040bf215546Sopenharmony_ci          * then do a partial unroll.
1041bf215546Sopenharmony_ci          */
1042bf215546Sopenharmony_ci         unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1043bf215546Sopenharmony_ci         if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
1044bf215546Sopenharmony_ci             loop->info->guessed_trip_count &&
1045bf215546Sopenharmony_ci             check_unrolling_restrictions(sh, loop)) {
1046bf215546Sopenharmony_ci            partial_unroll(sh, loop, loop->info->guessed_trip_count);
1047bf215546Sopenharmony_ci            progress = true;
1048bf215546Sopenharmony_ci         }
1049bf215546Sopenharmony_ci      }
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci      /* Intentionally don't consider exact_trip_count_known here.  When
1052bf215546Sopenharmony_ci       * max_trip_count is non-zero, it is the upper bound on the number of
1053bf215546Sopenharmony_ci       * times the loop will iterate, but the loop may iterate less.  For
1054bf215546Sopenharmony_ci       * example, the following loop will iterate 0 or 1 time:
1055bf215546Sopenharmony_ci       *
1056bf215546Sopenharmony_ci       *    for (i = 0; i < min(x, 1); i++) { ... }
1057bf215546Sopenharmony_ci       *
1058bf215546Sopenharmony_ci       * Trivial single-iteration loops (e.g., do { ... } while (false)) and
1059bf215546Sopenharmony_ci       * trivial zero-iteration loops (e.g., while (false) { ... }) will have
1060bf215546Sopenharmony_ci       * already been handled.
1061bf215546Sopenharmony_ci       *
1062bf215546Sopenharmony_ci       * If the loop is known to execute at most once and meets the other
1063bf215546Sopenharmony_ci       * unrolling criteria, unroll it even if it has nested loops.
1064bf215546Sopenharmony_ci       *
1065bf215546Sopenharmony_ci       * It is unlikely that such loops exist in real shaders. GraphicsFuzz is
1066bf215546Sopenharmony_ci       * known to generate spurious loops that iterate exactly once.  It is
1067bf215546Sopenharmony_ci       * plausible that it could eventually start generating loops like the
1068bf215546Sopenharmony_ci       * example above, so it seems logical to defend against it now.
1069bf215546Sopenharmony_ci       */
1070bf215546Sopenharmony_ci      if (!loop->info->limiting_terminator ||
1071bf215546Sopenharmony_ci          (loop->info->max_trip_count != 1 && has_nested_loop))
1072bf215546Sopenharmony_ci         goto exit;
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci      if (!check_unrolling_restrictions(sh, loop))
1075bf215546Sopenharmony_ci         goto exit;
1076bf215546Sopenharmony_ci
1077bf215546Sopenharmony_ci      if (loop->info->exact_trip_count_known) {
1078bf215546Sopenharmony_ci         simple_unroll(loop);
1079bf215546Sopenharmony_ci         progress = true;
1080bf215546Sopenharmony_ci      } else {
1081bf215546Sopenharmony_ci         /* Attempt to unroll loops with two terminators. */
1082bf215546Sopenharmony_ci         unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1083bf215546Sopenharmony_ci         if (num_lt == 2 &&
1084bf215546Sopenharmony_ci             !loop->info->limiting_terminator->exact_trip_count_unknown) {
1085bf215546Sopenharmony_ci            bool limiting_term_second = true;
1086bf215546Sopenharmony_ci            nir_loop_terminator *terminator =
1087bf215546Sopenharmony_ci               list_first_entry(&loop->info->loop_terminator_list,
1088bf215546Sopenharmony_ci                                nir_loop_terminator, loop_terminator_link);
1089bf215546Sopenharmony_ci
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci            if (terminator->nif == loop->info->limiting_terminator->nif) {
1092bf215546Sopenharmony_ci               limiting_term_second = false;
1093bf215546Sopenharmony_ci               terminator =
1094bf215546Sopenharmony_ci                  list_last_entry(&loop->info->loop_terminator_list,
1095bf215546Sopenharmony_ci                                  nir_loop_terminator, loop_terminator_link);
1096bf215546Sopenharmony_ci            }
1097bf215546Sopenharmony_ci
1098bf215546Sopenharmony_ci            /* If the first terminator has a trip count of zero and is the
1099bf215546Sopenharmony_ci             * limiting terminator just do a simple unroll as the second
1100bf215546Sopenharmony_ci             * terminator can never be reached.
1101bf215546Sopenharmony_ci             */
1102bf215546Sopenharmony_ci            if (loop->info->max_trip_count == 0 && !limiting_term_second) {
1103bf215546Sopenharmony_ci               simple_unroll(loop);
1104bf215546Sopenharmony_ci            } else {
1105bf215546Sopenharmony_ci               complex_unroll(loop, terminator, limiting_term_second);
1106bf215546Sopenharmony_ci            }
1107bf215546Sopenharmony_ci            progress = true;
1108bf215546Sopenharmony_ci         }
1109bf215546Sopenharmony_ci
1110bf215546Sopenharmony_ci         if (num_lt == 1) {
1111bf215546Sopenharmony_ci            assert(loop->info->limiting_terminator->exact_trip_count_unknown);
1112bf215546Sopenharmony_ci            complex_unroll_single_terminator(loop);
1113bf215546Sopenharmony_ci            progress = true;
1114bf215546Sopenharmony_ci         }
1115bf215546Sopenharmony_ci      }
1116bf215546Sopenharmony_ci   }
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_ciexit:
1119bf215546Sopenharmony_ci   *has_nested_loop_out = true;
1120bf215546Sopenharmony_ci   if (progress && !unrolled_child_block)
1121bf215546Sopenharmony_ci      *unrolled_this_block = true;
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci   return progress;
1124bf215546Sopenharmony_ci}
1125bf215546Sopenharmony_ci
1126bf215546Sopenharmony_cistatic bool
1127bf215546Sopenharmony_cinir_opt_loop_unroll_impl(nir_function_impl *impl,
1128bf215546Sopenharmony_ci                         nir_variable_mode indirect_mask,
1129bf215546Sopenharmony_ci                         bool force_unroll_sampler_indirect)
1130bf215546Sopenharmony_ci{
1131bf215546Sopenharmony_ci   bool progress = false;
1132bf215546Sopenharmony_ci   nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask,
1133bf215546Sopenharmony_ci                        (int) force_unroll_sampler_indirect);
1134bf215546Sopenharmony_ci   nir_metadata_require(impl, nir_metadata_block_index);
1135bf215546Sopenharmony_ci
1136bf215546Sopenharmony_ci   bool has_nested_loop = false;
1137bf215546Sopenharmony_ci   progress |= process_loops_in_block(impl->function->shader, &impl->body,
1138bf215546Sopenharmony_ci                                      &has_nested_loop);
1139bf215546Sopenharmony_ci
1140bf215546Sopenharmony_ci   if (progress) {
1141bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_none);
1142bf215546Sopenharmony_ci      nir_lower_regs_to_ssa_impl(impl);
1143bf215546Sopenharmony_ci   } else {
1144bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
1145bf215546Sopenharmony_ci   }
1146bf215546Sopenharmony_ci
1147bf215546Sopenharmony_ci   return progress;
1148bf215546Sopenharmony_ci}
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_ci/**
1151bf215546Sopenharmony_ci * indirect_mask specifies which type of indirectly accessed variables
1152bf215546Sopenharmony_ci * should force loop unrolling.
1153bf215546Sopenharmony_ci */
1154bf215546Sopenharmony_cibool
1155bf215546Sopenharmony_cinir_opt_loop_unroll(nir_shader *shader)
1156bf215546Sopenharmony_ci{
1157bf215546Sopenharmony_ci   bool progress = false;
1158bf215546Sopenharmony_ci
1159bf215546Sopenharmony_ci   bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler;
1160bf215546Sopenharmony_ci   nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
1161bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
1162bf215546Sopenharmony_ci      if (function->impl) {
1163bf215546Sopenharmony_ci         progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask,
1164bf215546Sopenharmony_ci                                              force_unroll_sampler_indirect);
1165bf215546Sopenharmony_ci      }
1166bf215546Sopenharmony_ci   }
1167bf215546Sopenharmony_ci   return progress;
1168bf215546Sopenharmony_ci}
1169