1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_builder.h"
26#include "nir_control_flow.h"
27#include "nir_loop_analyze.h"
28
29
30/* This limit is chosen fairly arbitrarily.  GLSL IR max iteration is 32
31 * instructions. (Multiply counting nodes and magic number 5.)  But there is
32 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
33 * to give about the same results. Around 5 instructions per node.  But some
34 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
35 * we set it to 26.
36 */
37#define LOOP_UNROLL_LIMIT 26
38
39/* Prepare this loop for unrolling by first converting to lcssa and then
40 * converting the phis from the top level of the loop body to regs.
41 * Partially converting out of SSA allows us to unroll the loop without having
42 * to keep track of and update phis along the way which gets tricky and
43 * doesn't add much value over converting to regs.
44 *
45 * The loop may have a jump instruction at the end of the loop which does
46 * nothing.  Once we're out of SSA, we can safely delete it so we don't have
47 * to deal with it later.
48 */
49static void
50loop_prepare_for_unroll(nir_loop *loop)
51{
52   nir_rematerialize_derefs_in_use_blocks_impl(
53      nir_cf_node_get_function(&loop->cf_node));
54
55   nir_convert_loop_to_lcssa(loop);
56
57   /* Lower phis at the top level of the loop body */
58   foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
59      if (nir_cf_node_block == node->type) {
60         nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
61      }
62   }
63
64   /* Lower phis after the loop */
65   nir_block *block_after_loop =
66      nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
67
68   nir_lower_phis_to_regs_block(block_after_loop);
69
70   /* Remove jump if it's the last instruction in the loop */
71   nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
72   if (last_instr && last_instr->type == nir_instr_type_jump) {
73      nir_instr_remove(last_instr);
74   }
75}
76
77static void
78get_first_blocks_in_terminator(nir_loop_terminator *term,
79                               nir_block **first_break_block,
80                               nir_block **first_continue_block)
81{
82   if (term->continue_from_then) {
83      *first_continue_block = nir_if_first_then_block(term->nif);
84      *first_break_block = nir_if_first_else_block(term->nif);
85   } else {
86      *first_continue_block = nir_if_first_else_block(term->nif);
87      *first_break_block = nir_if_first_then_block(term->nif);
88   }
89}
90
91/**
92 * Unroll a loop where we know exactly how many iterations there are and there
93 * is only a single exit point.  Note here we can unroll loops with multiple
94 * theoretical exits that only have a single terminating exit that we always
95 * know is the "real" exit.
96 *
97 *     loop {
98 *         ...instrs...
99 *     }
100 *
101 * And the iteration count is 3, the output will be:
102 *
103 *     ...instrs... ...instrs... ...instrs...
104 */
105static void
106simple_unroll(nir_loop *loop)
107{
108   nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
109   assert(nir_is_trivial_loop_if(limiting_term->nif,
110                                 limiting_term->break_block));
111
112   loop_prepare_for_unroll(loop);
113
114   /* Skip over loop terminator and get the loop body. */
115   list_for_each_entry(nir_loop_terminator, terminator,
116                       &loop->info->loop_terminator_list,
117                       loop_terminator_link) {
118
119      /* Remove all but the limiting terminator as we know the other exit
120       * conditions can never be met. Note we need to extract any instructions
121       * in the continue from branch and insert then into the loop body before
122       * removing it.
123       */
124      if (terminator->nif != limiting_term->nif) {
125         nir_block *first_break_block;
126         nir_block *first_continue_block;
127         get_first_blocks_in_terminator(terminator, &first_break_block,
128                                        &first_continue_block);
129
130         assert(nir_is_trivial_loop_if(terminator->nif,
131                                       terminator->break_block));
132
133         nir_cf_list continue_from_lst;
134         nir_cf_extract(&continue_from_lst,
135                        nir_before_block(first_continue_block),
136                        nir_after_block(terminator->continue_from_block));
137         nir_cf_reinsert(&continue_from_lst,
138                         nir_after_cf_node(&terminator->nif->cf_node));
139
140         nir_cf_node_remove(&terminator->nif->cf_node);
141      }
142   }
143
144   nir_block *first_break_block;
145   nir_block *first_continue_block;
146   get_first_blocks_in_terminator(limiting_term, &first_break_block,
147                                  &first_continue_block);
148
149   /* Pluck out the loop header */
150   nir_block *header_blk = nir_loop_first_block(loop);
151   nir_cf_list lp_header;
152   nir_cf_extract(&lp_header, nir_before_block(header_blk),
153                  nir_before_cf_node(&limiting_term->nif->cf_node));
154
155   /* Add the continue from block of the limiting terminator to the loop body
156    */
157   nir_cf_list continue_from_lst;
158   nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
159                  nir_after_block(limiting_term->continue_from_block));
160   nir_cf_reinsert(&continue_from_lst,
161                   nir_after_cf_node(&limiting_term->nif->cf_node));
162
163   /* Pluck out the loop body */
164   nir_cf_list loop_body;
165   nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
166                  nir_after_block(nir_loop_last_block(loop)));
167
168   struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
169
170   /* Clone the loop header and insert before the loop */
171   nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
172                                  nir_before_cf_node(&loop->cf_node),
173                                  remap_table);
174
175   for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
176      /* Clone loop body and insert before the loop */
177      nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
178                                     nir_before_cf_node(&loop->cf_node),
179                                     remap_table);
180
181      /* Clone loop header and insert after loop body */
182      nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
183                                     nir_before_cf_node(&loop->cf_node),
184                                     remap_table);
185   }
186
187   /* Remove the break from the loop terminator and add instructions from
188    * the break block after the unrolled loop.
189    */
190   nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
191   nir_instr_remove(break_instr);
192   nir_cf_list break_list;
193   nir_cf_extract(&break_list, nir_before_block(first_break_block),
194                  nir_after_block(limiting_term->break_block));
195
196   /* Clone so things get properly remapped */
197   nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
198                                  nir_before_cf_node(&loop->cf_node),
199                                  remap_table);
200
201   /* Remove the loop */
202   nir_cf_node_remove(&loop->cf_node);
203
204   /* Delete the original loop body, break block & header */
205   nir_cf_delete(&lp_header);
206   nir_cf_delete(&loop_body);
207   nir_cf_delete(&break_list);
208
209   _mesa_hash_table_destroy(remap_table, NULL);
210}
211
212static void
213move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
214{
215   /* Move the rest of the loop inside the continue-from-block */
216   nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
217
218   /* Remove the break */
219   nir_instr_remove(nir_block_last_instr(term->break_block));
220}
221
222static nir_cursor
223get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
224{
225   if (node->type == nir_cf_node_loop) {
226      return nir_before_cf_node(node);
227   } else {
228      nir_if *if_stmt = nir_cf_node_as_if(node);
229      if (continue_from_then) {
230         return nir_after_block(nir_if_last_then_block(if_stmt));
231      } else {
232         return nir_after_block(nir_if_last_else_block(if_stmt));
233      }
234   }
235}
236
237static nir_cf_node *
238complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
239                         nir_cf_list *lp_header, nir_cf_list *lp_body,
240                         struct hash_table *remap_table,
241                         unsigned num_times_to_clone)
242{
243   /* In the terminator that we have no trip count for move everything after
244    * the terminator into the continue from branch.
245    */
246   nir_cf_list loop_end;
247   nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
248                  nir_after_block(nir_loop_last_block(loop)));
249   move_cf_list_into_loop_term(&loop_end, unlimit_term);
250
251   /* Pluck out the loop body. */
252   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
253                  nir_after_block(nir_loop_last_block(loop)));
254
255   /* Set unroll_loc to the loop as we will insert the unrolled loop before it
256    */
257   nir_cf_node *unroll_loc = &loop->cf_node;
258
259   /* Temp list to store the cloned loop as we unroll */
260   nir_cf_list unrolled_lp_body;
261
262   for (unsigned i = 0; i < num_times_to_clone; i++) {
263
264      nir_cursor cursor =
265         get_complex_unroll_insert_location(unroll_loc,
266                                            unlimit_term->continue_from_then);
267
268      /* Clone loop header and insert in if branch */
269      nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
270                                     cursor, remap_table);
271
272      cursor =
273         get_complex_unroll_insert_location(unroll_loc,
274                                            unlimit_term->continue_from_then);
275
276      /* Clone loop body */
277      nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
278                        remap_table);
279
280      unroll_loc = exec_node_data(nir_cf_node,
281                                  exec_list_get_tail(&unrolled_lp_body.list),
282                                  node);
283      assert(unroll_loc->type == nir_cf_node_block &&
284             exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
285
286      /* Get the unrolled if node */
287      unroll_loc = nir_cf_node_prev(unroll_loc);
288
289      /* Insert unrolled loop body */
290      nir_cf_reinsert(&unrolled_lp_body, cursor);
291   }
292
293   return unroll_loc;
294}
295
296/**
297 * Unroll a loop with two exists when the trip count of one of the exits is
298 * unknown.  If continue_from_then is true, the loop is repeated only when the
299 * "then" branch of the if is taken; otherwise it is repeated only
300 * when the "else" branch of the if is taken.
301 *
302 * For example, if the input is:
303 *
304 *      loop {
305 *         ...phis/condition...
306 *         if condition {
307 *            ...then instructions...
308 *         } else {
309 *            ...continue instructions...
310 *            break
311 *         }
312 *         ...body...
313 *      }
314 *
315 * And the iteration count is 3, and unlimit_term->continue_from_then is true,
316 * then the output will be:
317 *
318 *      ...condition...
319 *      if condition {
320 *         ...then instructions...
321 *         ...body...
322 *         if condition {
323 *            ...then instructions...
324 *            ...body...
325 *            if condition {
326 *               ...then instructions...
327 *               ...body...
328 *            } else {
329 *               ...continue instructions...
330 *            }
331 *         } else {
332 *            ...continue instructions...
333 *         }
334 *      } else {
335 *         ...continue instructions...
336 *      }
337 */
338static void
339complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
340               bool limiting_term_second)
341{
342   assert(nir_is_trivial_loop_if(unlimit_term->nif,
343                                 unlimit_term->break_block));
344
345   nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
346   assert(nir_is_trivial_loop_if(limiting_term->nif,
347                                 limiting_term->break_block));
348
349   loop_prepare_for_unroll(loop);
350
351   nir_block *header_blk = nir_loop_first_block(loop);
352
353   nir_cf_list lp_header;
354   nir_cf_list limit_break_list;
355   unsigned num_times_to_clone;
356   if (limiting_term_second) {
357      /* Pluck out the loop header */
358      nir_cf_extract(&lp_header, nir_before_block(header_blk),
359                     nir_before_cf_node(&unlimit_term->nif->cf_node));
360
361      /* We need some special handling when its the second terminator causing
362       * us to exit the loop for example:
363       *
364       *   for (int i = 0; i < uniform_lp_count; i++) {
365       *      colour = vec4(0.0, 1.0, 0.0, 1.0);
366       *
367       *      if (i == 1) {
368       *         break;
369       *      }
370       *      ... any further code is unreachable after i == 1 ...
371       *   }
372       */
373      nir_cf_list after_lt;
374      nir_if *limit_if = limiting_term->nif;
375      nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
376                     nir_after_block(nir_loop_last_block(loop)));
377      move_cf_list_into_loop_term(&after_lt, limiting_term);
378
379      /* Because the trip count is the number of times we pass over the entire
380       * loop before hitting a break when the second terminator is the
381       * limiting terminator we can actually execute code inside the loop when
382       * trip count == 0 e.g. the code above the break.  So we need to bump
383       * the trip_count in order for the code below to clone anything.  When
384       * trip count == 1 we execute the code above the break twice and the
385       * code below it once so we need clone things twice and so on.
386       */
387      num_times_to_clone = loop->info->max_trip_count + 1;
388   } else {
389      /* Pluck out the loop header */
390      nir_cf_extract(&lp_header, nir_before_block(header_blk),
391                     nir_before_cf_node(&limiting_term->nif->cf_node));
392
393      nir_block *first_break_block;
394      nir_block *first_continue_block;
395      get_first_blocks_in_terminator(limiting_term, &first_break_block,
396                                     &first_continue_block);
397
398      /* Remove the break then extract instructions from the break block so we
399       * can insert them in the innermost else of the unrolled loop.
400       */
401      nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
402      nir_instr_remove(break_instr);
403      nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
404                     nir_after_block(limiting_term->break_block));
405
406      nir_cf_list continue_list;
407      nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
408                     nir_after_block(limiting_term->continue_from_block));
409
410      nir_cf_reinsert(&continue_list,
411                      nir_after_cf_node(&limiting_term->nif->cf_node));
412
413      nir_cf_node_remove(&limiting_term->nif->cf_node);
414
415      num_times_to_clone = loop->info->max_trip_count;
416   }
417
418   struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
419
420   nir_cf_list lp_body;
421   nir_cf_node *unroll_loc =
422      complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
423                               remap_table, num_times_to_clone);
424
425   if (!limiting_term_second) {
426      assert(unroll_loc->type == nir_cf_node_if);
427
428      nir_cursor cursor =
429         get_complex_unroll_insert_location(unroll_loc,
430                                            unlimit_term->continue_from_then);
431
432      /* Clone loop header and insert in if branch */
433      nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
434                                     cursor, remap_table);
435
436      cursor =
437         get_complex_unroll_insert_location(unroll_loc,
438                                            unlimit_term->continue_from_then);
439
440      /* Clone so things get properly remapped, and insert break block from
441       * the limiting terminator.
442       */
443      nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
444                                     cursor, remap_table);
445
446      nir_cf_delete(&limit_break_list);
447   }
448
449   /* The loop has been unrolled so remove it. */
450   nir_cf_node_remove(&loop->cf_node);
451
452   /* Delete the original loop header and body */
453   nir_cf_delete(&lp_header);
454   nir_cf_delete(&lp_body);
455
456   _mesa_hash_table_destroy(remap_table, NULL);
457}
458
459/**
460 * Unroll loops where we only have a single terminator but the exact trip
461 * count is unknown. For example:
462 *
463 *    for (int i = 0; i < imin(x, 4); i++)
464 *       ...
465 */
466static void
467complex_unroll_single_terminator(nir_loop *loop)
468{
469   assert(list_length(&loop->info->loop_terminator_list) == 1);
470   assert(loop->info->limiting_terminator);
471   assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
472                                 loop->info->limiting_terminator->break_block));
473
474   nir_loop_terminator *terminator = loop->info->limiting_terminator;
475
476   loop_prepare_for_unroll(loop);
477
478   /* Pluck out the loop header */
479   nir_cf_list lp_header;
480   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
481                  nir_before_cf_node(&terminator->nif->cf_node));
482
483   struct hash_table *remap_table =
484      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
485                              _mesa_key_pointer_equal);
486
487   /* We need to clone the loop one extra time in order to clone the lcssa
488    * vars for the last iteration (they are inside the following ifs break
489    * branch). We leave other passes to clean up this redundant if.
490    */
491   unsigned num_times_to_clone = loop->info->max_trip_count + 1;
492
493   nir_cf_list lp_body;
494   UNUSED nir_cf_node *unroll_loc =
495      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
496                               remap_table, num_times_to_clone);
497
498   assert(unroll_loc->type == nir_cf_node_if);
499
500   /* We need to clone the lcssa vars in order to insert them on both sides
501    * of the if in the last iteration/if-statement. Otherwise the optimisation
502    * passes will have trouble optimising the unrolled if ladder.
503    */
504   nir_cursor cursor =
505      get_complex_unroll_insert_location(unroll_loc,
506                                         terminator->continue_from_then);
507
508   nir_if *if_stmt = nir_cf_node_as_if(unroll_loc);
509   nir_cursor start_cursor;
510   nir_cursor end_cursor;
511   if (terminator->continue_from_then) {
512      start_cursor = nir_before_block(nir_if_first_else_block(if_stmt));
513      end_cursor = nir_after_block(nir_if_last_else_block(if_stmt));
514   } else {
515      start_cursor = nir_before_block(nir_if_first_then_block(if_stmt));
516      end_cursor = nir_after_block(nir_if_last_then_block(if_stmt));
517   }
518
519   nir_cf_list lcssa_list;
520   nir_cf_extract(&lcssa_list, start_cursor, end_cursor);
521
522   /* Insert the cloned vars in the last continue branch */
523   nir_cf_list_clone_and_reinsert(&lcssa_list, loop->cf_node.parent,
524                                  cursor, remap_table);
525
526   start_cursor = terminator->continue_from_then ?
527      nir_before_block(nir_if_first_else_block(if_stmt)) :
528      nir_before_block(nir_if_first_then_block(if_stmt));
529
530   /* Reinsert the cloned vars back where they came from */
531   nir_cf_reinsert(&lcssa_list, start_cursor);
532
533   /* Delete the original loop header and body */
534   nir_cf_delete(&lp_header);
535   nir_cf_delete(&lp_body);
536
537   /* The original loop has been replaced so remove it. */
538   nir_cf_node_remove(&loop->cf_node);
539
540   _mesa_hash_table_destroy(remap_table, NULL);
541}
542
543/* Unrolls the classic wrapper loops e.g
544 *
545 *    do {
546 *        // ...
547 *    } while (false)
548 */
549static bool
550wrapper_unroll(nir_loop *loop)
551{
552   if (!list_is_empty(&loop->info->loop_terminator_list)) {
553
554      /* Unrolling a loop with a large number of exits can result in a
555       * large inrease in register pressure. For now we just skip
556       * unrolling if we have more than 3 exits (not including the break
557       * at the end of the loop).
558       *
559       * TODO: Most loops that fit this pattern are simply switch
560       * statements that are converted to a loop to take advantage of
561       * exiting jump instruction handling. In this case we could make
562       * use of a binary seach pattern like we do in
563       * nir_lower_indirect_derefs(), this should allow us to unroll the
564       * loops in an optimal way and should also avoid some of the
565       * register pressure that comes from simply nesting the
566       * terminators one after the other.
567       */
568      if (list_length(&loop->info->loop_terminator_list) > 3)
569         return false;
570
571      loop_prepare_for_unroll(loop);
572
573      nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
574      list_for_each_entry(nir_loop_terminator, terminator,
575                          &loop->info->loop_terminator_list,
576                          loop_terminator_link) {
577
578         /* Remove break from the terminator */
579         nir_instr *break_instr =
580            nir_block_last_instr(terminator->break_block);
581         nir_instr_remove(break_instr);
582
583         /* Pluck out the loop body. */
584         nir_cf_list loop_body;
585         nir_cf_extract(&loop_body,
586                        nir_after_cf_node(&terminator->nif->cf_node),
587                        loop_end);
588
589         /* Reinsert loop body into continue from block */
590         nir_cf_reinsert(&loop_body,
591                         nir_after_block(terminator->continue_from_block));
592
593         loop_end = terminator->continue_from_then ?
594           nir_after_block(nir_if_last_then_block(terminator->nif)) :
595           nir_after_block(nir_if_last_else_block(terminator->nif));
596      }
597   } else {
598      loop_prepare_for_unroll(loop);
599   }
600
601   /* Pluck out the loop body. */
602   nir_cf_list loop_body;
603   nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
604                  nir_after_block(nir_loop_last_block(loop)));
605
606   /* Reinsert loop body after the loop */
607   nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
608
609   /* The loop has been unrolled so remove it. */
610   nir_cf_node_remove(&loop->cf_node);
611
612   return true;
613}
614
615static bool
616is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
617                        unsigned trip_count)
618{
619   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
620      if (d->deref_type != nir_deref_type_array)
621         continue;
622
623      nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
624      nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
625      if (!nir_srcs_equal(d->arr.index, src))
626         continue;
627
628      nir_deref_instr *parent = nir_deref_instr_parent(d);
629      assert(glsl_type_is_array(parent->type) ||
630             glsl_type_is_matrix(parent->type) ||
631             glsl_type_is_vector(parent->type));
632
633      /* We have already unrolled the loop and the new one will be imbedded in
634       * the innermost continue branch. So unless the array is greater than
635       * the trip count any iteration over the loop will be an out of bounds
636       * access of the array.
637       */
638      unsigned length = glsl_type_is_vector(parent->type) ?
639                        glsl_get_vector_elements(parent->type) :
640                        glsl_get_length(parent->type);
641      return length <= trip_count;
642   }
643
644   return false;
645}
646
647/* If we know an array access is going to be out of bounds remove or replace
648 * the access with an undef. This can later result in the entire loop being
649 * removed by nir_opt_dead_cf().
650 */
651static void
652remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
653                                   nir_loop_terminator *term,
654                                   nir_cf_list *lp_header,
655                                   nir_cf_list *lp_body,
656                                   unsigned trip_count)
657{
658   if (!loop->info->guessed_trip_count)
659      return;
660
661   /* Temporarily recreate the original loop so we can alter it */
662   nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
663   nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
664
665   nir_builder b;
666   nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
667
668   nir_foreach_block_in_cf_node(block, &loop->cf_node) {
669      nir_foreach_instr_safe(instr, block) {
670         if (instr->type != nir_instr_type_intrinsic)
671            continue;
672
673         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
674
675         /* Check for arrays variably-indexed by a loop induction variable.
676          * If this access is out of bounds remove the instruction or replace
677          * its use with an undefined instruction.
678          * If the loop is no longer useful we leave it for the appropriate
679          * pass to clean it up for us.
680          */
681         if (intrin->intrinsic == nir_intrinsic_load_deref ||
682             intrin->intrinsic == nir_intrinsic_store_deref ||
683             intrin->intrinsic == nir_intrinsic_copy_deref) {
684
685            if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
686                                        trip_count)) {
687               if (intrin->intrinsic == nir_intrinsic_load_deref) {
688                  nir_ssa_def *undef =
689                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
690                                   intrin->dest.ssa.bit_size);
691                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
692                                           undef);
693               } else {
694                  nir_instr_remove(instr);
695                  continue;
696               }
697            }
698
699            if (intrin->intrinsic == nir_intrinsic_copy_deref &&
700                is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
701                                        trip_count)) {
702               nir_instr_remove(instr);
703            }
704         }
705      }
706   }
707
708   /* Now that we are done extract the loop header and body again */
709   nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
710                  nir_before_cf_node(&term->nif->cf_node));
711   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
712                  nir_after_block(nir_loop_last_block(loop)));
713}
714
715/* Partially unrolls loops that don't have a known trip count.
716 */
717static void
718partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
719{
720   assert(list_length(&loop->info->loop_terminator_list) == 1);
721
722   nir_loop_terminator *terminator =
723      list_first_entry(&loop->info->loop_terminator_list,
724                        nir_loop_terminator, loop_terminator_link);
725
726   assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
727
728   loop_prepare_for_unroll(loop);
729
730   /* Pluck out the loop header */
731   nir_cf_list lp_header;
732   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
733                  nir_before_cf_node(&terminator->nif->cf_node));
734
735   struct hash_table *remap_table =
736      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
737                              _mesa_key_pointer_equal);
738
739   nir_cf_list lp_body;
740   nir_cf_node *unroll_loc =
741      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
742                               remap_table, trip_count);
743
744   /* Attempt to remove out of bounds array access */
745   remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
746                                      &lp_body, trip_count);
747
748   nir_cursor cursor =
749      get_complex_unroll_insert_location(unroll_loc,
750                                         terminator->continue_from_then);
751
752   /* Reinsert the loop in the innermost nested continue branch of the unrolled
753    * loop.
754    */
755   nir_loop *new_loop = nir_loop_create(shader);
756   nir_cf_node_insert(cursor, &new_loop->cf_node);
757   new_loop->partially_unrolled = true;
758
759   /* Clone loop header and insert into new loop */
760   nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
761                                  nir_after_cf_list(&new_loop->body),
762                                  remap_table);
763
764   /* Clone loop body and insert into new loop */
765   nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
766                                  nir_after_cf_list(&new_loop->body),
767                                  remap_table);
768
769   /* Insert break back into terminator */
770   nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
771   nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
772   if (terminator->continue_from_then) {
773      nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
774   } else {
775      nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
776   }
777
778   /* Delete the original loop header and body */
779   nir_cf_delete(&lp_header);
780   nir_cf_delete(&lp_body);
781
782   /* The original loop has been replaced so remove it. */
783   nir_cf_node_remove(&loop->cf_node);
784
785   _mesa_hash_table_destroy(remap_table, NULL);
786}
787
788static bool
789is_indirect_load(nir_instr *instr)
790{
791   if (instr->type == nir_instr_type_intrinsic) {
792      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
793
794      if ((intrin->intrinsic == nir_intrinsic_load_ubo ||
795           intrin->intrinsic == nir_intrinsic_load_ssbo) &&
796          !nir_src_is_const(intrin->src[1])) {
797         return true;
798      }
799
800      if (intrin->intrinsic == nir_intrinsic_load_global)
801         return true;
802
803      if (intrin->intrinsic == nir_intrinsic_load_deref ||
804          intrin->intrinsic == nir_intrinsic_store_deref) {
805         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
806         nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global;
807         if (!nir_deref_mode_may_be(deref, mem_modes))
808            return false;
809         while (deref) {
810            if ((deref->deref_type == nir_deref_type_array ||
811                 deref->deref_type == nir_deref_type_ptr_as_array) &&
812                !nir_src_is_const(deref->arr.index)) {
813               return true;
814            }
815            deref = nir_deref_instr_parent(deref);
816         }
817      }
818   } else if (instr->type == nir_instr_type_tex) {
819      nir_tex_instr *tex = nir_instr_as_tex(instr);
820
821      for (unsigned i = 0; i < tex->num_srcs; i++) {
822         if (!nir_src_is_const(tex->src[i].src))
823            return true;
824      }
825   }
826
827   return false;
828}
829
830static bool
831can_pipeline_loads(nir_loop *loop)
832{
833   if (!loop->info->exact_trip_count_known)
834      return false;
835
836   bool interesting_loads = false;
837
838   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
839      if (cf_node == &loop->info->limiting_terminator->nif->cf_node)
840         continue;
841
842      /* Control flow usually prevents useful scheduling */
843      if (cf_node->type != nir_cf_node_block)
844         return false;
845
846      if (interesting_loads)
847         continue;
848
849      nir_block *block = nir_cf_node_as_block(cf_node);
850      nir_foreach_instr(instr, block) {
851         if (is_indirect_load(instr)) {
852            interesting_loads = true;
853            break;
854         }
855      }
856   }
857
858   return interesting_loads;
859}
860
861/*
862 * Returns true if we should unroll the loop, otherwise false.
863 */
864static bool
865check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
866{
867   if (loop->control == nir_loop_control_unroll)
868      return true;
869
870   if (loop->control == nir_loop_control_dont_unroll)
871      return false;
872
873   nir_loop_info *li = loop->info;
874   unsigned max_iter = shader->options->max_unroll_iterations;
875   /* Unroll much more aggressively if it can hide load latency. */
876   if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop))
877      max_iter = shader->options->max_unroll_iterations_aggressive;
878   unsigned trip_count =
879      li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
880
881   if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter)
882      return true;
883
884   unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT;
885   unsigned cost = li->instr_cost * trip_count;
886
887   if (cost <= cost_limit && trip_count <= max_iter)
888      return true;
889
890   return false;
891}
892
893static bool
894process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
895              bool *unrolled_this_block);
896
897static bool
898process_loops_in_block(nir_shader *sh, struct exec_list *block,
899                       bool *has_nested_loop_out)
900{
901   /* We try to unroll as many loops in one pass as possible.
902    * E.g. we can safely unroll both loops in this block:
903    *
904    *    if (...) {
905    *       loop {...}
906    *    }
907    *
908    *    if (...) {
909    *       loop {...}
910    *    }
911    *
912    * Unrolling one loop doesn't affect the other one.
913    *
914    * On the other hand for block with:
915    *
916    *    loop {...}
917    *    ...
918    *    loop {...}
919    *
920    * It is unsafe to unroll both loops in one pass without taking
921    * complicating precautions, since the structure of the block would
922    * change after unrolling the first loop. So in such a case we leave
923    * the second loop for the next iteration of unrolling to handle.
924    */
925
926   bool progress = false;
927   bool unrolled_this_block = false;
928
929   foreach_list_typed(nir_cf_node, nested_node, node, block) {
930      if (process_loops(sh, nested_node,
931                        has_nested_loop_out, &unrolled_this_block)) {
932         progress = true;
933
934         /* If current node is unrolled we could not safely continue
935          * our iteration since we don't know the next node
936          * and it's hard to guarantee that we won't end up unrolling
937          * inner loop of the currently unrolled one, if such exists.
938          */
939         if (unrolled_this_block) {
940            break;
941         }
942      }
943   }
944
945   return progress;
946}
947
948static bool
949process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
950              bool *unrolled_this_block)
951{
952   bool progress = false;
953   bool has_nested_loop = false;
954   nir_loop *loop;
955
956   switch (cf_node->type) {
957   case nir_cf_node_block:
958      return progress;
959   case nir_cf_node_if: {
960      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
961      progress |= process_loops_in_block(sh, &if_stmt->then_list,
962                                         has_nested_loop_out);
963      progress |= process_loops_in_block(sh, &if_stmt->else_list,
964                                         has_nested_loop_out);
965      return progress;
966   }
967   case nir_cf_node_loop: {
968      loop = nir_cf_node_as_loop(cf_node);
969      progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop);
970
971      break;
972   }
973   default:
974      unreachable("unknown cf node type");
975   }
976
977   const bool unrolled_child_block = progress;
978
979   /* Don't attempt to unroll a second inner loop in this pass, wait until the
980    * next pass as we have altered the cf.
981    */
982   if (!progress && loop->control != nir_loop_control_dont_unroll) {
983
984      /* Remove the conditional break statements associated with all terminators
985       * that are associated with a fixed iteration count, except for the one
986       * associated with the limiting terminator--that one needs to stay, since
987       * it terminates the loop.
988       */
989      if (loop->info->limiting_terminator) {
990         list_for_each_entry_safe(nir_loop_terminator, t,
991                                  &loop->info->loop_terminator_list,
992                                  loop_terminator_link) {
993            if (t->exact_trip_count_unknown)
994               continue;
995
996            if (t != loop->info->limiting_terminator) {
997
998               /* Only delete the if-statement if the continue block is empty.
999                * We trust that nir_opt_if() does its job well enough to
1000                * remove all instructions from the continue block when possible.
1001                */
1002               nir_block *first_continue_from_blk = t->continue_from_then ?
1003                  nir_if_first_then_block(t->nif) :
1004                  nir_if_first_else_block(t->nif);
1005
1006               if (!(nir_cf_node_is_last(&first_continue_from_blk->cf_node) &&
1007                     exec_list_is_empty(&first_continue_from_blk->instr_list)))
1008                  continue;
1009
1010               /* Now delete the if */
1011               nir_cf_node_remove(&t->nif->cf_node);
1012
1013               /* Also remove it from the terminator list */
1014               list_del(&t->loop_terminator_link);
1015
1016               progress = true;
1017            }
1018         }
1019      }
1020
1021      /* Check for the classic
1022       *
1023       *    do {
1024       *        // ...
1025       *    } while (false)
1026       *
1027       * that is used to wrap multi-line macros. GLSL IR also wraps switch
1028       * statements in a loop like this.
1029       */
1030      if (loop->info->limiting_terminator == NULL &&
1031          !loop->info->complex_loop) {
1032
1033         nir_block *last_loop_blk = nir_loop_last_block(loop);
1034         if (nir_block_ends_in_break(last_loop_blk)) {
1035            progress = wrapper_unroll(loop);
1036            goto exit;
1037         }
1038
1039         /* If we were able to guess the loop iteration based on array access
1040          * then do a partial unroll.
1041          */
1042         unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1043         if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
1044             loop->info->guessed_trip_count &&
1045             check_unrolling_restrictions(sh, loop)) {
1046            partial_unroll(sh, loop, loop->info->guessed_trip_count);
1047            progress = true;
1048         }
1049      }
1050
1051      /* Intentionally don't consider exact_trip_count_known here.  When
1052       * max_trip_count is non-zero, it is the upper bound on the number of
1053       * times the loop will iterate, but the loop may iterate less.  For
1054       * example, the following loop will iterate 0 or 1 time:
1055       *
1056       *    for (i = 0; i < min(x, 1); i++) { ... }
1057       *
1058       * Trivial single-interation loops (e.g., do { ... } while (false)) and
1059       * trivial zero-iteration loops (e.g., while (false) { ... }) will have
1060       * already been handled.
1061       *
1062       * If the loop is known to execute at most once and meets the other
1063       * unrolling criteria, unroll it even if it has nested loops.
1064       *
1065       * It is unlikely that such loops exist in real shaders. GraphicsFuzz is
1066       * known to generate spurious loops that iterate exactly once.  It is
1067       * plausible that it could eventually start generating loops like the
1068       * example above, so it seems logical to defend against it now.
1069       */
1070      if (!loop->info->limiting_terminator ||
1071          (loop->info->max_trip_count != 1 && has_nested_loop))
1072         goto exit;
1073
1074      if (!check_unrolling_restrictions(sh, loop))
1075         goto exit;
1076
1077      if (loop->info->exact_trip_count_known) {
1078         simple_unroll(loop);
1079         progress = true;
1080      } else {
1081         /* Attempt to unroll loops with two terminators. */
1082         unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1083         if (num_lt == 2 &&
1084             !loop->info->limiting_terminator->exact_trip_count_unknown) {
1085            bool limiting_term_second = true;
1086            nir_loop_terminator *terminator =
1087               list_first_entry(&loop->info->loop_terminator_list,
1088                                nir_loop_terminator, loop_terminator_link);
1089
1090
1091            if (terminator->nif == loop->info->limiting_terminator->nif) {
1092               limiting_term_second = false;
1093               terminator =
1094                  list_last_entry(&loop->info->loop_terminator_list,
1095                                  nir_loop_terminator, loop_terminator_link);
1096            }
1097
1098            /* If the first terminator has a trip count of zero and is the
1099             * limiting terminator just do a simple unroll as the second
1100             * terminator can never be reached.
1101             */
1102            if (loop->info->max_trip_count == 0 && !limiting_term_second) {
1103               simple_unroll(loop);
1104            } else {
1105               complex_unroll(loop, terminator, limiting_term_second);
1106            }
1107            progress = true;
1108         }
1109
1110         if (num_lt == 1) {
1111            assert(loop->info->limiting_terminator->exact_trip_count_unknown);
1112            complex_unroll_single_terminator(loop);
1113            progress = true;
1114         }
1115      }
1116   }
1117
1118exit:
1119   *has_nested_loop_out = true;
1120   if (progress && !unrolled_child_block)
1121      *unrolled_this_block = true;
1122
1123   return progress;
1124}
1125
1126static bool
1127nir_opt_loop_unroll_impl(nir_function_impl *impl,
1128                         nir_variable_mode indirect_mask,
1129                         bool force_unroll_sampler_indirect)
1130{
1131   bool progress = false;
1132   nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask,
1133                        (int) force_unroll_sampler_indirect);
1134   nir_metadata_require(impl, nir_metadata_block_index);
1135
1136   bool has_nested_loop = false;
1137   progress |= process_loops_in_block(impl->function->shader, &impl->body,
1138                                      &has_nested_loop);
1139
1140   if (progress) {
1141      nir_metadata_preserve(impl, nir_metadata_none);
1142      nir_lower_regs_to_ssa_impl(impl);
1143   } else {
1144      nir_metadata_preserve(impl, nir_metadata_all);
1145   }
1146
1147   return progress;
1148}
1149
1150/**
1151 * indirect_mask specifies which type of indirectly accessed variables
1152 * should force loop unrolling.
1153 */
1154bool
1155nir_opt_loop_unroll(nir_shader *shader)
1156{
1157   bool progress = false;
1158
1159   bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler;
1160   nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
1161   nir_foreach_function(function, shader) {
1162      if (function->impl) {
1163         progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask,
1164                                              force_unroll_sampler_indirect);
1165      }
1166   }
1167   return progress;
1168}
1169