1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28 
29 
30 /* This limit is chosen fairly arbitrarily.  GLSL IR max iteration is 32
31  * instructions. (Multiply counting nodes and magic number 5.)  But there is
32  * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
33  * to give about the same results. Around 5 instructions per node.  But some
34  * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
35  * we set it to 26.
36  */
37 #define LOOP_UNROLL_LIMIT 26
38 
39 /* Prepare this loop for unrolling by first converting to lcssa and then
40  * converting the phis from the top level of the loop body to regs.
41  * Partially converting out of SSA allows us to unroll the loop without having
42  * to keep track of and update phis along the way which gets tricky and
43  * doesn't add much value over converting to regs.
44  *
45  * The loop may have a jump instruction at the end of the loop which does
46  * nothing.  Once we're out of SSA, we can safely delete it so we don't have
47  * to deal with it later.
48  */
49 static void
loop_prepare_for_unroll(nir_loop *loop)50 loop_prepare_for_unroll(nir_loop *loop)
51 {
52    nir_rematerialize_derefs_in_use_blocks_impl(
53       nir_cf_node_get_function(&loop->cf_node));
54 
55    nir_convert_loop_to_lcssa(loop);
56 
57    /* Lower phis at the top level of the loop body */
58    foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
59       if (nir_cf_node_block == node->type) {
60          nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
61       }
62    }
63 
64    /* Lower phis after the loop */
65    nir_block *block_after_loop =
66       nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
67 
68    nir_lower_phis_to_regs_block(block_after_loop);
69 
70    /* Remove jump if it's the last instruction in the loop */
71    nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
72    if (last_instr && last_instr->type == nir_instr_type_jump) {
73       nir_instr_remove(last_instr);
74    }
75 }
76 
77 static void
get_first_blocks_in_terminator(nir_loop_terminator *term, nir_block **first_break_block, nir_block **first_continue_block)78 get_first_blocks_in_terminator(nir_loop_terminator *term,
79                                nir_block **first_break_block,
80                                nir_block **first_continue_block)
81 {
82    if (term->continue_from_then) {
83       *first_continue_block = nir_if_first_then_block(term->nif);
84       *first_break_block = nir_if_first_else_block(term->nif);
85    } else {
86       *first_continue_block = nir_if_first_else_block(term->nif);
87       *first_break_block = nir_if_first_then_block(term->nif);
88    }
89 }
90 
91 /**
92  * Unroll a loop where we know exactly how many iterations there are and there
93  * is only a single exit point.  Note here we can unroll loops with multiple
94  * theoretical exits that only have a single terminating exit that we always
95  * know is the "real" exit.
96  *
97  *     loop {
98  *         ...instrs...
99  *     }
100  *
101  * And the iteration count is 3, the output will be:
102  *
103  *     ...instrs... ...instrs... ...instrs...
104  */
105 static void
simple_unroll(nir_loop *loop)106 simple_unroll(nir_loop *loop)
107 {
108    nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
109    assert(nir_is_trivial_loop_if(limiting_term->nif,
110                                  limiting_term->break_block));
111 
112    loop_prepare_for_unroll(loop);
113 
114    /* Skip over loop terminator and get the loop body. */
115    list_for_each_entry(nir_loop_terminator, terminator,
116                        &loop->info->loop_terminator_list,
117                        loop_terminator_link) {
118 
119       /* Remove all but the limiting terminator as we know the other exit
120        * conditions can never be met. Note we need to extract any instructions
121        * in the continue from branch and insert then into the loop body before
122        * removing it.
123        */
124       if (terminator->nif != limiting_term->nif) {
125          nir_block *first_break_block;
126          nir_block *first_continue_block;
127          get_first_blocks_in_terminator(terminator, &first_break_block,
128                                         &first_continue_block);
129 
130          assert(nir_is_trivial_loop_if(terminator->nif,
131                                        terminator->break_block));
132 
133          nir_cf_list continue_from_lst;
134          nir_cf_extract(&continue_from_lst,
135                         nir_before_block(first_continue_block),
136                         nir_after_block(terminator->continue_from_block));
137          nir_cf_reinsert(&continue_from_lst,
138                          nir_after_cf_node(&terminator->nif->cf_node));
139 
140          nir_cf_node_remove(&terminator->nif->cf_node);
141       }
142    }
143 
144    nir_block *first_break_block;
145    nir_block *first_continue_block;
146    get_first_blocks_in_terminator(limiting_term, &first_break_block,
147                                   &first_continue_block);
148 
149    /* Pluck out the loop header */
150    nir_block *header_blk = nir_loop_first_block(loop);
151    nir_cf_list lp_header;
152    nir_cf_extract(&lp_header, nir_before_block(header_blk),
153                   nir_before_cf_node(&limiting_term->nif->cf_node));
154 
155    /* Add the continue from block of the limiting terminator to the loop body
156     */
157    nir_cf_list continue_from_lst;
158    nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
159                   nir_after_block(limiting_term->continue_from_block));
160    nir_cf_reinsert(&continue_from_lst,
161                    nir_after_cf_node(&limiting_term->nif->cf_node));
162 
163    /* Pluck out the loop body */
164    nir_cf_list loop_body;
165    nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
166                   nir_after_block(nir_loop_last_block(loop)));
167 
168    struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
169 
170    /* Clone the loop header and insert before the loop */
171    nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
172                                   nir_before_cf_node(&loop->cf_node),
173                                   remap_table);
174 
175    for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
176       /* Clone loop body and insert before the loop */
177       nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
178                                      nir_before_cf_node(&loop->cf_node),
179                                      remap_table);
180 
181       /* Clone loop header and insert after loop body */
182       nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
183                                      nir_before_cf_node(&loop->cf_node),
184                                      remap_table);
185    }
186 
187    /* Remove the break from the loop terminator and add instructions from
188     * the break block after the unrolled loop.
189     */
190    nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
191    nir_instr_remove(break_instr);
192    nir_cf_list break_list;
193    nir_cf_extract(&break_list, nir_before_block(first_break_block),
194                   nir_after_block(limiting_term->break_block));
195 
196    /* Clone so things get properly remapped */
197    nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
198                                   nir_before_cf_node(&loop->cf_node),
199                                   remap_table);
200 
201    /* Remove the loop */
202    nir_cf_node_remove(&loop->cf_node);
203 
204    /* Delete the original loop body, break block & header */
205    nir_cf_delete(&lp_header);
206    nir_cf_delete(&loop_body);
207    nir_cf_delete(&break_list);
208 
209    _mesa_hash_table_destroy(remap_table, NULL);
210 }
211 
212 static void
move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)213 move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
214 {
215    /* Move the rest of the loop inside the continue-from-block */
216    nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
217 
218    /* Remove the break */
219    nir_instr_remove(nir_block_last_instr(term->break_block));
220 }
221 
222 static nir_cursor
get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)223 get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
224 {
225    if (node->type == nir_cf_node_loop) {
226       return nir_before_cf_node(node);
227    } else {
228       nir_if *if_stmt = nir_cf_node_as_if(node);
229       if (continue_from_then) {
230          return nir_after_block(nir_if_last_then_block(if_stmt));
231       } else {
232          return nir_after_block(nir_if_last_else_block(if_stmt));
233       }
234    }
235 }
236 
237 static nir_cf_node *
complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term, nir_cf_list *lp_header, nir_cf_list *lp_body, struct hash_table *remap_table, unsigned num_times_to_clone)238 complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
239                          nir_cf_list *lp_header, nir_cf_list *lp_body,
240                          struct hash_table *remap_table,
241                          unsigned num_times_to_clone)
242 {
243    /* In the terminator that we have no trip count for move everything after
244     * the terminator into the continue from branch.
245     */
246    nir_cf_list loop_end;
247    nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
248                   nir_after_block(nir_loop_last_block(loop)));
249    move_cf_list_into_loop_term(&loop_end, unlimit_term);
250 
251    /* Pluck out the loop body. */
252    nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
253                   nir_after_block(nir_loop_last_block(loop)));
254 
255    /* Set unroll_loc to the loop as we will insert the unrolled loop before it
256     */
257    nir_cf_node *unroll_loc = &loop->cf_node;
258 
259    /* Temp list to store the cloned loop as we unroll */
260    nir_cf_list unrolled_lp_body;
261 
262    for (unsigned i = 0; i < num_times_to_clone; i++) {
263 
264       nir_cursor cursor =
265          get_complex_unroll_insert_location(unroll_loc,
266                                             unlimit_term->continue_from_then);
267 
268       /* Clone loop header and insert in if branch */
269       nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
270                                      cursor, remap_table);
271 
272       cursor =
273          get_complex_unroll_insert_location(unroll_loc,
274                                             unlimit_term->continue_from_then);
275 
276       /* Clone loop body */
277       nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
278                         remap_table);
279 
280       unroll_loc = exec_node_data(nir_cf_node,
281                                   exec_list_get_tail(&unrolled_lp_body.list),
282                                   node);
283       assert(unroll_loc->type == nir_cf_node_block &&
284              exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
285 
286       /* Get the unrolled if node */
287       unroll_loc = nir_cf_node_prev(unroll_loc);
288 
289       /* Insert unrolled loop body */
290       nir_cf_reinsert(&unrolled_lp_body, cursor);
291    }
292 
293    return unroll_loc;
294 }
295 
296 /**
297  * Unroll a loop with two exists when the trip count of one of the exits is
298  * unknown.  If continue_from_then is true, the loop is repeated only when the
299  * "then" branch of the if is taken; otherwise it is repeated only
300  * when the "else" branch of the if is taken.
301  *
302  * For example, if the input is:
303  *
304  *      loop {
305  *         ...phis/condition...
306  *         if condition {
307  *            ...then instructions...
308  *         } else {
309  *            ...continue instructions...
310  *            break
311  *         }
312  *         ...body...
313  *      }
314  *
315  * And the iteration count is 3, and unlimit_term->continue_from_then is true,
316  * then the output will be:
317  *
318  *      ...condition...
319  *      if condition {
320  *         ...then instructions...
321  *         ...body...
322  *         if condition {
323  *            ...then instructions...
324  *            ...body...
325  *            if condition {
326  *               ...then instructions...
327  *               ...body...
328  *            } else {
329  *               ...continue instructions...
330  *            }
331  *         } else {
332  *            ...continue instructions...
333  *         }
334  *      } else {
335  *         ...continue instructions...
336  *      }
337  */
338 static void
complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term, bool limiting_term_second)339 complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
340                bool limiting_term_second)
341 {
342    assert(nir_is_trivial_loop_if(unlimit_term->nif,
343                                  unlimit_term->break_block));
344 
345    nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
346    assert(nir_is_trivial_loop_if(limiting_term->nif,
347                                  limiting_term->break_block));
348 
349    loop_prepare_for_unroll(loop);
350 
351    nir_block *header_blk = nir_loop_first_block(loop);
352 
353    nir_cf_list lp_header;
354    nir_cf_list limit_break_list;
355    unsigned num_times_to_clone;
356    if (limiting_term_second) {
357       /* Pluck out the loop header */
358       nir_cf_extract(&lp_header, nir_before_block(header_blk),
359                      nir_before_cf_node(&unlimit_term->nif->cf_node));
360 
361       /* We need some special handling when its the second terminator causing
362        * us to exit the loop for example:
363        *
364        *   for (int i = 0; i < uniform_lp_count; i++) {
365        *      colour = vec4(0.0, 1.0, 0.0, 1.0);
366        *
367        *      if (i == 1) {
368        *         break;
369        *      }
370        *      ... any further code is unreachable after i == 1 ...
371        *   }
372        */
373       nir_cf_list after_lt;
374       nir_if *limit_if = limiting_term->nif;
375       nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
376                      nir_after_block(nir_loop_last_block(loop)));
377       move_cf_list_into_loop_term(&after_lt, limiting_term);
378 
379       /* Because the trip count is the number of times we pass over the entire
380        * loop before hitting a break when the second terminator is the
381        * limiting terminator we can actually execute code inside the loop when
382        * trip count == 0 e.g. the code above the break.  So we need to bump
383        * the trip_count in order for the code below to clone anything.  When
384        * trip count == 1 we execute the code above the break twice and the
385        * code below it once so we need clone things twice and so on.
386        */
387       num_times_to_clone = loop->info->max_trip_count + 1;
388    } else {
389       /* Pluck out the loop header */
390       nir_cf_extract(&lp_header, nir_before_block(header_blk),
391                      nir_before_cf_node(&limiting_term->nif->cf_node));
392 
393       nir_block *first_break_block;
394       nir_block *first_continue_block;
395       get_first_blocks_in_terminator(limiting_term, &first_break_block,
396                                      &first_continue_block);
397 
398       /* Remove the break then extract instructions from the break block so we
399        * can insert them in the innermost else of the unrolled loop.
400        */
401       nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
402       nir_instr_remove(break_instr);
403       nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
404                      nir_after_block(limiting_term->break_block));
405 
406       nir_cf_list continue_list;
407       nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
408                      nir_after_block(limiting_term->continue_from_block));
409 
410       nir_cf_reinsert(&continue_list,
411                       nir_after_cf_node(&limiting_term->nif->cf_node));
412 
413       nir_cf_node_remove(&limiting_term->nif->cf_node);
414 
415       num_times_to_clone = loop->info->max_trip_count;
416    }
417 
418    struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
419 
420    nir_cf_list lp_body;
421    nir_cf_node *unroll_loc =
422       complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
423                                remap_table, num_times_to_clone);
424 
425    if (!limiting_term_second) {
426       assert(unroll_loc->type == nir_cf_node_if);
427 
428       nir_cursor cursor =
429          get_complex_unroll_insert_location(unroll_loc,
430                                             unlimit_term->continue_from_then);
431 
432       /* Clone loop header and insert in if branch */
433       nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
434                                      cursor, remap_table);
435 
436       cursor =
437          get_complex_unroll_insert_location(unroll_loc,
438                                             unlimit_term->continue_from_then);
439 
440       /* Clone so things get properly remapped, and insert break block from
441        * the limiting terminator.
442        */
443       nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
444                                      cursor, remap_table);
445 
446       nir_cf_delete(&limit_break_list);
447    }
448 
449    /* The loop has been unrolled so remove it. */
450    nir_cf_node_remove(&loop->cf_node);
451 
452    /* Delete the original loop header and body */
453    nir_cf_delete(&lp_header);
454    nir_cf_delete(&lp_body);
455 
456    _mesa_hash_table_destroy(remap_table, NULL);
457 }
458 
459 /**
460  * Unroll loops where we only have a single terminator but the exact trip
461  * count is unknown. For example:
462  *
463  *    for (int i = 0; i < imin(x, 4); i++)
464  *       ...
465  */
466 static void
complex_unroll_single_terminator(nir_loop *loop)467 complex_unroll_single_terminator(nir_loop *loop)
468 {
469    assert(list_length(&loop->info->loop_terminator_list) == 1);
470    assert(loop->info->limiting_terminator);
471    assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
472                                  loop->info->limiting_terminator->break_block));
473 
474    nir_loop_terminator *terminator = loop->info->limiting_terminator;
475 
476    loop_prepare_for_unroll(loop);
477 
478    /* Pluck out the loop header */
479    nir_cf_list lp_header;
480    nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
481                   nir_before_cf_node(&terminator->nif->cf_node));
482 
483    struct hash_table *remap_table =
484       _mesa_hash_table_create(NULL, _mesa_hash_pointer,
485                               _mesa_key_pointer_equal);
486 
487    /* We need to clone the loop one extra time in order to clone the lcssa
488     * vars for the last iteration (they are inside the following ifs break
489     * branch). We leave other passes to clean up this redundant if.
490     */
491    unsigned num_times_to_clone = loop->info->max_trip_count + 1;
492 
493    nir_cf_list lp_body;
494    UNUSED nir_cf_node *unroll_loc =
495       complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
496                                remap_table, num_times_to_clone);
497 
498    assert(unroll_loc->type == nir_cf_node_if);
499 
500    /* We need to clone the lcssa vars in order to insert them on both sides
501     * of the if in the last iteration/if-statement. Otherwise the optimisation
502     * passes will have trouble optimising the unrolled if ladder.
503     */
504    nir_cursor cursor =
505       get_complex_unroll_insert_location(unroll_loc,
506                                          terminator->continue_from_then);
507 
508    nir_if *if_stmt = nir_cf_node_as_if(unroll_loc);
509    nir_cursor start_cursor;
510    nir_cursor end_cursor;
511    if (terminator->continue_from_then) {
512       start_cursor = nir_before_block(nir_if_first_else_block(if_stmt));
513       end_cursor = nir_after_block(nir_if_last_else_block(if_stmt));
514    } else {
515       start_cursor = nir_before_block(nir_if_first_then_block(if_stmt));
516       end_cursor = nir_after_block(nir_if_last_then_block(if_stmt));
517    }
518 
519    nir_cf_list lcssa_list;
520    nir_cf_extract(&lcssa_list, start_cursor, end_cursor);
521 
522    /* Insert the cloned vars in the last continue branch */
523    nir_cf_list_clone_and_reinsert(&lcssa_list, loop->cf_node.parent,
524                                   cursor, remap_table);
525 
526    start_cursor = terminator->continue_from_then ?
527       nir_before_block(nir_if_first_else_block(if_stmt)) :
528       nir_before_block(nir_if_first_then_block(if_stmt));
529 
530    /* Reinsert the cloned vars back where they came from */
531    nir_cf_reinsert(&lcssa_list, start_cursor);
532 
533    /* Delete the original loop header and body */
534    nir_cf_delete(&lp_header);
535    nir_cf_delete(&lp_body);
536 
537    /* The original loop has been replaced so remove it. */
538    nir_cf_node_remove(&loop->cf_node);
539 
540    _mesa_hash_table_destroy(remap_table, NULL);
541 }
542 
543 /* Unrolls the classic wrapper loops e.g
544  *
545  *    do {
546  *        // ...
547  *    } while (false)
548  */
549 static bool
wrapper_unroll(nir_loop *loop)550 wrapper_unroll(nir_loop *loop)
551 {
552    if (!list_is_empty(&loop->info->loop_terminator_list)) {
553 
554       /* Unrolling a loop with a large number of exits can result in a
555        * large inrease in register pressure. For now we just skip
556        * unrolling if we have more than 3 exits (not including the break
557        * at the end of the loop).
558        *
559        * TODO: Most loops that fit this pattern are simply switch
560        * statements that are converted to a loop to take advantage of
561        * exiting jump instruction handling. In this case we could make
562        * use of a binary seach pattern like we do in
563        * nir_lower_indirect_derefs(), this should allow us to unroll the
564        * loops in an optimal way and should also avoid some of the
565        * register pressure that comes from simply nesting the
566        * terminators one after the other.
567        */
568       if (list_length(&loop->info->loop_terminator_list) > 3)
569          return false;
570 
571       loop_prepare_for_unroll(loop);
572 
573       nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
574       list_for_each_entry(nir_loop_terminator, terminator,
575                           &loop->info->loop_terminator_list,
576                           loop_terminator_link) {
577 
578          /* Remove break from the terminator */
579          nir_instr *break_instr =
580             nir_block_last_instr(terminator->break_block);
581          nir_instr_remove(break_instr);
582 
583          /* Pluck out the loop body. */
584          nir_cf_list loop_body;
585          nir_cf_extract(&loop_body,
586                         nir_after_cf_node(&terminator->nif->cf_node),
587                         loop_end);
588 
589          /* Reinsert loop body into continue from block */
590          nir_cf_reinsert(&loop_body,
591                          nir_after_block(terminator->continue_from_block));
592 
593          loop_end = terminator->continue_from_then ?
594            nir_after_block(nir_if_last_then_block(terminator->nif)) :
595            nir_after_block(nir_if_last_else_block(terminator->nif));
596       }
597    } else {
598       loop_prepare_for_unroll(loop);
599    }
600 
601    /* Pluck out the loop body. */
602    nir_cf_list loop_body;
603    nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
604                   nir_after_block(nir_loop_last_block(loop)));
605 
606    /* Reinsert loop body after the loop */
607    nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
608 
609    /* The loop has been unrolled so remove it. */
610    nir_cf_node_remove(&loop->cf_node);
611 
612    return true;
613 }
614 
615 static bool
is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref, unsigned trip_count)616 is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
617                         unsigned trip_count)
618 {
619    for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
620       if (d->deref_type != nir_deref_type_array)
621          continue;
622 
623       nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
624       nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
625       if (!nir_srcs_equal(d->arr.index, src))
626          continue;
627 
628       nir_deref_instr *parent = nir_deref_instr_parent(d);
629       assert(glsl_type_is_array(parent->type) ||
630              glsl_type_is_matrix(parent->type) ||
631              glsl_type_is_vector(parent->type));
632 
633       /* We have already unrolled the loop and the new one will be imbedded in
634        * the innermost continue branch. So unless the array is greater than
635        * the trip count any iteration over the loop will be an out of bounds
636        * access of the array.
637        */
638       unsigned length = glsl_type_is_vector(parent->type) ?
639                         glsl_get_vector_elements(parent->type) :
640                         glsl_get_length(parent->type);
641       return length <= trip_count;
642    }
643 
644    return false;
645 }
646 
647 /* If we know an array access is going to be out of bounds remove or replace
648  * the access with an undef. This can later result in the entire loop being
649  * removed by nir_opt_dead_cf().
650  */
651 static void
remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop, nir_loop_terminator *term, nir_cf_list *lp_header, nir_cf_list *lp_body, unsigned trip_count)652 remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
653                                    nir_loop_terminator *term,
654                                    nir_cf_list *lp_header,
655                                    nir_cf_list *lp_body,
656                                    unsigned trip_count)
657 {
658    if (!loop->info->guessed_trip_count)
659       return;
660 
661    /* Temporarily recreate the original loop so we can alter it */
662    nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
663    nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
664 
665    nir_builder b;
666    nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
667 
668    nir_foreach_block_in_cf_node(block, &loop->cf_node) {
669       nir_foreach_instr_safe(instr, block) {
670          if (instr->type != nir_instr_type_intrinsic)
671             continue;
672 
673          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
674 
675          /* Check for arrays variably-indexed by a loop induction variable.
676           * If this access is out of bounds remove the instruction or replace
677           * its use with an undefined instruction.
678           * If the loop is no longer useful we leave it for the appropriate
679           * pass to clean it up for us.
680           */
681          if (intrin->intrinsic == nir_intrinsic_load_deref ||
682              intrin->intrinsic == nir_intrinsic_store_deref ||
683              intrin->intrinsic == nir_intrinsic_copy_deref) {
684 
685             if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
686                                         trip_count)) {
687                if (intrin->intrinsic == nir_intrinsic_load_deref) {
688                   nir_ssa_def *undef =
689                      nir_ssa_undef(&b, intrin->dest.ssa.num_components,
690                                    intrin->dest.ssa.bit_size);
691                   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
692                                            undef);
693                } else {
694                   nir_instr_remove(instr);
695                   continue;
696                }
697             }
698 
699             if (intrin->intrinsic == nir_intrinsic_copy_deref &&
700                 is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
701                                         trip_count)) {
702                nir_instr_remove(instr);
703             }
704          }
705       }
706    }
707 
708    /* Now that we are done extract the loop header and body again */
709    nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
710                   nir_before_cf_node(&term->nif->cf_node));
711    nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
712                   nir_after_block(nir_loop_last_block(loop)));
713 }
714 
715 /* Partially unrolls loops that don't have a known trip count.
716  */
717 static void
partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)718 partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
719 {
720    assert(list_length(&loop->info->loop_terminator_list) == 1);
721 
722    nir_loop_terminator *terminator =
723       list_first_entry(&loop->info->loop_terminator_list,
724                         nir_loop_terminator, loop_terminator_link);
725 
726    assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
727 
728    loop_prepare_for_unroll(loop);
729 
730    /* Pluck out the loop header */
731    nir_cf_list lp_header;
732    nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
733                   nir_before_cf_node(&terminator->nif->cf_node));
734 
735    struct hash_table *remap_table =
736       _mesa_hash_table_create(NULL, _mesa_hash_pointer,
737                               _mesa_key_pointer_equal);
738 
739    nir_cf_list lp_body;
740    nir_cf_node *unroll_loc =
741       complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
742                                remap_table, trip_count);
743 
744    /* Attempt to remove out of bounds array access */
745    remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
746                                       &lp_body, trip_count);
747 
748    nir_cursor cursor =
749       get_complex_unroll_insert_location(unroll_loc,
750                                          terminator->continue_from_then);
751 
752    /* Reinsert the loop in the innermost nested continue branch of the unrolled
753     * loop.
754     */
755    nir_loop *new_loop = nir_loop_create(shader);
756    nir_cf_node_insert(cursor, &new_loop->cf_node);
757    new_loop->partially_unrolled = true;
758 
759    /* Clone loop header and insert into new loop */
760    nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
761                                   nir_after_cf_list(&new_loop->body),
762                                   remap_table);
763 
764    /* Clone loop body and insert into new loop */
765    nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
766                                   nir_after_cf_list(&new_loop->body),
767                                   remap_table);
768 
769    /* Insert break back into terminator */
770    nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
771    nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
772    if (terminator->continue_from_then) {
773       nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
774    } else {
775       nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
776    }
777 
778    /* Delete the original loop header and body */
779    nir_cf_delete(&lp_header);
780    nir_cf_delete(&lp_body);
781 
782    /* The original loop has been replaced so remove it. */
783    nir_cf_node_remove(&loop->cf_node);
784 
785    _mesa_hash_table_destroy(remap_table, NULL);
786 }
787 
788 static bool
is_indirect_load(nir_instr *instr)789 is_indirect_load(nir_instr *instr)
790 {
791    if (instr->type == nir_instr_type_intrinsic) {
792       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
793 
794       if ((intrin->intrinsic == nir_intrinsic_load_ubo ||
795            intrin->intrinsic == nir_intrinsic_load_ssbo) &&
796           !nir_src_is_const(intrin->src[1])) {
797          return true;
798       }
799 
800       if (intrin->intrinsic == nir_intrinsic_load_global)
801          return true;
802 
803       if (intrin->intrinsic == nir_intrinsic_load_deref ||
804           intrin->intrinsic == nir_intrinsic_store_deref) {
805          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
806          nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global;
807          if (!nir_deref_mode_may_be(deref, mem_modes))
808             return false;
809          while (deref) {
810             if ((deref->deref_type == nir_deref_type_array ||
811                  deref->deref_type == nir_deref_type_ptr_as_array) &&
812                 !nir_src_is_const(deref->arr.index)) {
813                return true;
814             }
815             deref = nir_deref_instr_parent(deref);
816          }
817       }
818    } else if (instr->type == nir_instr_type_tex) {
819       nir_tex_instr *tex = nir_instr_as_tex(instr);
820 
821       for (unsigned i = 0; i < tex->num_srcs; i++) {
822          if (!nir_src_is_const(tex->src[i].src))
823             return true;
824       }
825    }
826 
827    return false;
828 }
829 
830 static bool
can_pipeline_loads(nir_loop *loop)831 can_pipeline_loads(nir_loop *loop)
832 {
833    if (!loop->info->exact_trip_count_known)
834       return false;
835 
836    bool interesting_loads = false;
837 
838    foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
839       if (cf_node == &loop->info->limiting_terminator->nif->cf_node)
840          continue;
841 
842       /* Control flow usually prevents useful scheduling */
843       if (cf_node->type != nir_cf_node_block)
844          return false;
845 
846       if (interesting_loads)
847          continue;
848 
849       nir_block *block = nir_cf_node_as_block(cf_node);
850       nir_foreach_instr(instr, block) {
851          if (is_indirect_load(instr)) {
852             interesting_loads = true;
853             break;
854          }
855       }
856    }
857 
858    return interesting_loads;
859 }
860 
861 /*
862  * Returns true if we should unroll the loop, otherwise false.
863  */
864 static bool
check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)865 check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
866 {
867    if (loop->control == nir_loop_control_unroll)
868       return true;
869 
870    if (loop->control == nir_loop_control_dont_unroll)
871       return false;
872 
873    nir_loop_info *li = loop->info;
874    unsigned max_iter = shader->options->max_unroll_iterations;
875    /* Unroll much more aggressively if it can hide load latency. */
876    if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop))
877       max_iter = shader->options->max_unroll_iterations_aggressive;
878    unsigned trip_count =
879       li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
880 
881    if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter)
882       return true;
883 
884    unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT;
885    unsigned cost = li->instr_cost * trip_count;
886 
887    if (cost <= cost_limit && trip_count <= max_iter)
888       return true;
889 
890    return false;
891 }
892 
893 static bool
894 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
895               bool *unrolled_this_block);
896 
897 static bool
process_loops_in_block(nir_shader *sh, struct exec_list *block, bool *has_nested_loop_out)898 process_loops_in_block(nir_shader *sh, struct exec_list *block,
899                        bool *has_nested_loop_out)
900 {
901    /* We try to unroll as many loops in one pass as possible.
902     * E.g. we can safely unroll both loops in this block:
903     *
904     *    if (...) {
905     *       loop {...}
906     *    }
907     *
908     *    if (...) {
909     *       loop {...}
910     *    }
911     *
912     * Unrolling one loop doesn't affect the other one.
913     *
914     * On the other hand for block with:
915     *
916     *    loop {...}
917     *    ...
918     *    loop {...}
919     *
920     * It is unsafe to unroll both loops in one pass without taking
921     * complicating precautions, since the structure of the block would
922     * change after unrolling the first loop. So in such a case we leave
923     * the second loop for the next iteration of unrolling to handle.
924     */
925 
926    bool progress = false;
927    bool unrolled_this_block = false;
928 
929    foreach_list_typed(nir_cf_node, nested_node, node, block) {
930       if (process_loops(sh, nested_node,
931                         has_nested_loop_out, &unrolled_this_block)) {
932          progress = true;
933 
934          /* If current node is unrolled we could not safely continue
935           * our iteration since we don't know the next node
936           * and it's hard to guarantee that we won't end up unrolling
937           * inner loop of the currently unrolled one, if such exists.
938           */
939          if (unrolled_this_block) {
940             break;
941          }
942       }
943    }
944 
945    return progress;
946 }
947 
948 static bool
process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out, bool *unrolled_this_block)949 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
950               bool *unrolled_this_block)
951 {
952    bool progress = false;
953    bool has_nested_loop = false;
954    nir_loop *loop;
955 
956    switch (cf_node->type) {
957    case nir_cf_node_block:
958       return progress;
959    case nir_cf_node_if: {
960       nir_if *if_stmt = nir_cf_node_as_if(cf_node);
961       progress |= process_loops_in_block(sh, &if_stmt->then_list,
962                                          has_nested_loop_out);
963       progress |= process_loops_in_block(sh, &if_stmt->else_list,
964                                          has_nested_loop_out);
965       return progress;
966    }
967    case nir_cf_node_loop: {
968       loop = nir_cf_node_as_loop(cf_node);
969       progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop);
970 
971       break;
972    }
973    default:
974       unreachable("unknown cf node type");
975    }
976 
977    const bool unrolled_child_block = progress;
978 
979    /* Don't attempt to unroll a second inner loop in this pass, wait until the
980     * next pass as we have altered the cf.
981     */
982    if (!progress && loop->control != nir_loop_control_dont_unroll) {
983 
984       /* Remove the conditional break statements associated with all terminators
985        * that are associated with a fixed iteration count, except for the one
986        * associated with the limiting terminator--that one needs to stay, since
987        * it terminates the loop.
988        */
989       if (loop->info->limiting_terminator) {
990          list_for_each_entry_safe(nir_loop_terminator, t,
991                                   &loop->info->loop_terminator_list,
992                                   loop_terminator_link) {
993             if (t->exact_trip_count_unknown)
994                continue;
995 
996             if (t != loop->info->limiting_terminator) {
997 
998                /* Only delete the if-statement if the continue block is empty.
999                 * We trust that nir_opt_if() does its job well enough to
1000                 * remove all instructions from the continue block when possible.
1001                 */
1002                nir_block *first_continue_from_blk = t->continue_from_then ?
1003                   nir_if_first_then_block(t->nif) :
1004                   nir_if_first_else_block(t->nif);
1005 
1006                if (!(nir_cf_node_is_last(&first_continue_from_blk->cf_node) &&
1007                      exec_list_is_empty(&first_continue_from_blk->instr_list)))
1008                   continue;
1009 
1010                /* Now delete the if */
1011                nir_cf_node_remove(&t->nif->cf_node);
1012 
1013                /* Also remove it from the terminator list */
1014                list_del(&t->loop_terminator_link);
1015 
1016                progress = true;
1017             }
1018          }
1019       }
1020 
1021       /* Check for the classic
1022        *
1023        *    do {
1024        *        // ...
1025        *    } while (false)
1026        *
1027        * that is used to wrap multi-line macros. GLSL IR also wraps switch
1028        * statements in a loop like this.
1029        */
1030       if (loop->info->limiting_terminator == NULL &&
1031           !loop->info->complex_loop) {
1032 
1033          nir_block *last_loop_blk = nir_loop_last_block(loop);
1034          if (nir_block_ends_in_break(last_loop_blk)) {
1035             progress = wrapper_unroll(loop);
1036             goto exit;
1037          }
1038 
1039          /* If we were able to guess the loop iteration based on array access
1040           * then do a partial unroll.
1041           */
1042          unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1043          if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
1044              loop->info->guessed_trip_count &&
1045              check_unrolling_restrictions(sh, loop)) {
1046             partial_unroll(sh, loop, loop->info->guessed_trip_count);
1047             progress = true;
1048          }
1049       }
1050 
1051       /* Intentionally don't consider exact_trip_count_known here.  When
1052        * max_trip_count is non-zero, it is the upper bound on the number of
1053        * times the loop will iterate, but the loop may iterate less.  For
1054        * example, the following loop will iterate 0 or 1 time:
1055        *
1056        *    for (i = 0; i < min(x, 1); i++) { ... }
1057        *
1058        * Trivial single-interation loops (e.g., do { ... } while (false)) and
1059        * trivial zero-iteration loops (e.g., while (false) { ... }) will have
1060        * already been handled.
1061        *
1062        * If the loop is known to execute at most once and meets the other
1063        * unrolling criteria, unroll it even if it has nested loops.
1064        *
1065        * It is unlikely that such loops exist in real shaders. GraphicsFuzz is
1066        * known to generate spurious loops that iterate exactly once.  It is
1067        * plausible that it could eventually start generating loops like the
1068        * example above, so it seems logical to defend against it now.
1069        */
1070       if (!loop->info->limiting_terminator ||
1071           (loop->info->max_trip_count != 1 && has_nested_loop))
1072          goto exit;
1073 
1074       if (!check_unrolling_restrictions(sh, loop))
1075          goto exit;
1076 
1077       if (loop->info->exact_trip_count_known) {
1078          simple_unroll(loop);
1079          progress = true;
1080       } else {
1081          /* Attempt to unroll loops with two terminators. */
1082          unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1083          if (num_lt == 2 &&
1084              !loop->info->limiting_terminator->exact_trip_count_unknown) {
1085             bool limiting_term_second = true;
1086             nir_loop_terminator *terminator =
1087                list_first_entry(&loop->info->loop_terminator_list,
1088                                 nir_loop_terminator, loop_terminator_link);
1089 
1090 
1091             if (terminator->nif == loop->info->limiting_terminator->nif) {
1092                limiting_term_second = false;
1093                terminator =
1094                   list_last_entry(&loop->info->loop_terminator_list,
1095                                   nir_loop_terminator, loop_terminator_link);
1096             }
1097 
1098             /* If the first terminator has a trip count of zero and is the
1099              * limiting terminator just do a simple unroll as the second
1100              * terminator can never be reached.
1101              */
1102             if (loop->info->max_trip_count == 0 && !limiting_term_second) {
1103                simple_unroll(loop);
1104             } else {
1105                complex_unroll(loop, terminator, limiting_term_second);
1106             }
1107             progress = true;
1108          }
1109 
1110          if (num_lt == 1) {
1111             assert(loop->info->limiting_terminator->exact_trip_count_unknown);
1112             complex_unroll_single_terminator(loop);
1113             progress = true;
1114          }
1115       }
1116    }
1117 
1118 exit:
1119    *has_nested_loop_out = true;
1120    if (progress && !unrolled_child_block)
1121       *unrolled_this_block = true;
1122 
1123    return progress;
1124 }
1125 
1126 static bool
nir_opt_loop_unroll_impl(nir_function_impl *impl, nir_variable_mode indirect_mask, bool force_unroll_sampler_indirect)1127 nir_opt_loop_unroll_impl(nir_function_impl *impl,
1128                          nir_variable_mode indirect_mask,
1129                          bool force_unroll_sampler_indirect)
1130 {
1131    bool progress = false;
1132    nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask,
1133                         (int) force_unroll_sampler_indirect);
1134    nir_metadata_require(impl, nir_metadata_block_index);
1135 
1136    bool has_nested_loop = false;
1137    progress |= process_loops_in_block(impl->function->shader, &impl->body,
1138                                       &has_nested_loop);
1139 
1140    if (progress) {
1141       nir_metadata_preserve(impl, nir_metadata_none);
1142       nir_lower_regs_to_ssa_impl(impl);
1143    } else {
1144       nir_metadata_preserve(impl, nir_metadata_all);
1145    }
1146 
1147    return progress;
1148 }
1149 
1150 /**
1151  * indirect_mask specifies which type of indirectly accessed variables
1152  * should force loop unrolling.
1153  */
1154 bool
nir_opt_loop_unroll(nir_shader *shader)1155 nir_opt_loop_unroll(nir_shader *shader)
1156 {
1157    bool progress = false;
1158 
1159    bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler;
1160    nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
1161    nir_foreach_function(function, shader) {
1162       if (function->impl) {
1163          progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask,
1164                                               force_unroll_sampler_indirect);
1165       }
1166    }
1167    return progress;
1168 }
1169