1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28
29
30 /* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32
31 * instructions. (Multiply counting nodes and magic number 5.) But there is
32 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
33 * to give about the same results. Around 5 instructions per node. But some
34 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
35 * we set it to 26.
36 */
37 #define LOOP_UNROLL_LIMIT 26
38
39 /* Prepare this loop for unrolling by first converting to lcssa and then
40 * converting the phis from the top level of the loop body to regs.
41 * Partially converting out of SSA allows us to unroll the loop without having
42 * to keep track of and update phis along the way which gets tricky and
43 * doesn't add much value over converting to regs.
44 *
45 * The loop may have a jump instruction at the end of the loop which does
46 * nothing. Once we're out of SSA, we can safely delete it so we don't have
47 * to deal with it later.
48 */
49 static void
loop_prepare_for_unroll(nir_loop *loop)50 loop_prepare_for_unroll(nir_loop *loop)
51 {
52 nir_rematerialize_derefs_in_use_blocks_impl(
53 nir_cf_node_get_function(&loop->cf_node));
54
55 nir_convert_loop_to_lcssa(loop);
56
57 /* Lower phis at the top level of the loop body */
58 foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
59 if (nir_cf_node_block == node->type) {
60 nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
61 }
62 }
63
64 /* Lower phis after the loop */
65 nir_block *block_after_loop =
66 nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
67
68 nir_lower_phis_to_regs_block(block_after_loop);
69
70 /* Remove jump if it's the last instruction in the loop */
71 nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
72 if (last_instr && last_instr->type == nir_instr_type_jump) {
73 nir_instr_remove(last_instr);
74 }
75 }
76
77 static void
get_first_blocks_in_terminator(nir_loop_terminator *term, nir_block **first_break_block, nir_block **first_continue_block)78 get_first_blocks_in_terminator(nir_loop_terminator *term,
79 nir_block **first_break_block,
80 nir_block **first_continue_block)
81 {
82 if (term->continue_from_then) {
83 *first_continue_block = nir_if_first_then_block(term->nif);
84 *first_break_block = nir_if_first_else_block(term->nif);
85 } else {
86 *first_continue_block = nir_if_first_else_block(term->nif);
87 *first_break_block = nir_if_first_then_block(term->nif);
88 }
89 }
90
91 /**
92 * Unroll a loop where we know exactly how many iterations there are and there
93 * is only a single exit point. Note here we can unroll loops with multiple
94 * theoretical exits that only have a single terminating exit that we always
95 * know is the "real" exit.
96 *
97 * loop {
98 * ...instrs...
99 * }
100 *
101 * And the iteration count is 3, the output will be:
102 *
103 * ...instrs... ...instrs... ...instrs...
104 */
105 static void
simple_unroll(nir_loop *loop)106 simple_unroll(nir_loop *loop)
107 {
108 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
109 assert(nir_is_trivial_loop_if(limiting_term->nif,
110 limiting_term->break_block));
111
112 loop_prepare_for_unroll(loop);
113
114 /* Skip over loop terminator and get the loop body. */
115 list_for_each_entry(nir_loop_terminator, terminator,
116 &loop->info->loop_terminator_list,
117 loop_terminator_link) {
118
119 /* Remove all but the limiting terminator as we know the other exit
120 * conditions can never be met. Note we need to extract any instructions
121 * in the continue from branch and insert then into the loop body before
122 * removing it.
123 */
124 if (terminator->nif != limiting_term->nif) {
125 nir_block *first_break_block;
126 nir_block *first_continue_block;
127 get_first_blocks_in_terminator(terminator, &first_break_block,
128 &first_continue_block);
129
130 assert(nir_is_trivial_loop_if(terminator->nif,
131 terminator->break_block));
132
133 nir_cf_list continue_from_lst;
134 nir_cf_extract(&continue_from_lst,
135 nir_before_block(first_continue_block),
136 nir_after_block(terminator->continue_from_block));
137 nir_cf_reinsert(&continue_from_lst,
138 nir_after_cf_node(&terminator->nif->cf_node));
139
140 nir_cf_node_remove(&terminator->nif->cf_node);
141 }
142 }
143
144 nir_block *first_break_block;
145 nir_block *first_continue_block;
146 get_first_blocks_in_terminator(limiting_term, &first_break_block,
147 &first_continue_block);
148
149 /* Pluck out the loop header */
150 nir_block *header_blk = nir_loop_first_block(loop);
151 nir_cf_list lp_header;
152 nir_cf_extract(&lp_header, nir_before_block(header_blk),
153 nir_before_cf_node(&limiting_term->nif->cf_node));
154
155 /* Add the continue from block of the limiting terminator to the loop body
156 */
157 nir_cf_list continue_from_lst;
158 nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
159 nir_after_block(limiting_term->continue_from_block));
160 nir_cf_reinsert(&continue_from_lst,
161 nir_after_cf_node(&limiting_term->nif->cf_node));
162
163 /* Pluck out the loop body */
164 nir_cf_list loop_body;
165 nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
166 nir_after_block(nir_loop_last_block(loop)));
167
168 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
169
170 /* Clone the loop header and insert before the loop */
171 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
172 nir_before_cf_node(&loop->cf_node),
173 remap_table);
174
175 for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
176 /* Clone loop body and insert before the loop */
177 nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
178 nir_before_cf_node(&loop->cf_node),
179 remap_table);
180
181 /* Clone loop header and insert after loop body */
182 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
183 nir_before_cf_node(&loop->cf_node),
184 remap_table);
185 }
186
187 /* Remove the break from the loop terminator and add instructions from
188 * the break block after the unrolled loop.
189 */
190 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
191 nir_instr_remove(break_instr);
192 nir_cf_list break_list;
193 nir_cf_extract(&break_list, nir_before_block(first_break_block),
194 nir_after_block(limiting_term->break_block));
195
196 /* Clone so things get properly remapped */
197 nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
198 nir_before_cf_node(&loop->cf_node),
199 remap_table);
200
201 /* Remove the loop */
202 nir_cf_node_remove(&loop->cf_node);
203
204 /* Delete the original loop body, break block & header */
205 nir_cf_delete(&lp_header);
206 nir_cf_delete(&loop_body);
207 nir_cf_delete(&break_list);
208
209 _mesa_hash_table_destroy(remap_table, NULL);
210 }
211
212 static void
move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)213 move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
214 {
215 /* Move the rest of the loop inside the continue-from-block */
216 nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
217
218 /* Remove the break */
219 nir_instr_remove(nir_block_last_instr(term->break_block));
220 }
221
222 static nir_cursor
get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)223 get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
224 {
225 if (node->type == nir_cf_node_loop) {
226 return nir_before_cf_node(node);
227 } else {
228 nir_if *if_stmt = nir_cf_node_as_if(node);
229 if (continue_from_then) {
230 return nir_after_block(nir_if_last_then_block(if_stmt));
231 } else {
232 return nir_after_block(nir_if_last_else_block(if_stmt));
233 }
234 }
235 }
236
237 static nir_cf_node *
complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term, nir_cf_list *lp_header, nir_cf_list *lp_body, struct hash_table *remap_table, unsigned num_times_to_clone)238 complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
239 nir_cf_list *lp_header, nir_cf_list *lp_body,
240 struct hash_table *remap_table,
241 unsigned num_times_to_clone)
242 {
243 /* In the terminator that we have no trip count for move everything after
244 * the terminator into the continue from branch.
245 */
246 nir_cf_list loop_end;
247 nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
248 nir_after_block(nir_loop_last_block(loop)));
249 move_cf_list_into_loop_term(&loop_end, unlimit_term);
250
251 /* Pluck out the loop body. */
252 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
253 nir_after_block(nir_loop_last_block(loop)));
254
255 /* Set unroll_loc to the loop as we will insert the unrolled loop before it
256 */
257 nir_cf_node *unroll_loc = &loop->cf_node;
258
259 /* Temp list to store the cloned loop as we unroll */
260 nir_cf_list unrolled_lp_body;
261
262 for (unsigned i = 0; i < num_times_to_clone; i++) {
263
264 nir_cursor cursor =
265 get_complex_unroll_insert_location(unroll_loc,
266 unlimit_term->continue_from_then);
267
268 /* Clone loop header and insert in if branch */
269 nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
270 cursor, remap_table);
271
272 cursor =
273 get_complex_unroll_insert_location(unroll_loc,
274 unlimit_term->continue_from_then);
275
276 /* Clone loop body */
277 nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
278 remap_table);
279
280 unroll_loc = exec_node_data(nir_cf_node,
281 exec_list_get_tail(&unrolled_lp_body.list),
282 node);
283 assert(unroll_loc->type == nir_cf_node_block &&
284 exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
285
286 /* Get the unrolled if node */
287 unroll_loc = nir_cf_node_prev(unroll_loc);
288
289 /* Insert unrolled loop body */
290 nir_cf_reinsert(&unrolled_lp_body, cursor);
291 }
292
293 return unroll_loc;
294 }
295
296 /**
297 * Unroll a loop with two exists when the trip count of one of the exits is
298 * unknown. If continue_from_then is true, the loop is repeated only when the
299 * "then" branch of the if is taken; otherwise it is repeated only
300 * when the "else" branch of the if is taken.
301 *
302 * For example, if the input is:
303 *
304 * loop {
305 * ...phis/condition...
306 * if condition {
307 * ...then instructions...
308 * } else {
309 * ...continue instructions...
310 * break
311 * }
312 * ...body...
313 * }
314 *
315 * And the iteration count is 3, and unlimit_term->continue_from_then is true,
316 * then the output will be:
317 *
318 * ...condition...
319 * if condition {
320 * ...then instructions...
321 * ...body...
322 * if condition {
323 * ...then instructions...
324 * ...body...
325 * if condition {
326 * ...then instructions...
327 * ...body...
328 * } else {
329 * ...continue instructions...
330 * }
331 * } else {
332 * ...continue instructions...
333 * }
334 * } else {
335 * ...continue instructions...
336 * }
337 */
338 static void
complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term, bool limiting_term_second)339 complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
340 bool limiting_term_second)
341 {
342 assert(nir_is_trivial_loop_if(unlimit_term->nif,
343 unlimit_term->break_block));
344
345 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
346 assert(nir_is_trivial_loop_if(limiting_term->nif,
347 limiting_term->break_block));
348
349 loop_prepare_for_unroll(loop);
350
351 nir_block *header_blk = nir_loop_first_block(loop);
352
353 nir_cf_list lp_header;
354 nir_cf_list limit_break_list;
355 unsigned num_times_to_clone;
356 if (limiting_term_second) {
357 /* Pluck out the loop header */
358 nir_cf_extract(&lp_header, nir_before_block(header_blk),
359 nir_before_cf_node(&unlimit_term->nif->cf_node));
360
361 /* We need some special handling when its the second terminator causing
362 * us to exit the loop for example:
363 *
364 * for (int i = 0; i < uniform_lp_count; i++) {
365 * colour = vec4(0.0, 1.0, 0.0, 1.0);
366 *
367 * if (i == 1) {
368 * break;
369 * }
370 * ... any further code is unreachable after i == 1 ...
371 * }
372 */
373 nir_cf_list after_lt;
374 nir_if *limit_if = limiting_term->nif;
375 nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
376 nir_after_block(nir_loop_last_block(loop)));
377 move_cf_list_into_loop_term(&after_lt, limiting_term);
378
379 /* Because the trip count is the number of times we pass over the entire
380 * loop before hitting a break when the second terminator is the
381 * limiting terminator we can actually execute code inside the loop when
382 * trip count == 0 e.g. the code above the break. So we need to bump
383 * the trip_count in order for the code below to clone anything. When
384 * trip count == 1 we execute the code above the break twice and the
385 * code below it once so we need clone things twice and so on.
386 */
387 num_times_to_clone = loop->info->max_trip_count + 1;
388 } else {
389 /* Pluck out the loop header */
390 nir_cf_extract(&lp_header, nir_before_block(header_blk),
391 nir_before_cf_node(&limiting_term->nif->cf_node));
392
393 nir_block *first_break_block;
394 nir_block *first_continue_block;
395 get_first_blocks_in_terminator(limiting_term, &first_break_block,
396 &first_continue_block);
397
398 /* Remove the break then extract instructions from the break block so we
399 * can insert them in the innermost else of the unrolled loop.
400 */
401 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
402 nir_instr_remove(break_instr);
403 nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
404 nir_after_block(limiting_term->break_block));
405
406 nir_cf_list continue_list;
407 nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
408 nir_after_block(limiting_term->continue_from_block));
409
410 nir_cf_reinsert(&continue_list,
411 nir_after_cf_node(&limiting_term->nif->cf_node));
412
413 nir_cf_node_remove(&limiting_term->nif->cf_node);
414
415 num_times_to_clone = loop->info->max_trip_count;
416 }
417
418 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
419
420 nir_cf_list lp_body;
421 nir_cf_node *unroll_loc =
422 complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
423 remap_table, num_times_to_clone);
424
425 if (!limiting_term_second) {
426 assert(unroll_loc->type == nir_cf_node_if);
427
428 nir_cursor cursor =
429 get_complex_unroll_insert_location(unroll_loc,
430 unlimit_term->continue_from_then);
431
432 /* Clone loop header and insert in if branch */
433 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
434 cursor, remap_table);
435
436 cursor =
437 get_complex_unroll_insert_location(unroll_loc,
438 unlimit_term->continue_from_then);
439
440 /* Clone so things get properly remapped, and insert break block from
441 * the limiting terminator.
442 */
443 nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
444 cursor, remap_table);
445
446 nir_cf_delete(&limit_break_list);
447 }
448
449 /* The loop has been unrolled so remove it. */
450 nir_cf_node_remove(&loop->cf_node);
451
452 /* Delete the original loop header and body */
453 nir_cf_delete(&lp_header);
454 nir_cf_delete(&lp_body);
455
456 _mesa_hash_table_destroy(remap_table, NULL);
457 }
458
459 /**
460 * Unroll loops where we only have a single terminator but the exact trip
461 * count is unknown. For example:
462 *
463 * for (int i = 0; i < imin(x, 4); i++)
464 * ...
465 */
466 static void
complex_unroll_single_terminator(nir_loop *loop)467 complex_unroll_single_terminator(nir_loop *loop)
468 {
469 assert(list_length(&loop->info->loop_terminator_list) == 1);
470 assert(loop->info->limiting_terminator);
471 assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
472 loop->info->limiting_terminator->break_block));
473
474 nir_loop_terminator *terminator = loop->info->limiting_terminator;
475
476 loop_prepare_for_unroll(loop);
477
478 /* Pluck out the loop header */
479 nir_cf_list lp_header;
480 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
481 nir_before_cf_node(&terminator->nif->cf_node));
482
483 struct hash_table *remap_table =
484 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
485 _mesa_key_pointer_equal);
486
487 /* We need to clone the loop one extra time in order to clone the lcssa
488 * vars for the last iteration (they are inside the following ifs break
489 * branch). We leave other passes to clean up this redundant if.
490 */
491 unsigned num_times_to_clone = loop->info->max_trip_count + 1;
492
493 nir_cf_list lp_body;
494 UNUSED nir_cf_node *unroll_loc =
495 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
496 remap_table, num_times_to_clone);
497
498 assert(unroll_loc->type == nir_cf_node_if);
499
500 /* We need to clone the lcssa vars in order to insert them on both sides
501 * of the if in the last iteration/if-statement. Otherwise the optimisation
502 * passes will have trouble optimising the unrolled if ladder.
503 */
504 nir_cursor cursor =
505 get_complex_unroll_insert_location(unroll_loc,
506 terminator->continue_from_then);
507
508 nir_if *if_stmt = nir_cf_node_as_if(unroll_loc);
509 nir_cursor start_cursor;
510 nir_cursor end_cursor;
511 if (terminator->continue_from_then) {
512 start_cursor = nir_before_block(nir_if_first_else_block(if_stmt));
513 end_cursor = nir_after_block(nir_if_last_else_block(if_stmt));
514 } else {
515 start_cursor = nir_before_block(nir_if_first_then_block(if_stmt));
516 end_cursor = nir_after_block(nir_if_last_then_block(if_stmt));
517 }
518
519 nir_cf_list lcssa_list;
520 nir_cf_extract(&lcssa_list, start_cursor, end_cursor);
521
522 /* Insert the cloned vars in the last continue branch */
523 nir_cf_list_clone_and_reinsert(&lcssa_list, loop->cf_node.parent,
524 cursor, remap_table);
525
526 start_cursor = terminator->continue_from_then ?
527 nir_before_block(nir_if_first_else_block(if_stmt)) :
528 nir_before_block(nir_if_first_then_block(if_stmt));
529
530 /* Reinsert the cloned vars back where they came from */
531 nir_cf_reinsert(&lcssa_list, start_cursor);
532
533 /* Delete the original loop header and body */
534 nir_cf_delete(&lp_header);
535 nir_cf_delete(&lp_body);
536
537 /* The original loop has been replaced so remove it. */
538 nir_cf_node_remove(&loop->cf_node);
539
540 _mesa_hash_table_destroy(remap_table, NULL);
541 }
542
543 /* Unrolls the classic wrapper loops e.g
544 *
545 * do {
546 * // ...
547 * } while (false)
548 */
549 static bool
wrapper_unroll(nir_loop *loop)550 wrapper_unroll(nir_loop *loop)
551 {
552 if (!list_is_empty(&loop->info->loop_terminator_list)) {
553
554 /* Unrolling a loop with a large number of exits can result in a
555 * large inrease in register pressure. For now we just skip
556 * unrolling if we have more than 3 exits (not including the break
557 * at the end of the loop).
558 *
559 * TODO: Most loops that fit this pattern are simply switch
560 * statements that are converted to a loop to take advantage of
561 * exiting jump instruction handling. In this case we could make
562 * use of a binary seach pattern like we do in
563 * nir_lower_indirect_derefs(), this should allow us to unroll the
564 * loops in an optimal way and should also avoid some of the
565 * register pressure that comes from simply nesting the
566 * terminators one after the other.
567 */
568 if (list_length(&loop->info->loop_terminator_list) > 3)
569 return false;
570
571 loop_prepare_for_unroll(loop);
572
573 nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
574 list_for_each_entry(nir_loop_terminator, terminator,
575 &loop->info->loop_terminator_list,
576 loop_terminator_link) {
577
578 /* Remove break from the terminator */
579 nir_instr *break_instr =
580 nir_block_last_instr(terminator->break_block);
581 nir_instr_remove(break_instr);
582
583 /* Pluck out the loop body. */
584 nir_cf_list loop_body;
585 nir_cf_extract(&loop_body,
586 nir_after_cf_node(&terminator->nif->cf_node),
587 loop_end);
588
589 /* Reinsert loop body into continue from block */
590 nir_cf_reinsert(&loop_body,
591 nir_after_block(terminator->continue_from_block));
592
593 loop_end = terminator->continue_from_then ?
594 nir_after_block(nir_if_last_then_block(terminator->nif)) :
595 nir_after_block(nir_if_last_else_block(terminator->nif));
596 }
597 } else {
598 loop_prepare_for_unroll(loop);
599 }
600
601 /* Pluck out the loop body. */
602 nir_cf_list loop_body;
603 nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
604 nir_after_block(nir_loop_last_block(loop)));
605
606 /* Reinsert loop body after the loop */
607 nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
608
609 /* The loop has been unrolled so remove it. */
610 nir_cf_node_remove(&loop->cf_node);
611
612 return true;
613 }
614
615 static bool
is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref, unsigned trip_count)616 is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
617 unsigned trip_count)
618 {
619 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
620 if (d->deref_type != nir_deref_type_array)
621 continue;
622
623 nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
624 nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
625 if (!nir_srcs_equal(d->arr.index, src))
626 continue;
627
628 nir_deref_instr *parent = nir_deref_instr_parent(d);
629 assert(glsl_type_is_array(parent->type) ||
630 glsl_type_is_matrix(parent->type) ||
631 glsl_type_is_vector(parent->type));
632
633 /* We have already unrolled the loop and the new one will be imbedded in
634 * the innermost continue branch. So unless the array is greater than
635 * the trip count any iteration over the loop will be an out of bounds
636 * access of the array.
637 */
638 unsigned length = glsl_type_is_vector(parent->type) ?
639 glsl_get_vector_elements(parent->type) :
640 glsl_get_length(parent->type);
641 return length <= trip_count;
642 }
643
644 return false;
645 }
646
647 /* If we know an array access is going to be out of bounds remove or replace
648 * the access with an undef. This can later result in the entire loop being
649 * removed by nir_opt_dead_cf().
650 */
651 static void
remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop, nir_loop_terminator *term, nir_cf_list *lp_header, nir_cf_list *lp_body, unsigned trip_count)652 remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
653 nir_loop_terminator *term,
654 nir_cf_list *lp_header,
655 nir_cf_list *lp_body,
656 unsigned trip_count)
657 {
658 if (!loop->info->guessed_trip_count)
659 return;
660
661 /* Temporarily recreate the original loop so we can alter it */
662 nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
663 nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
664
665 nir_builder b;
666 nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
667
668 nir_foreach_block_in_cf_node(block, &loop->cf_node) {
669 nir_foreach_instr_safe(instr, block) {
670 if (instr->type != nir_instr_type_intrinsic)
671 continue;
672
673 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
674
675 /* Check for arrays variably-indexed by a loop induction variable.
676 * If this access is out of bounds remove the instruction or replace
677 * its use with an undefined instruction.
678 * If the loop is no longer useful we leave it for the appropriate
679 * pass to clean it up for us.
680 */
681 if (intrin->intrinsic == nir_intrinsic_load_deref ||
682 intrin->intrinsic == nir_intrinsic_store_deref ||
683 intrin->intrinsic == nir_intrinsic_copy_deref) {
684
685 if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
686 trip_count)) {
687 if (intrin->intrinsic == nir_intrinsic_load_deref) {
688 nir_ssa_def *undef =
689 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
690 intrin->dest.ssa.bit_size);
691 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
692 undef);
693 } else {
694 nir_instr_remove(instr);
695 continue;
696 }
697 }
698
699 if (intrin->intrinsic == nir_intrinsic_copy_deref &&
700 is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
701 trip_count)) {
702 nir_instr_remove(instr);
703 }
704 }
705 }
706 }
707
708 /* Now that we are done extract the loop header and body again */
709 nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
710 nir_before_cf_node(&term->nif->cf_node));
711 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
712 nir_after_block(nir_loop_last_block(loop)));
713 }
714
715 /* Partially unrolls loops that don't have a known trip count.
716 */
717 static void
partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)718 partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
719 {
720 assert(list_length(&loop->info->loop_terminator_list) == 1);
721
722 nir_loop_terminator *terminator =
723 list_first_entry(&loop->info->loop_terminator_list,
724 nir_loop_terminator, loop_terminator_link);
725
726 assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
727
728 loop_prepare_for_unroll(loop);
729
730 /* Pluck out the loop header */
731 nir_cf_list lp_header;
732 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
733 nir_before_cf_node(&terminator->nif->cf_node));
734
735 struct hash_table *remap_table =
736 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
737 _mesa_key_pointer_equal);
738
739 nir_cf_list lp_body;
740 nir_cf_node *unroll_loc =
741 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
742 remap_table, trip_count);
743
744 /* Attempt to remove out of bounds array access */
745 remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
746 &lp_body, trip_count);
747
748 nir_cursor cursor =
749 get_complex_unroll_insert_location(unroll_loc,
750 terminator->continue_from_then);
751
752 /* Reinsert the loop in the innermost nested continue branch of the unrolled
753 * loop.
754 */
755 nir_loop *new_loop = nir_loop_create(shader);
756 nir_cf_node_insert(cursor, &new_loop->cf_node);
757 new_loop->partially_unrolled = true;
758
759 /* Clone loop header and insert into new loop */
760 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
761 nir_after_cf_list(&new_loop->body),
762 remap_table);
763
764 /* Clone loop body and insert into new loop */
765 nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
766 nir_after_cf_list(&new_loop->body),
767 remap_table);
768
769 /* Insert break back into terminator */
770 nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
771 nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
772 if (terminator->continue_from_then) {
773 nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
774 } else {
775 nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
776 }
777
778 /* Delete the original loop header and body */
779 nir_cf_delete(&lp_header);
780 nir_cf_delete(&lp_body);
781
782 /* The original loop has been replaced so remove it. */
783 nir_cf_node_remove(&loop->cf_node);
784
785 _mesa_hash_table_destroy(remap_table, NULL);
786 }
787
788 static bool
is_indirect_load(nir_instr *instr)789 is_indirect_load(nir_instr *instr)
790 {
791 if (instr->type == nir_instr_type_intrinsic) {
792 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
793
794 if ((intrin->intrinsic == nir_intrinsic_load_ubo ||
795 intrin->intrinsic == nir_intrinsic_load_ssbo) &&
796 !nir_src_is_const(intrin->src[1])) {
797 return true;
798 }
799
800 if (intrin->intrinsic == nir_intrinsic_load_global)
801 return true;
802
803 if (intrin->intrinsic == nir_intrinsic_load_deref ||
804 intrin->intrinsic == nir_intrinsic_store_deref) {
805 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
806 nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global;
807 if (!nir_deref_mode_may_be(deref, mem_modes))
808 return false;
809 while (deref) {
810 if ((deref->deref_type == nir_deref_type_array ||
811 deref->deref_type == nir_deref_type_ptr_as_array) &&
812 !nir_src_is_const(deref->arr.index)) {
813 return true;
814 }
815 deref = nir_deref_instr_parent(deref);
816 }
817 }
818 } else if (instr->type == nir_instr_type_tex) {
819 nir_tex_instr *tex = nir_instr_as_tex(instr);
820
821 for (unsigned i = 0; i < tex->num_srcs; i++) {
822 if (!nir_src_is_const(tex->src[i].src))
823 return true;
824 }
825 }
826
827 return false;
828 }
829
830 static bool
can_pipeline_loads(nir_loop *loop)831 can_pipeline_loads(nir_loop *loop)
832 {
833 if (!loop->info->exact_trip_count_known)
834 return false;
835
836 bool interesting_loads = false;
837
838 foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
839 if (cf_node == &loop->info->limiting_terminator->nif->cf_node)
840 continue;
841
842 /* Control flow usually prevents useful scheduling */
843 if (cf_node->type != nir_cf_node_block)
844 return false;
845
846 if (interesting_loads)
847 continue;
848
849 nir_block *block = nir_cf_node_as_block(cf_node);
850 nir_foreach_instr(instr, block) {
851 if (is_indirect_load(instr)) {
852 interesting_loads = true;
853 break;
854 }
855 }
856 }
857
858 return interesting_loads;
859 }
860
861 /*
862 * Returns true if we should unroll the loop, otherwise false.
863 */
864 static bool
check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)865 check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
866 {
867 if (loop->control == nir_loop_control_unroll)
868 return true;
869
870 if (loop->control == nir_loop_control_dont_unroll)
871 return false;
872
873 nir_loop_info *li = loop->info;
874 unsigned max_iter = shader->options->max_unroll_iterations;
875 /* Unroll much more aggressively if it can hide load latency. */
876 if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop))
877 max_iter = shader->options->max_unroll_iterations_aggressive;
878 unsigned trip_count =
879 li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
880
881 if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter)
882 return true;
883
884 unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT;
885 unsigned cost = li->instr_cost * trip_count;
886
887 if (cost <= cost_limit && trip_count <= max_iter)
888 return true;
889
890 return false;
891 }
892
893 static bool
894 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
895 bool *unrolled_this_block);
896
897 static bool
process_loops_in_block(nir_shader *sh, struct exec_list *block, bool *has_nested_loop_out)898 process_loops_in_block(nir_shader *sh, struct exec_list *block,
899 bool *has_nested_loop_out)
900 {
901 /* We try to unroll as many loops in one pass as possible.
902 * E.g. we can safely unroll both loops in this block:
903 *
904 * if (...) {
905 * loop {...}
906 * }
907 *
908 * if (...) {
909 * loop {...}
910 * }
911 *
912 * Unrolling one loop doesn't affect the other one.
913 *
914 * On the other hand for block with:
915 *
916 * loop {...}
917 * ...
918 * loop {...}
919 *
920 * It is unsafe to unroll both loops in one pass without taking
921 * complicating precautions, since the structure of the block would
922 * change after unrolling the first loop. So in such a case we leave
923 * the second loop for the next iteration of unrolling to handle.
924 */
925
926 bool progress = false;
927 bool unrolled_this_block = false;
928
929 foreach_list_typed(nir_cf_node, nested_node, node, block) {
930 if (process_loops(sh, nested_node,
931 has_nested_loop_out, &unrolled_this_block)) {
932 progress = true;
933
934 /* If current node is unrolled we could not safely continue
935 * our iteration since we don't know the next node
936 * and it's hard to guarantee that we won't end up unrolling
937 * inner loop of the currently unrolled one, if such exists.
938 */
939 if (unrolled_this_block) {
940 break;
941 }
942 }
943 }
944
945 return progress;
946 }
947
948 static bool
process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out, bool *unrolled_this_block)949 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
950 bool *unrolled_this_block)
951 {
952 bool progress = false;
953 bool has_nested_loop = false;
954 nir_loop *loop;
955
956 switch (cf_node->type) {
957 case nir_cf_node_block:
958 return progress;
959 case nir_cf_node_if: {
960 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
961 progress |= process_loops_in_block(sh, &if_stmt->then_list,
962 has_nested_loop_out);
963 progress |= process_loops_in_block(sh, &if_stmt->else_list,
964 has_nested_loop_out);
965 return progress;
966 }
967 case nir_cf_node_loop: {
968 loop = nir_cf_node_as_loop(cf_node);
969 progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop);
970
971 break;
972 }
973 default:
974 unreachable("unknown cf node type");
975 }
976
977 const bool unrolled_child_block = progress;
978
979 /* Don't attempt to unroll a second inner loop in this pass, wait until the
980 * next pass as we have altered the cf.
981 */
982 if (!progress && loop->control != nir_loop_control_dont_unroll) {
983
984 /* Remove the conditional break statements associated with all terminators
985 * that are associated with a fixed iteration count, except for the one
986 * associated with the limiting terminator--that one needs to stay, since
987 * it terminates the loop.
988 */
989 if (loop->info->limiting_terminator) {
990 list_for_each_entry_safe(nir_loop_terminator, t,
991 &loop->info->loop_terminator_list,
992 loop_terminator_link) {
993 if (t->exact_trip_count_unknown)
994 continue;
995
996 if (t != loop->info->limiting_terminator) {
997
998 /* Only delete the if-statement if the continue block is empty.
999 * We trust that nir_opt_if() does its job well enough to
1000 * remove all instructions from the continue block when possible.
1001 */
1002 nir_block *first_continue_from_blk = t->continue_from_then ?
1003 nir_if_first_then_block(t->nif) :
1004 nir_if_first_else_block(t->nif);
1005
1006 if (!(nir_cf_node_is_last(&first_continue_from_blk->cf_node) &&
1007 exec_list_is_empty(&first_continue_from_blk->instr_list)))
1008 continue;
1009
1010 /* Now delete the if */
1011 nir_cf_node_remove(&t->nif->cf_node);
1012
1013 /* Also remove it from the terminator list */
1014 list_del(&t->loop_terminator_link);
1015
1016 progress = true;
1017 }
1018 }
1019 }
1020
1021 /* Check for the classic
1022 *
1023 * do {
1024 * // ...
1025 * } while (false)
1026 *
1027 * that is used to wrap multi-line macros. GLSL IR also wraps switch
1028 * statements in a loop like this.
1029 */
1030 if (loop->info->limiting_terminator == NULL &&
1031 !loop->info->complex_loop) {
1032
1033 nir_block *last_loop_blk = nir_loop_last_block(loop);
1034 if (nir_block_ends_in_break(last_loop_blk)) {
1035 progress = wrapper_unroll(loop);
1036 goto exit;
1037 }
1038
1039 /* If we were able to guess the loop iteration based on array access
1040 * then do a partial unroll.
1041 */
1042 unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1043 if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
1044 loop->info->guessed_trip_count &&
1045 check_unrolling_restrictions(sh, loop)) {
1046 partial_unroll(sh, loop, loop->info->guessed_trip_count);
1047 progress = true;
1048 }
1049 }
1050
1051 /* Intentionally don't consider exact_trip_count_known here. When
1052 * max_trip_count is non-zero, it is the upper bound on the number of
1053 * times the loop will iterate, but the loop may iterate less. For
1054 * example, the following loop will iterate 0 or 1 time:
1055 *
1056 * for (i = 0; i < min(x, 1); i++) { ... }
1057 *
1058 * Trivial single-interation loops (e.g., do { ... } while (false)) and
1059 * trivial zero-iteration loops (e.g., while (false) { ... }) will have
1060 * already been handled.
1061 *
1062 * If the loop is known to execute at most once and meets the other
1063 * unrolling criteria, unroll it even if it has nested loops.
1064 *
1065 * It is unlikely that such loops exist in real shaders. GraphicsFuzz is
1066 * known to generate spurious loops that iterate exactly once. It is
1067 * plausible that it could eventually start generating loops like the
1068 * example above, so it seems logical to defend against it now.
1069 */
1070 if (!loop->info->limiting_terminator ||
1071 (loop->info->max_trip_count != 1 && has_nested_loop))
1072 goto exit;
1073
1074 if (!check_unrolling_restrictions(sh, loop))
1075 goto exit;
1076
1077 if (loop->info->exact_trip_count_known) {
1078 simple_unroll(loop);
1079 progress = true;
1080 } else {
1081 /* Attempt to unroll loops with two terminators. */
1082 unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1083 if (num_lt == 2 &&
1084 !loop->info->limiting_terminator->exact_trip_count_unknown) {
1085 bool limiting_term_second = true;
1086 nir_loop_terminator *terminator =
1087 list_first_entry(&loop->info->loop_terminator_list,
1088 nir_loop_terminator, loop_terminator_link);
1089
1090
1091 if (terminator->nif == loop->info->limiting_terminator->nif) {
1092 limiting_term_second = false;
1093 terminator =
1094 list_last_entry(&loop->info->loop_terminator_list,
1095 nir_loop_terminator, loop_terminator_link);
1096 }
1097
1098 /* If the first terminator has a trip count of zero and is the
1099 * limiting terminator just do a simple unroll as the second
1100 * terminator can never be reached.
1101 */
1102 if (loop->info->max_trip_count == 0 && !limiting_term_second) {
1103 simple_unroll(loop);
1104 } else {
1105 complex_unroll(loop, terminator, limiting_term_second);
1106 }
1107 progress = true;
1108 }
1109
1110 if (num_lt == 1) {
1111 assert(loop->info->limiting_terminator->exact_trip_count_unknown);
1112 complex_unroll_single_terminator(loop);
1113 progress = true;
1114 }
1115 }
1116 }
1117
1118 exit:
1119 *has_nested_loop_out = true;
1120 if (progress && !unrolled_child_block)
1121 *unrolled_this_block = true;
1122
1123 return progress;
1124 }
1125
1126 static bool
nir_opt_loop_unroll_impl(nir_function_impl *impl, nir_variable_mode indirect_mask, bool force_unroll_sampler_indirect)1127 nir_opt_loop_unroll_impl(nir_function_impl *impl,
1128 nir_variable_mode indirect_mask,
1129 bool force_unroll_sampler_indirect)
1130 {
1131 bool progress = false;
1132 nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask,
1133 (int) force_unroll_sampler_indirect);
1134 nir_metadata_require(impl, nir_metadata_block_index);
1135
1136 bool has_nested_loop = false;
1137 progress |= process_loops_in_block(impl->function->shader, &impl->body,
1138 &has_nested_loop);
1139
1140 if (progress) {
1141 nir_metadata_preserve(impl, nir_metadata_none);
1142 nir_lower_regs_to_ssa_impl(impl);
1143 } else {
1144 nir_metadata_preserve(impl, nir_metadata_all);
1145 }
1146
1147 return progress;
1148 }
1149
1150 /**
1151 * indirect_mask specifies which type of indirectly accessed variables
1152 * should force loop unrolling.
1153 */
1154 bool
nir_opt_loop_unroll(nir_shader *shader)1155 nir_opt_loop_unroll(nir_shader *shader)
1156 {
1157 bool progress = false;
1158
1159 bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler;
1160 nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
1161 nir_foreach_function(function, shader) {
1162 if (function->impl) {
1163 progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask,
1164 force_unroll_sampler_indirect);
1165 }
1166 }
1167 return progress;
1168 }
1169