/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir/nir_builder.h"
#include "nir_constant_expressions.h"
#include "nir_control_flow.h"
#include "nir_loop_analyze.h"

static nir_ssa_def *clone_alu_and_replace_src_defs(nir_builder *b,
                                                   const nir_alu_instr *alu,
                                                   nir_ssa_def **src_defs);
/**
 * Gets the single block that jumps back to the loop header. Assumes there is
 * exactly one such block.
 */
static nir_block*
find_continue_block(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   assert(header_block->predecessors->entries == 2);

   set_foreach(header_block->predecessors, pred_entry) {
      if (pred_entry->key != prev_block)
         return (nir_block*)pred_entry->key;
   }

   unreachable("Continue block not found!");
}

/**
 * Does a phi have one constant value from outside a loop and one from inside?
 */
static bool
phi_has_constant_from_outside_and_one_from_inside_loop(nir_phi_instr *phi,
                                                       const nir_block *entry_block,
                                                       bool *entry_val,
                                                       bool *continue_val)
{
   /* We already know we have exactly one continue */
   assert(exec_list_length(&phi->srcs) == 2);

   *entry_val = false;
   *continue_val = false;

   nir_foreach_phi_src(src, phi) {
      if (!nir_src_is_const(src->src))
         return false;

      if (src->pred != entry_block) {
         *continue_val = nir_src_as_bool(src->src);
      } else {
         *entry_val = nir_src_as_bool(src->src);
      }
   }

   return true;
}
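
/* For example, with entry_block == block_0, the condition phi from the
 * comment below:
 *
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *
 * where ssa_0 is 0x00000000 and ssa_1 is 0xffffffff, makes this helper
 * return true with *entry_val = false and *continue_val = true.
 */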

/**
 * This optimization detects if statements at the tops of loops where the
 * condition is a phi node of two constants and moves half of the if to above
 * the loop and the other half of the if to the end of the loop. A simple for
 * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
 * ends up looking something like this:
 *
 * vec1 32 ssa_0 = load_const (0x00000000)
 * vec1 32 ssa_1 = load_const (0xffffffff)
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *    if ssa_3 {
 *       block block_2:
 *       vec1 32 ssa_4 = load_const (0x00000001)
 *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
 *    } else {
 *       block block_3:
 *    }
 *    block block_4:
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 * }
 *
 * This turns it into something like this:
 *
 * // Stuff from block 1
 * // Stuff from block 3
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_2, ssa_6
 *    if ssa_7 {
 *       block block_5:
 *    } else {
 *       block block_6:
 *       break
 *    }
 *    block block_7:
 *    // Stuff from block 1
 *    // Stuff from block 2
 *    vec1 32 ssa_4 = load_const (0x00000001)
 *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
 * }
 */
static bool
opt_peel_loop_initial_if(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
   if (!if_node || if_node->type != nir_cf_node_if)
      return false;

   nir_if *nif = nir_cf_node_as_if(if_node);
   if (!nif->condition.is_ssa)
      return false;

   nir_ssa_def *cond = nif->condition.ssa;
   if (cond->parent_instr->type != nir_instr_type_phi)
      return false;

   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
   if (cond->parent_instr->block != header_block)
      return false;

   bool entry_val = false, continue_val = false;
   if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                               prev_block,
                                                               &entry_val,
                                                               &continue_val))
      return false;

   /* If they both execute or both don't execute, this is a job for
    * nir_dead_cf, not this pass.
    */
   if ((entry_val && continue_val) || (!entry_val && !continue_val))
      return false;

   struct exec_list *continue_list, *entry_list;
   if (continue_val) {
      continue_list = &nif->then_list;
      entry_list = &nif->else_list;
   } else {
      continue_list = &nif->else_list;
      entry_list = &nif->then_list;
   }

   /* We want to be moving the contents of entry_list to above the loop so it
    * can't contain any break or continue instructions.
    */
   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
      nir_foreach_block_in_cf_node(block, cf_node) {
         nir_instr *last_instr = nir_block_last_instr(block);
         if (last_instr && last_instr->type == nir_instr_type_jump)
            return false;
      }
   }

   /* We're about to re-arrange a bunch of blocks so make sure that we don't
    * have deref uses which cross block boundaries. We don't want a deref
    * accidentally ending up in a phi.
    */
   nir_rematerialize_derefs_in_use_blocks_impl(
      nir_cf_node_get_function(&loop->cf_node));

   /* Before we do anything, convert the loop to LCSSA. We're about to
    * replace a bunch of SSA defs with registers and this will prevent any of
    * it from leaking outside the loop.
    */
   nir_convert_loop_to_lcssa(loop);

   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   /* Get rid of phis in the header block since we will be duplicating it */
   nir_lower_phis_to_regs_block(header_block);
   /* Get rid of phis after the if since dominance will change */
   nir_lower_phis_to_regs_block(after_if_block);

   /* Get rid of SSA defs in the pieces we're about to move around */
   nir_lower_ssa_defs_to_regs_block(header_block);
   nir_foreach_block_in_cf_node(block, &nif->cf_node)
      nir_lower_ssa_defs_to_regs_block(block);

   nir_cf_list header, tmp;
   nir_cf_extract(&header, nir_before_block(header_block),
                  nir_after_block(header_block));

   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
                  nir_after_cf_list(entry_list));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));

   nir_cf_reinsert(&header,
                   nir_after_block_before_jump(find_continue_block(loop)));

   bool continue_list_jumps =
      nir_block_ends_in_jump(exec_node_data(nir_block,
                                            exec_list_get_tail(continue_list),
                                            cf_node.node));

   nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
                  nir_after_cf_list(continue_list));

   /* Get the continue block again, as the previous reinsert might have
    * removed the block. Also, if both the continue list and the continue
    * block end in jump instructions, remove the jump from the latter, as it
    * would not be executed once the continue list is inserted before it.
    */
   nir_block *continue_block = find_continue_block(loop);

   if (continue_list_jumps) {
      nir_instr *last_instr = nir_block_last_instr(continue_block);
      if (last_instr && last_instr->type == nir_instr_type_jump)
         nir_instr_remove(last_instr);
   }

   nir_cf_reinsert(&tmp,
                   nir_after_block_before_jump(continue_block));

   nir_cf_node_remove(&nif->cf_node);

   return true;
}

static bool
alu_instr_is_comparison(const nir_alu_instr *alu)
{
   switch (alu->op) {
   case nir_op_flt32:
   case nir_op_fge32:
   case nir_op_feq32:
   case nir_op_fneu32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_ieq32:
   case nir_op_ine32:
      return true;
   default:
      return nir_alu_instr_is_comparison(alu);
   }
}
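
/* A "type conversion" here is any unary opcode whose result type differs
 * from its source type, e.g. nir_op_f2i32 or nir_op_i2f32.
 */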
static bool
alu_instr_is_type_conversion(const nir_alu_instr *alu)
{
   return nir_op_infos[alu->op].num_inputs == 1 &&
          nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0];
}

static bool
is_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
   if (!nir_op_is_selection(bcsel->op))
      return false;

   for (unsigned i = 0; i < 3; i++) {
      if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
          bcsel->src[i].src.ssa->parent_instr->block != instr->block)
         return false;

      if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) {
         /* opt_split_alu_of_phi() is able to peel that src from the loop */
         if (i == 0 || !allow_non_phi_src)
            return false;
         allow_non_phi_src = false;
      }
   }

   nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) {
      if (!nir_src_is_const(src->src))
         return false;
   }

   return true;
}
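
/* For example, in the IR shown in the opt_simplify_bcsel_of_phi() comment
 * below,
 *
 *    vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11
 *
 * is trivial: all three sources are phis from the same block, and the
 * condition ssa_12 only ever selects constants (ssa_10 and ssa_15).
 */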

/**
 * Splits ALU instructions that have a source that is a phi node
 *
 * ALU instructions in the header block of a loop that meet the following
 * criteria can be split.
 *
 * - The loop has no continue instructions other than the "natural" continue
 *   at the bottom of the loop.
 *
 * - At least one source of the instruction is a phi node from the header block.
 *
 * - Any non-phi sources of the ALU instruction come from a block that
 *   dominates the block before the loop. The most common failure mode for
 *   this check is sources that are generated in the loop header block.
 *
 * - The phi node selects a constant or undef from the block before the loop,
 *   or the only ALU user is a trivial bcsel that gets removed by peeling the
 *   ALU.
 *
 * The split process splits the original ALU instruction into two, one at the
 * bottom of the loop and one at the block before the loop. The instruction
 * before the loop computes the value on the first iteration, and the
 * instruction at the bottom computes the value on the second, third, and so
 * on. A new phi node is added to the header block that selects either the
 * instruction before the loop or the one at the end, and uses of the original
 * instruction are replaced by this phi.
 *
 * The splitting transforms a loop like:
 *
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
 *    vec1 32 ssa_14 = iadd ssa_11, ssa_8
 *    vec1 32 ssa_15 = b32csel ssa_13, ssa_14, ssa_12
 *    ...
 *    // succs: block_1
 * }
 *
 * into:
 *
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * vec1 32 ssa_22 = iadd ssa_10, ssa_8
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
 *    vec1 32 ssa_21 = phi block_0: ssa_22, block_4: ssa_20
 *    vec1 32 ssa_15 = b32csel ssa_13, ssa_21, ssa_12
 *    ...
 *    vec1 32 ssa_20 = iadd ssa_15, ssa_8
 *    // succs: block_1
 * }
 */
static bool
opt_split_alu_of_phi(nir_builder *b, nir_loop *loop)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_block *continue_block = find_continue_block(loop);
   if (continue_block == header_block)
      return false;

   nir_foreach_instr_safe(instr, header_block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *const alu = nir_instr_as_alu(instr);

      /* nir_op_vec{2,3,4} and nir_op_mov are excluded because they can easily
       * lead to infinite optimization loops. Splitting comparisons can lead
       * to loop unrolling not recognizing loop terminators, and type
       * conversions also lead to regressions.
       */
      if (nir_op_is_vec(alu->op) ||
          alu_instr_is_comparison(alu) ||
          alu_instr_is_type_conversion(alu))
         continue;

      bool has_phi_src_from_prev_block = false;
      bool all_non_phi_exist_in_prev_block = true;
      bool is_prev_result_undef = true;
      bool is_prev_result_const = true;
      nir_ssa_def *prev_srcs[8];     // FINISHME: Array size?
      nir_ssa_def *continue_srcs[8]; // FINISHME: Array size?

      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         nir_instr *const src_instr = alu->src[i].src.ssa->parent_instr;

         /* If the source is a phi in the loop header block, then the
          * prev_srcs and continue_srcs will come from the different sources
          * of the phi.
          */
         if (src_instr->type == nir_instr_type_phi &&
             src_instr->block == header_block) {
            nir_phi_instr *const phi = nir_instr_as_phi(src_instr);

            /* Only strictly need to NULL out the pointers when the assertions
             * (below) are compiled in. Debugging a NULL pointer deref in the
             * wild is easier than debugging a random pointer deref, so set
             * NULL unconditionally just to be safe.
             */
            prev_srcs[i] = NULL;
            continue_srcs[i] = NULL;

            nir_foreach_phi_src(src_of_phi, phi) {
               if (src_of_phi->pred == prev_block) {
                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_ssa_undef) {
                     is_prev_result_undef = false;
                  }

                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_load_const) {
                     is_prev_result_const = false;
                  }

                  prev_srcs[i] = src_of_phi->src.ssa;
                  has_phi_src_from_prev_block = true;
               } else
                  continue_srcs[i] = src_of_phi->src.ssa;
            }

            assert(prev_srcs[i] != NULL);
            assert(continue_srcs[i] != NULL);
         } else {
            /* If the source is not a phi (or a phi in a block other than the
             * loop header), then the value must exist in prev_block.
             */
            if (!nir_block_dominates(src_instr->block, prev_block)) {
               all_non_phi_exist_in_prev_block = false;
               break;
            }

            prev_srcs[i] = alu->src[i].src.ssa;
            continue_srcs[i] = alu->src[i].src.ssa;
         }
      }

      if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block)
         continue;

      if (!is_prev_result_undef && !is_prev_result_const) {
         /* check if the only user is a trivial bcsel */
         if (!list_is_empty(&alu->dest.dest.ssa.if_uses) ||
             !list_is_singular(&alu->dest.dest.ssa.uses))
            continue;

         nir_src *use = list_first_entry(&alu->dest.dest.ssa.uses, nir_src, use_link);
         if (!is_trivial_bcsel(use->parent_instr, true))
            continue;
      }

      /* Split ALU of Phi */
      b->cursor = nir_after_block(prev_block);
      nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);

      /* Make a copy of the original ALU instruction. Replace the sources
       * of the new instruction that read a phi with an undef source from
       * prev_block with the non-undef source of that phi.
       *
       * Insert the new instruction at the end of the continue block.
       */
      b->cursor = nir_after_block_before_jump(continue_block);

      nir_ssa_def *const alu_copy =
         clone_alu_and_replace_src_defs(b, alu, continue_srcs);

      /* Make a new phi node that selects a value from prev_block and the
       * result of the new instruction from continue_block.
       */
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block, nir_src_for_ssa(prev_value));
      nir_phi_instr_add_src(phi, continue_block, nir_src_for_ssa(alu_copy));

      nir_ssa_dest_init(&phi->instr, &phi->dest,
                        alu_copy->num_components, alu_copy->bit_size, NULL);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the original ALU instruction to read the
       * result of the phi.
       */
      nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
                               &phi->dest.ssa);

      /* Since the original ALU instruction no longer has any readers, just
       * remove it.
       */
      nir_instr_remove_v(&alu->instr);
      nir_instr_free(&alu->instr);

      progress = true;
   }

   return progress;
}

/**
 * Simplify a bcsel whose sources are all phi nodes from the loop header block
 *
 * bcsel instructions in a loop that meet the following criteria can be
 * converted to phi nodes:
 *
 * - The loop has no continue instructions other than the "natural" continue
 *   at the bottom of the loop.
 *
 * - All of the sources of the bcsel are phi nodes in the header block of the
 *   loop.
 *
 * - The phi node representing the condition of the bcsel instruction chooses
 *   only constant values.
 *
 * The constant value from the condition will select one of the other sources
 * when entered from outside the loop and the remaining source when entered
 * from the continue block. Since each of these sources is also a phi node in
 * the header block, the value of the phi node can be "evaluated." These
 * evaluated phi nodes provide the sources for a new phi node. All users of
 * the bcsel result are updated to use the phi node result.
 *
 * The replacement transforms loops like:
 *
 * vec1 32 ssa_7 = undefined
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_9 = load_const (0x000000c8)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
 *    vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
 *    vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11
 *    vec1 32 ssa_16 = ige32 ssa_14, ssa_9
 *    ...
 *    vec1 32 ssa_15 = load_const (0xffffffff)
 *    ...
 *    vec1 32 ssa_25 = iadd ssa_14, ssa_8
 *    // succs: block_1
 * }
 *
 * into:
 *
 * vec1 32 ssa_7 = undefined
 * vec1 32 ssa_8 = load_const (0x00000001)
 * vec1 32 ssa_9 = load_const (0x000000c8)
 * vec1 32 ssa_10 = load_const (0x00000000)
 * // succs: block_1
 * loop {
 *    block block_1:
 *    // preds: block_0 block_4
 *    vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
 *    vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
 *    vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
 *    vec1 32 ssa_26 = phi block_0: ssa_1, block_4: ssa_25
 *    vec1 32 ssa_16 = ige32 ssa_26, ssa_9
 *    ...
 *    vec1 32 ssa_15 = load_const (0xffffffff)
 *    ...
 *    vec1 32 ssa_25 = iadd ssa_26, ssa_8
 *    // succs: block_1
 * }
 *
 * \note
 * It may be possible to modify this function to not require a phi node as the
 * source of the bcsel that is selected when entering from outside the loop.
 * The only restriction is that the source must be generated outside the loop
 * (since it will become the source of a phi node in the header block of the
 * loop).
 */
static bool
opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   /* We can move any bcsel that is guaranteed to execute on every iteration
    * of a loop. For now this is accomplished by only taking bcsels from the
    * header_block. In the future, this could be expanded to include any
    * bcsel that must come before any break.
    *
    * For more details, see
    * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
    */
   nir_foreach_instr_safe(instr, header_block) {
      if (!is_trivial_bcsel(instr, false))
         continue;

      nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
      nir_phi_instr *const cond_phi =
         nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);

      bool entry_val = false, continue_val = false;
      if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                                  prev_block,
                                                                  &entry_val,
                                                                  &continue_val))
         continue;

      /* If they both execute or both don't execute, this is a job for
       * nir_dead_cf, not this pass.
       */
      if ((entry_val && continue_val) || (!entry_val && !continue_val))
         continue;

      const unsigned entry_src = entry_val ? 1 : 2;
      const unsigned continue_src = entry_val ? 2 : 1;

      /* Create a new phi node that selects the value for prev_block from
       * the bcsel source that is selected by entry_val and the value for
       * continue_block from the other bcsel source. Both sources have
       * already been verified to be phi nodes.
       */
      nir_block *continue_block = find_continue_block(loop);
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr),
                                                       prev_block)->src);

      nir_phi_instr_add_src(phi, continue_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr),
                                                       continue_block)->src);

      nir_ssa_dest_init(&phi->instr,
                        &phi->dest,
                        nir_dest_num_components(bcsel->dest.dest),
                        nir_dest_bit_size(bcsel->dest.dest),
                        NULL);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the bcsel instruction to read the result of
       * the phi.
       */
      nir_ssa_def_rewrite_uses(&bcsel->dest.dest.ssa,
                               &phi->dest.ssa);

      /* Since the original bcsel instruction no longer has any readers,
       * just remove it.
       */
      nir_instr_remove_v(&bcsel->instr);
      nir_instr_free(&bcsel->instr);

      progress = true;
   }

   return progress;
}

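/* A block is "empty" for our purposes only if it contains no instructions
 * and nothing follows it in its CF list.
 */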
static bool
is_block_empty(nir_block *block)
{
   return nir_cf_node_is_last(&block->cf_node) &&
          exec_list_is_empty(&block->instr_list);
}

static bool
nir_block_ends_in_continue(nir_block *block)
{
   if (exec_list_is_empty(&block->instr_list))
      return false;

   nir_instr *instr = nir_block_last_instr(block);
   return instr->type == nir_instr_type_jump &&
          nir_instr_as_jump(instr)->type == nir_jump_continue;
}

/**
 * This optimization turns:
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *       continue;
 *    } else {
 *    }
 *    do_work_2();
 * }
 *
 * into:
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *       continue;
 *    } else {
 *       do_work_2();
 *    }
 * }
 *
 * The continue should then be removed by nir_opt_trivial_continues() and the
 * loop can potentially be unrolled.
 *
 * Note: Unless the function parameter aggressive_last_continue is true,
 * do_work_2() is only ever blocks and nested loops. We avoid nesting other
 * if-statements in the branch as this can result in increased register
 * pressure, and in the i965 driver it causes a large amount of spilling in
 * shader-db. For RADV, however, nesting these if-statements allows further
 * continues to be removed and provides a significant FPS boost in Doom,
 * which is why we have opted for this special bool to enable more
 * aggressive optimisations.
 * TODO: The GCM pass solves most of the spilling regressions in i965, if it
 * is ever enabled we should consider removing the aggressive_last_continue
 * param.
 */
static bool
opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
{
   nir_if *nif = NULL;
   bool then_ends_in_continue = false;
   bool else_ends_in_continue = false;

   /* Scan the control flow of the loop from the last to the first node
    * looking for an if-statement we can optimise.
    */
   nir_block *last_block = nir_loop_last_block(loop);
   nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
   while (if_node) {
      if (if_node->type == nir_cf_node_if) {
         nif = nir_cf_node_as_if(if_node);
         nir_block *then_block = nir_if_last_then_block(nif);
         nir_block *else_block = nir_if_last_else_block(nif);

         then_ends_in_continue = nir_block_ends_in_continue(then_block);
         else_ends_in_continue = nir_block_ends_in_continue(else_block);

         /* If both branches end in a jump do nothing, this should be handled
          * by nir_opt_dead_cf().
          */
         if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
             (else_ends_in_continue || nir_block_ends_in_break(else_block)))
            return false;

         /* If a continue was found, stop scanning and attempt the
          * optimisation. Unless we are being aggressive, also stop at the
          * first if-statement we encounter.
          */
         if (then_ends_in_continue || else_ends_in_continue ||
             !aggressive_last_continue)
            break;
      }

      if_node = nir_cf_node_prev(if_node);
   }

   /* If we didn't find an if to optimise return */
   if (!nif || (!then_ends_in_continue && !else_ends_in_continue))
      return false;

   /* If there is nothing after the if-statement we bail */
   if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
       exec_list_is_empty(&last_block->instr_list))
      return false;

   /* If there are single-source phis in the last block,
    * get rid of them first
    */
   nir_opt_remove_phis_block(last_block);

   /* Move the last block of the loop inside the last if-statement */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_after_cf_node(if_node),
                  nir_after_block(last_block));
   if (then_ends_in_continue)
      nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->else_list));
   else
      nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->then_list));

   /* In order to avoid running nir_lower_regs_to_ssa_impl() every time an if
    * opt makes progress we leave nir_opt_trivial_continues() to remove the
    * continue now that the end of the loop has been simplified.
    */

   return true;
}

/* Walk all the phis in the block immediately following the if statement and
 * swap the blocks.
 */
static void
rewrite_phi_predecessor_blocks(nir_if *nif,
                               nir_block *old_then_block,
                               nir_block *old_else_block,
                               nir_block *new_then_block,
                               nir_block *new_else_block)
{
   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   nir_foreach_instr(instr, after_if_block) {
      if (instr->type != nir_instr_type_phi)
         continue;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      nir_foreach_phi_src(src, phi) {
         if (src->pred == old_then_block) {
            src->pred = new_then_block;
         } else if (src->pred == old_else_block) {
            src->pred = new_else_block;
         }
      }
   }
}

/**
 * This optimization turns:
 *
 * if (cond) {
 * } else {
 *    do_work();
 * }
 *
 * into:
 *
 * if (!cond) {
 *    do_work();
 * } else {
 * }
 */
static bool
opt_if_simplification(nir_builder *b, nir_if *nif)
{
   /* Only simplify if the then block is empty and the else block is not. */
   if (!is_block_empty(nir_if_first_then_block(nif)) ||
       is_block_empty(nir_if_first_else_block(nif)))
      return false;

   /* Make sure the condition is a comparison operation. */
   nir_instr *src_instr = nif->condition.ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
   if (!nir_alu_instr_is_comparison(alu_instr))
      return false;

   /* Insert the inverted instruction and rewrite the condition. */
   b->cursor = nir_after_instr(&alu_instr->instr);

   nir_ssa_def *new_condition =
      nir_inot(b, &alu_instr->dest.dest.ssa);

   nir_if_rewrite_condition(nif, nir_src_for_ssa(new_condition));

   /* Grab pointers to the last then/else blocks for fixing up the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   nir_block *else_block = nir_if_last_else_block(nif);

   if (nir_block_ends_in_jump(else_block)) {
      /* Even though this if statement has a jump on one side, we may still
       * have phis afterwards. Single-source phis can be produced by loop
       * unrolling or dead control-flow passes and are perfectly legal. Run a
       * quick phi removal on the block after the if to clean up any such
       * phis.
       */
      nir_block *const next_block =
         nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
      nir_opt_remove_phis_block(next_block);
   }

   rewrite_phi_predecessor_blocks(nif, then_block, else_block, else_block,
                                  then_block);

   /* Finally, move the else block to the then block. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
                  nir_after_cf_list(&nif->else_list));
   nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));

   return true;
}

/* Find phi statements after an if that choose between true and false, and
 * replace them with the if statement's condition (or an inot of it).
 */
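/* For example, a phi like
 *
 *    vec1 32 ssa_6 = phi then_block: ssa_t, else_block: ssa_f
 *
 * where ssa_t is the constant -1 (true) and ssa_f is 0 (false), is replaced
 * by a direct use of the if condition (block and value names here are
 * illustrative).
 */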
static bool
opt_if_phi_is_condition(nir_builder *b, nir_if *nif)
{
   /* Grab pointers to the last then/else blocks for looking in the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   nir_block *else_block = nir_if_last_else_block(nif);
   nir_ssa_def *cond = nif->condition.ssa;
   bool progress = false;

   nir_block *after_if_block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
   nir_foreach_instr_safe(instr, after_if_block) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (phi->dest.ssa.bit_size != cond->bit_size ||
          phi->dest.ssa.num_components != 1)
         continue;

      enum opt_bool {
         T, F, UNKNOWN
      } then_val = UNKNOWN, else_val = UNKNOWN;

      nir_foreach_phi_src(src, phi) {
         assert(src->pred == then_block || src->pred == else_block);
         enum opt_bool *pred_val = src->pred == then_block ? &then_val : &else_val;

         nir_ssa_scalar val = nir_ssa_scalar_resolved(src->src.ssa, 0);
         if (!nir_ssa_scalar_is_const(val))
            break;

         if (nir_ssa_scalar_as_int(val) == -1)
            *pred_val = T;
         else if (nir_ssa_scalar_as_uint(val) == 0)
            *pred_val = F;
         else
            break;
      }
      if (then_val == T && else_val == F) {
         nir_ssa_def_rewrite_uses(&phi->dest.ssa, cond);
         progress = true;
      } else if (then_val == F && else_val == T) {
         b->cursor = nir_before_cf_node(&nif->cf_node);
         nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_inot(b, cond));
         progress = true;
      }
   }

   return progress;
}

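/* Helper assumed by opt_merge_breaks() below: returns true if any block
 * inside the given CF node ends in a jump other than expected_jump. This is
 * a minimal sketch built only from helpers already used elsewhere in this
 * file; the upstream implementation may differ.
 */
static bool
contains_other_jump(nir_cf_node *node, nir_instr *expected_jump)
{
   nir_foreach_block_in_cf_node(block, node) {
      nir_instr *last_instr = nir_block_last_instr(block);

      /* Any trailing jump that is not the one we expect disqualifies us. */
      if (last_instr && last_instr->type == nir_instr_type_jump &&
          last_instr != expected_jump)
         return true;
   }
   return false;
}
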
/**
 * This optimization tries to merge two break statements into a single break.
 * For this purpose, it checks whether both branch legs end in a break, or
 * whether one branch leg ends in a break and the other reaches a break just
 * after the branch.
 *
 * This optimization turns
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *       break;
 *    } else {
 *       do_work_2();
 *       break;
 *    }
 * }
 *
 * into:
 *
 * loop {
 *    ...
 *    if (cond) {
 *       do_work_1();
 *    } else {
 *       do_work_2();
 *    }
 *    break;
 * }
 *
 * but also situations like
 *
 * loop {
 *    ...
 *    if (cond1) {
 *       if (cond2) {
 *          do_work_1();
 *          break;
 *       } else {
 *          do_work_2();
 *       }
 *       do_work_3();
 *       break;
 *    } else {
 *       ...
 *    }
 * }
 *
 * into:
 *
 * loop {
 *    ...
 *    if (cond1) {
 *       if (cond2) {
 *          do_work_1();
 *       } else {
 *          do_work_2();
 *          do_work_3();
 *       }
 *       break;
 *    } else {
 *       ...
 *    }
 * }
 */
static bool
opt_merge_breaks(nir_if *nif)
{
   nir_block *last_then = nir_if_last_then_block(nif);
   nir_block *last_else = nir_if_last_else_block(nif);
   bool then_break = nir_block_ends_in_break(last_then);
   bool else_break = nir_block_ends_in_break(last_else);

   /* If both branch legs end in a break, merge the break after the branch */
   if (then_break && else_break) {
      nir_block *after_if = nir_cf_node_cf_tree_next(&nif->cf_node);
      /* Make sure that the successor is empty.
       * If not we let nir_opt_dead_cf() clean it up first.
       */
      if (!is_block_empty(after_if))
         return false;

      nir_lower_phis_to_regs_block(last_then->successors[0]);
      nir_instr_remove_v(nir_block_last_instr(last_then));
      nir_instr *jump = nir_block_last_instr(last_else);
      nir_instr_remove_v(jump);
      nir_instr_insert(nir_after_block(after_if), jump);
      return true;
   }

   /* Single break: if there is a break after the branch and the non-breaking
    * leg of the if falls through to it, then hoist the code that follows the
    * branch up into that leg and leave just a single break there.
    */
   if (then_break || else_break) {

      /* At least one branch leg must fall through */
      if (nir_block_ends_in_jump(last_then) && nir_block_ends_in_jump(last_else))
         return false;

      /* Check if there is a single break after the IF */
      nir_cf_node *first = nir_cf_node_next(&nif->cf_node);
      nir_cf_node *last = first;
      while (!nir_cf_node_is_last(last)) {
         if (contains_other_jump(last, NULL))
            return false;
         last = nir_cf_node_next(last);
      }

      assert(last->type == nir_cf_node_block);
      if (!nir_block_ends_in_break(nir_cf_node_as_block(last)))
         return false;

      /* Hoist the code from after the IF into the falling-through branch leg */
      nir_opt_remove_phis_block(nir_cf_node_as_block(first));
      nir_block *break_block = then_break ? last_then : last_else;
      nir_lower_phis_to_regs_block(break_block->successors[0]);

      nir_cf_list tmp;
      nir_cf_extract(&tmp, nir_before_cf_node(first),
                     nir_after_block_before_jump(nir_cf_node_as_block(last)));
      if (then_break)
         nir_cf_reinsert(&tmp, nir_after_block(last_else));
      else
         nir_cf_reinsert(&tmp, nir_after_block(last_then));

      nir_instr_remove_v(nir_block_last_instr(break_block));
      return true;
   }

   return false;
}

/**
 * This optimization simplifies potential loop terminators which then allows
 * other passes such as opt_if_simplification() and loop unrolling to progress
 * further:
 *
 * if (cond) {
 *    ... then block instructions ...
 * } else {
 *    ...
 *    break;
 * }
 *
 * into:
 *
 * if (cond) {
 * } else {
 *    ...
 *    break;
 * }
 * ... then block instructions ...
 */
static bool
opt_if_loop_terminator(nir_if *nif)
{
   nir_block *break_blk = NULL;
   nir_block *continue_from_blk = NULL;
   bool continue_from_then = true;

   nir_block *last_then = nir_if_last_then_block(nif);
   nir_block *last_else = nir_if_last_else_block(nif);

   if (nir_block_ends_in_break(last_then)) {
      break_blk = last_then;
      continue_from_blk = last_else;
      continue_from_then = false;
   } else if (nir_block_ends_in_break(last_else)) {
      break_blk = last_else;
      continue_from_blk = last_then;
   }

   /* Bail if the if-statement contains no breaks at all */
   if (!break_blk)
      return false;

   /* If the continue from block is empty then return as there is nothing to
    * move.
    */
   nir_block *first_continue_from_blk = continue_from_then ?
      nir_if_first_then_block(nif) :
      nir_if_first_else_block(nif);
   if (is_block_empty(first_continue_from_blk))
      return false;

   if (nir_block_ends_in_jump(continue_from_blk))
      return false;

   /* Even though this if statement has a jump on one side, we may still have
    * phis afterwards. Single-source phis can be produced by loop unrolling
    * or dead control-flow passes and are perfectly legal. Run a quick phi
    * removal on the block after the if to clean up any such phis.
    */
   nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));

   /* Finally, move the continue from branch after the if-statement. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk),
                  nir_after_block(continue_from_blk));
   nir_cf_reinsert(&tmp, nir_after_cf_node(&nif->cf_node));

   return true;
}

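/* Resolve the value of an if condition at a given use: a use dominated by
 * the first then-block can only be reached when the condition was true, and
 * a use dominated by the first else-block only when it was false.
 */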
static bool
evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
{
   nir_block *use_block = nir_cursor_current_block(cursor);
   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
      *value = true;
      return true;
   } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
      *value = false;
      return true;
   } else {
      return false;
   }
}

static nir_ssa_def *
clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
                               nir_ssa_def **src_defs)
{
   nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
   nalu->exact = alu->exact;

   nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest,
                     alu->dest.dest.ssa.num_components,
                     alu->dest.dest.ssa.bit_size, NULL);

   nalu->dest.saturate = alu->dest.saturate;
   nalu->dest.write_mask = alu->dest.write_mask;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      assert(alu->src[i].src.is_ssa);
      nalu->src[i].src = nir_src_for_ssa(src_defs[i]);
      nalu->src[i].negate = alu->src[i].negate;
      nalu->src[i].abs = alu->src[i].abs;
      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
             sizeof(nalu->src[i].swizzle));
   }

   nir_builder_instr_insert(b, &nalu->instr);

   return &nalu->dest.dest.ssa;
}

/*
 * This propagates if condition evaluation down the chain of some alu
 * instructions. For example, by checking the use of some of the following
 * alu instructions we can eventually replace ssa_107 with NIR_TRUE.
 *
 * loop {
 *    block block_1:
 *    vec1 32 ssa_85 = load_const (0x00000002)
 *    vec1 32 ssa_86 = ieq ssa_48, ssa_85
 *    vec1 32 ssa_87 = load_const (0x00000001)
 *    vec1 32 ssa_88 = ieq ssa_48, ssa_87
 *    vec1 32 ssa_89 = ior ssa_86, ssa_88
 *    vec1 32 ssa_90 = ieq ssa_48, ssa_0
 *    vec1 32 ssa_91 = ior ssa_89, ssa_90
 *    if ssa_86 {
 *       block block_2:
 *       ...
 *       break
 *    } else {
 *       block block_3:
 *    }
 *    block block_4:
 *    if ssa_88 {
 *       block block_5:
 *       ...
 *       break
 *    } else {
 *       block block_6:
 *    }
 *    block block_7:
 *    if ssa_90 {
 *       block block_8:
 *       ...
 *       break
 *    } else {
 *       block block_9:
 *    }
 *    block block_10:
 *    vec1 32 ssa_107 = inot ssa_91
 *    if ssa_107 {
 *       block block_11:
 *       break
 *    } else {
 *       block block_12:
 *    }
 * }
 */
static bool
propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
                         nir_src *alu_use, nir_alu_instr *alu,
                         bool is_if_condition)
{
   bool bool_value;
   b->cursor = nir_before_src(alu_use, is_if_condition);
   if (!evaluate_if_condition(nif, b->cursor, &bool_value))
      return false;

   nir_ssa_def *def[NIR_MAX_VEC_COMPONENTS] = {0};
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (alu->src[i].src.ssa == use_src->ssa) {
         def[i] = nir_imm_bool(b, bool_value);
      } else {
         def[i] = alu->src[i].src.ssa;
      }
   }

   nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def);

   /* Rewrite use to use new alu instruction */
   nir_src new_src = nir_src_for_ssa(nalu);

   if (is_if_condition)
      nir_if_rewrite_condition(alu_use->parent_if, new_src);
   else
      nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);

   return true;
}

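/* Whether evaluate_condition_use() may propagate a known condition value
 * through this ALU use: the boolean ops ior, iand, inot and b2i32 always
 * qualify, while a bcsel qualifies only through its condition source
 * (src[0]), since its data sources pass through unmodified.
 */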
static bool
can_propagate_through_alu(nir_src *src)
{
   if (src->parent_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
   switch (alu->op) {
   case nir_op_ior:
   case nir_op_iand:
   case nir_op_inot:
   case nir_op_b2i32:
      return true;
   case nir_op_bcsel:
      return src == &alu->src[0].src;
   default:
      return false;
   }
}

static bool
evaluate_condition_use(nir_builder *b, nir_if *nif, nir_src *use_src,
                       bool is_if_condition)
{
   bool progress = false;

   b->cursor = nir_before_src(use_src, is_if_condition);

   bool bool_value;
   if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
      /* Rewrite use to use const */
      nir_src imm_src = nir_src_for_ssa(nir_imm_bool(b, bool_value));
      if (is_if_condition)
         nir_if_rewrite_condition(use_src->parent_if, imm_src);
      else
         nir_instr_rewrite_src(use_src->parent_instr, use_src, imm_src);

      progress = true;
   }

   if (!is_if_condition && can_propagate_through_alu(use_src)) {
      nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);

      nir_foreach_use_safe(alu_use, &alu->dest.dest.ssa) {
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
                                              false);
      }

      nir_foreach_if_use_safe(alu_use, &alu->dest.dest.ssa) {
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
                                              true);
      }
   }

   return progress;
}

static bool
opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
{
   bool progress = false;

   /* Evaluate any uses of the if condition inside the if branches */
   assert(nif->condition.is_ssa);
   nir_foreach_use_safe(use_src, nif->condition.ssa) {
      progress |= evaluate_condition_use(b, nif, use_src, false);
   }

   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
      if (use_src->parent_if != nif)
         progress |= evaluate_condition_use(b, nif, use_src, true);
   }

   return progress;
}

static bool
rewrite_comp_uses_within_if(nir_builder *b, nir_if *nif, bool invert,
                            nir_ssa_scalar scalar, nir_ssa_scalar new_scalar)
{
   bool progress = false;

   nir_block *first = invert ? nir_if_first_else_block(nif) : nir_if_first_then_block(nif);
   nir_block *last = invert ? nir_if_last_else_block(nif) : nir_if_last_then_block(nif);

   nir_ssa_def *new_ssa = NULL;
   nir_foreach_use_safe(use, scalar.def) {
      if (use->parent_instr->block->index < first->index ||
          use->parent_instr->block->index > last->index)
         continue;

      /* Only rewrite users which use only the new component. This is to
       * avoid a situation where copy propagation will undo the rewrite and
       * we risk an infinite loop.
       *
       * We could rewrite users which use a mix of the old and new
       * components, but if nir_src_components_read() is incomplete, then we
       * risk the new component actually being unused and some optimization
       * later undoing the rewrite.
       */
      if (nir_src_components_read(use) != BITFIELD64_BIT(scalar.comp))
         continue;

      if (!new_ssa) {
         b->cursor = nir_before_cf_node(&nif->cf_node);
         new_ssa = nir_channel(b, new_scalar.def, new_scalar.comp);
         if (scalar.def->num_components > 1) {
            nir_ssa_def *vec = nir_ssa_undef(b, scalar.def->num_components, scalar.def->bit_size);
            new_ssa = nir_vector_insert_imm(b, vec, new_ssa, scalar.comp);
         }
      }

      nir_instr_rewrite_src_ssa(use->parent_instr, use, new_ssa);
      progress = true;
   }

   return progress;
}

/*
 * This optimization turns:
 *
 * if (a == (b=readfirstlane(a)))
 *    use(a)
 * if (c == (d=load_const))
 *    use(c)
 *
 * into:
 *
 * if (a == (b=readfirstlane(a)))
 *    use(b)
 * if (c == (d=load_const))
 *    use(d)
 */
static bool
opt_if_rewrite_uniform_uses(nir_builder *b, nir_if *nif, nir_ssa_scalar cond, bool accept_ine)
{
   bool progress = false;

   if (!nir_ssa_scalar_is_alu(cond))
      return false;

   nir_op op = nir_ssa_scalar_alu_op(cond);
   if (op == nir_op_iand) {
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 0), false);
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 1), false);
      return progress;
   }

   if (op != nir_op_ieq && (op != nir_op_ine || !accept_ine))
      return false;

   for (unsigned i = 0; i < 2; i++) {
      nir_ssa_scalar src_uni = nir_ssa_scalar_chase_alu_src(cond, i);
      nir_ssa_scalar src_div = nir_ssa_scalar_chase_alu_src(cond, !i);

      if (src_uni.def->parent_instr->type == nir_instr_type_load_const && src_div.def != src_uni.def)
         return rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, src_div, src_uni);

      if (src_uni.def->parent_instr->type != nir_instr_type_intrinsic)
         continue;
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src_uni.def->parent_instr);
      if (intrin->intrinsic != nir_intrinsic_read_first_invocation &&
          (intrin->intrinsic != nir_intrinsic_reduce || nir_intrinsic_cluster_size(intrin)))
         continue;

      nir_ssa_scalar intrin_src = {intrin->src[0].ssa, src_uni.comp};
      nir_ssa_scalar resolved_intrin_src = nir_ssa_scalar_resolved(intrin_src.def, intrin_src.comp);

      if (resolved_intrin_src.comp != src_div.comp || resolved_intrin_src.def != src_div.def)
         continue;

      progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, resolved_intrin_src, src_uni);
      if (intrin_src.comp != resolved_intrin_src.comp || intrin_src.def != resolved_intrin_src.def)
         progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, intrin_src, src_uni);

      return progress;
   }

   return false;
}

static void
simple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then,
                bool src_if_then)
{
   /* Now merge the if branch */
   nir_block *dest_blk = dest_if_then ? nir_if_last_then_block(dest_if)
                                      : nir_if_last_else_block(dest_if);

   struct exec_list *list = src_if_then ? &src_if->then_list
                                        : &src_if->else_list;

   nir_cf_list if_cf_list;
   nir_cf_extract(&if_cf_list, nir_before_cf_list(list),
                  nir_after_cf_list(list));
   nir_cf_reinsert(&if_cf_list, nir_after_block(dest_blk));
}

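/* Merge two consecutive if-statements that share the same SSA condition;
 * see the comment inside for the exact conditions under which this fires.
 */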
static bool
opt_if_merge(nir_if *nif)
{
   bool progress = false;

   nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node);
   if (!next_blk || !nif->condition.is_ssa)
      return false;

   nir_if *next_if = nir_block_get_following_if(next_blk);
   if (!next_if || !next_if->condition.is_ssa)
      return false;

   /* Here we merge two consecutive ifs that have the same condition e.g:
    *
    * if ssa_12 {
    *    ...
    * } else {
    *    ...
    * }
    * if ssa_12 {
    *    ...
    * } else {
    *    ...
    * }
    *
    * Note: This only merges if-statements when the block between them is
    * empty. The reason we don't try to merge ifs that just have phis between
    * them is because this can result in increased register pressure. For
    * example when merging if ladders created by indirect indexing.
    */
   if (nif->condition.ssa == next_if->condition.ssa &&
       exec_list_is_empty(&next_blk->instr_list)) {

      /* This optimization isn't made to work in this case and
       * opt_if_evaluate_condition_use will optimize it later.
       */
      if (nir_block_ends_in_jump(nir_if_last_then_block(nif)) ||
          nir_block_ends_in_jump(nir_if_last_else_block(nif)))
         return false;

      simple_merge_if(nif, next_if, true, true);
      simple_merge_if(nif, next_if, false, false);

      nir_block *new_then_block = nir_if_last_then_block(nif);
      nir_block *new_else_block = nir_if_last_else_block(nif);

      nir_block *old_then_block = nir_if_last_then_block(next_if);
      nir_block *old_else_block = nir_if_last_else_block(next_if);

      /* Rewrite the predecessor block for any phis following the second
       * if-statement.
       */
      rewrite_phi_predecessor_blocks(next_if, old_then_block,
                                     old_else_block,
                                     new_then_block,
                                     new_else_block);

      /* Move phis after merged if to avoid them being deleted when we remove
       * the merged if-statement.
       */
      nir_block *after_next_if_block =
         nir_cf_node_as_block(nir_cf_node_next(&next_if->cf_node));

      nir_foreach_instr_safe(instr, after_next_if_block) {
         if (instr->type != nir_instr_type_phi)
            break;

         exec_node_remove(&instr->node);
         exec_list_push_tail(&next_blk->instr_list, &instr->node);
         instr->block = next_blk;
      }

      nir_cf_node_remove(&next_if->cf_node);

      progress = true;
   }

   return progress;
}

static bool
opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
               nir_opt_if_options options)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_cf_list(b, &nif->then_list, options);
         progress |= opt_if_cf_list(b, &nif->else_list, options);
         progress |= opt_if_loop_terminator(nif);
         progress |= opt_if_merge(nif);
         progress |= opt_if_simplification(b, nif);
         if (options & nir_opt_if_optimize_phi_true_false)
            progress |= opt_if_phi_is_condition(b, nif);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_cf_list(b, &loop->body, options);
         progress |= opt_simplify_bcsel_of_phi(b, loop);
         progress |= opt_if_loop_last_continue(loop,
                                               options & nir_opt_if_aggressive_last_continue);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

/**
 * Optimizations which can create registers are done after other optimizations
 * which require SSA.
 */
static bool
opt_if_regs_cf_list(struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_regs_cf_list(&nif->then_list);
         progress |= opt_if_regs_cf_list(&nif->else_list);
         if (opt_merge_breaks(nif)) {
            /* This optimization might move blocks
             * from after the NIF into the NIF */
            progress = true;
            opt_if_regs_cf_list(&nif->then_list);
            opt_if_regs_cf_list(&nif->else_list);
         }
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_regs_cf_list(&loop->body);
         progress |= opt_peel_loop_initial_if(loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

/**
 * These optimisations depend on nir_metadata_block_index and therefore must
 * not do anything to cause the metadata to become invalid.
 */
static bool
opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_safe_cf_list(b, &nif->then_list);
         progress |= opt_if_safe_cf_list(b, &nif->else_list);
         progress |= opt_if_evaluate_condition_use(b, nif);
         nir_ssa_scalar cond = nir_ssa_scalar_resolved(nif->condition.ssa, 0);
         progress |= opt_if_rewrite_uniform_uses(b, nif, cond, true);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_safe_cf_list(b, &loop->body);
         progress |= opt_split_alu_of_phi(b, loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}

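/* Entry point: run the metadata-safe optimizations first, then the general
 * if-optimizations, and finally the register-creating ones, going back to
 * SSA afterwards when the latter made progress.
 */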
bool
nir_opt_if(nir_shader *shader, nir_opt_if_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl == NULL)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_metadata_require(function->impl, nir_metadata_block_index |
                           nir_metadata_dominance);
      progress |= opt_if_safe_cf_list(&b, &function->impl->body);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                            nir_metadata_dominance);

      bool preserve = true;

      if (opt_if_cf_list(&b, &function->impl->body, options)) {
         preserve = false;
         progress = true;
      }

      if (opt_if_regs_cf_list(&function->impl->body)) {
         preserve = false;
         progress = true;

         /* If that made progress, we're no longer really in SSA form. We
          * need to convert registers back into SSA defs and clean up SSA
          * defs that don't dominate their uses.
          */
         nir_lower_regs_to_ssa_impl(function->impl);
      }

      if (preserve) {
         nir_metadata_preserve(function->impl, nir_metadata_all);
      } else {
         nir_metadata_preserve(function->impl, nir_metadata_none);
      }
   }

   return progress;
}