/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"
#include "nir/nir_builder.h"
#include "nir_control_flow.h"
#include "nir_search_helpers.h"

/*
 * Implements a small peephole optimization that looks for
 *
 * if (cond) {
 *    <then SSA defs>
 * } else {
 *    <else SSA defs>
 * }
 * phi
 * ...
 * phi
 *
 * and replaces it with:
 *
 * <then SSA defs>
 * <else SSA defs>
 * bcsel
 * ...
 * bcsel
 *
 * where the SSA defs are ALU operations or other cheap instructions (not
 * texturing, for example).
 *
 * If the number of ALU operations in the branches is greater than the limit
 * parameter, then the optimization is skipped.  In limit=0 mode, the SSA defs
 * must only be MOVs which we expect to get copy-propagated away once they're
 * out of the inner blocks.
 */

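/* Example usage (hypothetical driver call; the limit and flags below are
 * illustrative, not taken from any particular driver):
 *
 *    nir_opt_peephole_select(shader, 8, true, true);
 *
 * allows up to 8 extra ALU instructions per flattened branch and permits
 * indirect loads and expensive ALU ops to be speculated.  limit == 0
 * flattens only move-like branches, and limit == ~0 flattens every if
 * (for hardware without control flow).
 */
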
static bool
block_check_for_allowed_instrs(nir_block *block, unsigned *count,
                               unsigned limit, bool indirect_load_ok,
                               bool expensive_alu_ok)
{
   bool alu_ok = limit != 0;

   /* Used on non-control-flow HW to flatten all IFs. */
   if (limit == ~0) {
      nir_foreach_instr(instr, block) {
         switch (instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_deref:
         case nir_instr_type_load_const:
         case nir_instr_type_phi:
         case nir_instr_type_ssa_undef:
         case nir_instr_type_tex:
            break;

         case nir_instr_type_intrinsic:
            if (!nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)))
               return false;
            break;

         case nir_instr_type_call:
         case nir_instr_type_jump:
         case nir_instr_type_parallel_copy:
            return false;
         }
      }
      return true;
   }

   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref: {
            nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]);

            switch (deref->modes) {
            case nir_var_shader_in:
            case nir_var_uniform:
            case nir_var_image:
               /* Don't try to remove flow control around an indirect load
                * because that flow control may be trying to avoid invalid
                * loads.
                */
               if (!indirect_load_ok && nir_deref_instr_has_indirect(deref))
                  return false;

               break;

            default:
               return false;
            }
            break;
         }

         case nir_intrinsic_load_uniform:
         case nir_intrinsic_load_helper_invocation:
         case nir_intrinsic_is_helper_invocation:
         case nir_intrinsic_load_front_face:
         case nir_intrinsic_load_view_index:
         case nir_intrinsic_load_layer_id:
         case nir_intrinsic_load_frag_coord:
         case nir_intrinsic_load_sample_pos:
         case nir_intrinsic_load_sample_pos_or_center:
         case nir_intrinsic_load_sample_id:
         case nir_intrinsic_load_sample_mask_in:
         case nir_intrinsic_load_vertex_id_zero_base:
         case nir_intrinsic_load_first_vertex:
         case nir_intrinsic_load_base_instance:
         case nir_intrinsic_load_instance_id:
         case nir_intrinsic_load_draw_id:
         case nir_intrinsic_load_num_workgroups:
         case nir_intrinsic_load_workgroup_id:
         case nir_intrinsic_load_local_invocation_id:
         case nir_intrinsic_load_local_invocation_index:
         case nir_intrinsic_load_subgroup_id:
         case nir_intrinsic_load_subgroup_invocation:
         case nir_intrinsic_load_num_subgroups:
         case nir_intrinsic_load_frag_shading_rate:
         case nir_intrinsic_is_sparse_texels_resident:
         case nir_intrinsic_sparse_residency_code_and:
            if (!alu_ok)
               return false;
            break;

         default:
            return false;
         }

         break;
      }

      case nir_instr_type_deref:
      case nir_instr_type_load_const:
      case nir_instr_type_ssa_undef:
         break;

      case nir_instr_type_alu: {
         nir_alu_instr *mov = nir_instr_as_alu(instr);
         bool movelike = false;

         switch (mov->op) {
         case nir_op_mov:
         case nir_op_fneg:
         case nir_op_ineg:
         case nir_op_fabs:
         case nir_op_iabs:
         case nir_op_vec2:
         case nir_op_vec3:
         case nir_op_vec4:
         case nir_op_vec5:
         case nir_op_vec8:
         case nir_op_vec16:
            movelike = true;
            break;

         case nir_op_fcos:
         case nir_op_fdiv:
         case nir_op_fexp2:
         case nir_op_flog2:
         case nir_op_fmod:
         case nir_op_fpow:
         case nir_op_frcp:
         case nir_op_frem:
         case nir_op_frsq:
         case nir_op_fsin:
         case nir_op_idiv:
         case nir_op_irem:
         case nir_op_udiv:
            if (!alu_ok || !expensive_alu_ok)
               return false;

            break;

         default:
            if (!alu_ok) {
               /* It must be a move-like operation. */
               return false;
            }
            break;
         }

         /* It must be SSA */
         if (!mov->dest.dest.is_ssa)
            return false;

         if (alu_ok) {
            /* If the ALU operation is an fsat or a move-like operation, do
             * not count it.  The expectation is that it will eventually be
             * merged as a destination modifier or source modifier on some
             * other instruction.
             */
            if (mov->op != nir_op_fsat && !movelike)
               (*count)++;
         } else {
            /* Can't handle saturate */
            if (mov->dest.saturate)
               return false;

            /* It cannot have any if-uses */
            if (!list_is_empty(&mov->dest.dest.ssa.if_uses))
               return false;

            /* The only uses of this definition must be phis in the successor */
            nir_foreach_use(use, &mov->dest.dest.ssa) {
               if (use->parent_instr->type != nir_instr_type_phi ||
                   use->parent_instr->block != block->successors[0])
                  return false;
            }
         }
         break;
      }

      default:
         return false;
      }
   }

   return true;
}

/**
 * Try to collapse nested ifs:
 * This optimization turns
 *
 * if (cond1) {
 *   <allowed instruction>
 *   if (cond2) {
 *     <any code>
 *   } else {
 *   }
 * } else {
 * }
 *
 * into
 *
 * <allowed instruction>
 * if (cond1 && cond2) {
 *   <any code>
 * } else {
 * }
 *
 */
static bool
nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit,
                    bool indirect_load_ok, bool expensive_alu_ok)
{
   /* the if has to be nested */
   if (if_stmt->cf_node.parent->type != nir_cf_node_if)
      return false;

   nir_if *parent_if = nir_cf_node_as_if(if_stmt->cf_node.parent);
   if (parent_if->control == nir_selection_control_dont_flatten)
      return false;

   /* check if the else block is empty */
   if (!nir_cf_list_is_empty_block(&if_stmt->else_list))
      return false;

   /* this opt doesn't make much sense if the branch is empty */
   if (nir_cf_list_is_empty_block(&if_stmt->then_list))
      return false;

   /* the nested if has to be the only cf_node:
    * i.e. <block> <if_stmt> <block> */
   if (exec_list_length(&parent_if->then_list) != 3)
      return false;

   /* check if the else block of the parent if is empty */
   if (!nir_cf_list_is_empty_block(&parent_if->else_list))
      return false;

   /* check if the block after the nested if is empty except for phis */
   nir_block *last = nir_if_last_then_block(parent_if);
   nir_instr *last_instr = nir_block_last_instr(last);
   if (last_instr && last_instr->type != nir_instr_type_phi)
      return false;

   /* check if all outer phis become trivial after merging the ifs */
   nir_foreach_instr(instr, last) {
      if (parent_if->control == nir_selection_control_flatten)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_phi_src *else_src =
         nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));

      nir_foreach_use (src, &phi->dest.ssa) {
         assert(src->parent_instr->type == nir_instr_type_phi);
         nir_phi_src *phi_src =
            nir_phi_get_src_from_block(nir_instr_as_phi(src->parent_instr),
                                       nir_if_first_else_block(parent_if));
         if (phi_src->src.ssa != else_src->src.ssa)
            return false;
      }
   }

   if (parent_if->control == nir_selection_control_flatten) {
      /* Override driver defaults */
      indirect_load_ok = true;
      expensive_alu_ok = true;
   }

   /* check if the block before the nested if matches the requirements */
   nir_block *first = nir_if_first_then_block(parent_if);
   unsigned count = 0;
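   /* Note: the limit is passed as 0 or 1 here, so the limit == ~0
    * flatten-everything path in block_check_for_allowed_instrs() is never
    * taken for this block.
    */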
   if (!block_check_for_allowed_instrs(first, &count, limit != 0,
                                       indirect_load_ok, expensive_alu_ok))
      return false;

   if (count > limit && parent_if->control != nir_selection_control_flatten)
      return false;

   /* trivialize succeeding phis */
   nir_foreach_instr(instr, last) {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_phi_src *else_src =
         nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));
      nir_foreach_use_safe(src, &phi->dest.ssa) {
         nir_phi_src *phi_src =
            nir_phi_get_src_from_block(nir_instr_as_phi(src->parent_instr),
                                       nir_if_first_else_block(parent_if));
         if (phi_src->src.ssa == else_src->src.ssa)
            nir_instr_rewrite_src(src->parent_instr, &phi_src->src,
                                  nir_src_for_ssa(&phi->dest.ssa));
      }
   }

   /* combine the conditions */
   struct nir_builder b;
   nir_builder_init(&b, nir_cf_node_get_function(&if_stmt->cf_node)->function->impl);
   b.cursor = nir_before_cf_node(&if_stmt->cf_node);
   nir_ssa_def *cond = nir_iand(&b, if_stmt->condition.ssa,
                                parent_if->condition.ssa);
   nir_if_rewrite_condition(if_stmt, nir_src_for_ssa(cond));

   /* move the whole inner if before the parent if */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_block(first),
                        nir_after_block(last));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&parent_if->cf_node));

   /* The now empty parent if will be cleaned up by other passes */
   return true;
}

static bool
nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
                              unsigned limit, bool indirect_load_ok,
                              bool expensive_alu_ok)
{
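   /* This peephole only applies when the block is immediately preceded by an
    * if/else statement; otherwise there is nothing to flatten.
    */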
   if (nir_cf_node_is_first(&block->cf_node))
      return false;

   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
   if (prev_node->type != nir_cf_node_if)
      return false;

   nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));

   /* If the last instruction before this if/else block is a jump, we can't
    * append stuff after it because it would break a bunch of assumptions about
    * control flow (nir_validate expects the successor of a return/halt jump
    * to be the end of the function, which might not match the successor of
    * the if/else blocks).
    */
   if (nir_block_ends_in_return_or_halt(prev_block))
      return false;

   nir_if *if_stmt = nir_cf_node_as_if(prev_node);

   /* first, try to collapse the if */
   if (nir_opt_collapse_if(if_stmt, shader, limit,
                           indirect_load_ok, expensive_alu_ok))
      return true;

   if (if_stmt->control == nir_selection_control_dont_flatten)
      return false;

   nir_block *then_block = nir_if_first_then_block(if_stmt);
   nir_block *else_block = nir_if_first_else_block(if_stmt);

   /* We can only have one block in each side ... */
   if (nir_if_last_then_block(if_stmt) != then_block ||
       nir_if_last_else_block(if_stmt) != else_block)
      return false;

   if (if_stmt->control == nir_selection_control_flatten) {
      /* Override driver defaults */
      indirect_load_ok = true;
      expensive_alu_ok = true;
   }

   /* ... and those blocks must only contain "allowed" instructions. */
   unsigned count = 0;
   if (!block_check_for_allowed_instrs(then_block, &count, limit,
                                       indirect_load_ok, expensive_alu_ok) ||
       !block_check_for_allowed_instrs(else_block, &count, limit,
                                       indirect_load_ok, expensive_alu_ok))
      return false;

   if (count > limit && if_stmt->control != nir_selection_control_flatten)
      return false;

   /* At this point, we know that the previous CFG node is an if-then
    * statement containing only moves to phi nodes in this block.  We can
    * just remove that entire CF node and replace all of the phi nodes with
    * selects.
    */

   /* First, we move the remaining instructions from the blocks to the
    * block before.  We have already guaranteed that this is safe by
    * calling block_check_for_allowed_instrs()
    */
   nir_foreach_instr_safe(instr, then_block) {
      exec_node_remove(&instr->node);
      instr->block = prev_block;
      exec_list_push_tail(&prev_block->instr_list, &instr->node);
   }

   nir_foreach_instr_safe(instr, else_block) {
      exec_node_remove(&instr->node);
      instr->block = prev_block;
      exec_list_push_tail(&prev_block->instr_list, &instr->node);
   }

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
      nir_src_copy(&sel->src[0].src, &if_stmt->condition);
      /* Splat the condition to all channels */
      memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);

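      /* bcsel selects src[1] when the condition is true and src[2] otherwise,
       * so route the then-block value into src[1] and the else-block value
       * into src[2].
       */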
      assert(exec_list_length(&phi->srcs) == 2);
      nir_foreach_phi_src(src, phi) {
         assert(src->pred == then_block || src->pred == else_block);
         assert(src->src.is_ssa);

         unsigned idx = src->pred == then_block ? 1 : 2;
         nir_src_copy(&sel->src[idx].src, &src->src);
      }

      nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
                        phi->dest.ssa.num_components,
                        phi->dest.ssa.bit_size, NULL);
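      /* The select writes every component that the phi produced. */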
      sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;

      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
                               &sel->dest.dest.ssa);

      nir_instr_insert_before(&phi->instr, &sel->instr);
      nir_instr_remove(&phi->instr);
   }

   nir_cf_node_remove(&if_stmt->cf_node);
   return true;
}

static bool
nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
                             bool indirect_load_ok, bool expensive_alu_ok)
{
   nir_shader *shader = impl->function->shader;
   bool progress = false;

   nir_foreach_block_safe(block, impl) {
      progress |= nir_opt_peephole_select_block(block, shader, limit,
                                                indirect_load_ok,
                                                expensive_alu_ok);
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_none);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_opt_peephole_select(nir_shader *shader, unsigned limit,
                        bool indirect_load_ok, bool expensive_alu_ok)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_opt_peephole_select_impl(function->impl, limit,
                                                  indirect_load_ok,
                                                  expensive_alu_ok);
   }

   return progress;
}