1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir_instr_set.h"
25bf215546Sopenharmony_ci#include "nir_search_helpers.h"
26bf215546Sopenharmony_ci#include "nir_builder.h"
27bf215546Sopenharmony_ci#include "util/u_vector.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci/* Partial redundancy elimination of compares
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * Seaches for comparisons of the form 'a cmp b' that dominate arithmetic
32bf215546Sopenharmony_ci * instructions like 'b - a'.  The comparison is replaced by the arithmetic
33bf215546Sopenharmony_ci * instruction, and the result is compared with zero.  For example,
34bf215546Sopenharmony_ci *
35bf215546Sopenharmony_ci *       vec1 32 ssa_111 = flt 0.37, ssa_110.w
36bf215546Sopenharmony_ci *       if ssa_111 {
37bf215546Sopenharmony_ci *               block block_1:
38bf215546Sopenharmony_ci *              vec1 32 ssa_112 = fadd ssa_110.w, -0.37
39bf215546Sopenharmony_ci *              ...
40bf215546Sopenharmony_ci *
41bf215546Sopenharmony_ci * becomes
42bf215546Sopenharmony_ci *
43bf215546Sopenharmony_ci *       vec1 32 ssa_111 = fadd ssa_110.w, -0.37
44bf215546Sopenharmony_ci *       vec1 32 ssa_112 = flt 0.0, ssa_111
45bf215546Sopenharmony_ci *       if ssa_112 {
46bf215546Sopenharmony_ci *               block block_1:
47bf215546Sopenharmony_ci *              ...
48bf215546Sopenharmony_ci */
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cistruct block_queue {
51bf215546Sopenharmony_ci   /**
52bf215546Sopenharmony_ci    * Stack of blocks from the current location in the CFG to the entry point
53bf215546Sopenharmony_ci    * of the function.
54bf215546Sopenharmony_ci    *
55bf215546Sopenharmony_ci    * This is sort of a poor man's dominator tree.
56bf215546Sopenharmony_ci    */
57bf215546Sopenharmony_ci   struct exec_list blocks;
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   /** List of freed block_instructions structures that can be reused. */
60bf215546Sopenharmony_ci   struct exec_list reusable_blocks;
61bf215546Sopenharmony_ci};
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_cistruct block_instructions {
64bf215546Sopenharmony_ci   struct exec_node node;
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   /**
67bf215546Sopenharmony_ci    * Set of comparison instructions from the block that are candidates for
68bf215546Sopenharmony_ci    * being replaced by add instructions.
69bf215546Sopenharmony_ci    */
70bf215546Sopenharmony_ci   struct u_vector instructions;
71bf215546Sopenharmony_ci};
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_cistatic void
74bf215546Sopenharmony_ciblock_queue_init(struct block_queue *bq)
75bf215546Sopenharmony_ci{
76bf215546Sopenharmony_ci   exec_list_make_empty(&bq->blocks);
77bf215546Sopenharmony_ci   exec_list_make_empty(&bq->reusable_blocks);
78bf215546Sopenharmony_ci}
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_cistatic void
81bf215546Sopenharmony_ciblock_queue_finish(struct block_queue *bq)
82bf215546Sopenharmony_ci{
83bf215546Sopenharmony_ci   struct block_instructions *n;
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   while ((n = (struct block_instructions *) exec_list_pop_head(&bq->blocks)) != NULL) {
86bf215546Sopenharmony_ci      u_vector_finish(&n->instructions);
87bf215546Sopenharmony_ci      free(n);
88bf215546Sopenharmony_ci   }
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci   while ((n = (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks)) != NULL) {
91bf215546Sopenharmony_ci      free(n);
92bf215546Sopenharmony_ci   }
93bf215546Sopenharmony_ci}
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_cistatic struct block_instructions *
96bf215546Sopenharmony_cipush_block(struct block_queue *bq)
97bf215546Sopenharmony_ci{
98bf215546Sopenharmony_ci   struct block_instructions *bi =
99bf215546Sopenharmony_ci      (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks);
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci   if (bi == NULL) {
102bf215546Sopenharmony_ci      bi = calloc(1, sizeof(struct block_instructions));
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci      if (bi == NULL)
105bf215546Sopenharmony_ci         return NULL;
106bf215546Sopenharmony_ci   }
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   if (!u_vector_init_pow2(&bi->instructions, 8, sizeof(nir_alu_instr *))) {
109bf215546Sopenharmony_ci      free(bi);
110bf215546Sopenharmony_ci      return NULL;
111bf215546Sopenharmony_ci   }
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   exec_list_push_tail(&bq->blocks, &bi->node);
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   return bi;
116bf215546Sopenharmony_ci}
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_cistatic void
119bf215546Sopenharmony_cipop_block(struct block_queue *bq, struct block_instructions *bi)
120bf215546Sopenharmony_ci{
121bf215546Sopenharmony_ci   u_vector_finish(&bi->instructions);
122bf215546Sopenharmony_ci   exec_node_remove(&bi->node);
123bf215546Sopenharmony_ci   exec_list_push_head(&bq->reusable_blocks, &bi->node);
124bf215546Sopenharmony_ci}
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_cistatic void
127bf215546Sopenharmony_ciadd_instruction_for_block(struct block_instructions *bi,
128bf215546Sopenharmony_ci                          nir_alu_instr *alu)
129bf215546Sopenharmony_ci{
130bf215546Sopenharmony_ci   nir_alu_instr **data =
131bf215546Sopenharmony_ci      u_vector_add(&bi->instructions);
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci   *data = alu;
134bf215546Sopenharmony_ci}
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_cistatic void
137bf215546Sopenharmony_cirewrite_compare_instruction(nir_builder *bld, nir_alu_instr *orig_cmp,
138bf215546Sopenharmony_ci                            nir_alu_instr *orig_add, bool zero_on_left)
139bf215546Sopenharmony_ci{
140bf215546Sopenharmony_ci   bld->cursor = nir_before_instr(&orig_cmp->instr);
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   /* This is somewhat tricky.  The compare instruction may be something like
143bf215546Sopenharmony_ci    * (fcmp, a, b) while the add instruction is something like (fadd, fneg(a),
144bf215546Sopenharmony_ci    * b).  This is problematic because the SSA value for the fneg(a) may not
145bf215546Sopenharmony_ci    * exist yet at the compare instruction.
146bf215546Sopenharmony_ci    *
147bf215546Sopenharmony_ci    * We fabricate the operands of the new add.  This is done using
148bf215546Sopenharmony_ci    * information provided by zero_on_left.  If zero_on_left is true, we know
149bf215546Sopenharmony_ci    * the resulting compare instruction is (fcmp, 0.0, (fadd, x, y)).  If the
150bf215546Sopenharmony_ci    * original compare instruction was (fcmp, a, b), x = b and y = -a.  If
151bf215546Sopenharmony_ci    * zero_on_left is false, the resulting compare instruction is (fcmp,
152bf215546Sopenharmony_ci    * (fadd, x, y), 0.0) and x = a and y = -b.
153bf215546Sopenharmony_ci    */
154bf215546Sopenharmony_ci   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, orig_cmp, 0);
155bf215546Sopenharmony_ci   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, orig_cmp, 1);
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   nir_ssa_def *const fadd = zero_on_left
158bf215546Sopenharmony_ci      ? nir_fadd(bld, b, nir_fneg(bld, a))
159bf215546Sopenharmony_ci      : nir_fadd(bld, a, nir_fneg(bld, b));
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   nir_ssa_def *const zero =
162bf215546Sopenharmony_ci      nir_imm_floatN_t(bld, 0.0, orig_add->dest.dest.ssa.bit_size);
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci   nir_ssa_def *const cmp = zero_on_left
165bf215546Sopenharmony_ci      ? nir_build_alu(bld, orig_cmp->op, zero, fadd, NULL, NULL)
166bf215546Sopenharmony_ci      : nir_build_alu(bld, orig_cmp->op, fadd, zero, NULL, NULL);
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   /* Generating extra moves of the results is the easy way to make sure the
169bf215546Sopenharmony_ci    * writemasks match the original instructions.  Later optimization passes
170bf215546Sopenharmony_ci    * will clean these up.  This is similar to nir_replace_instr (in
171bf215546Sopenharmony_ci    * nir_search.c).
172bf215546Sopenharmony_ci    */
173bf215546Sopenharmony_ci   nir_alu_instr *mov_add = nir_alu_instr_create(bld->shader, nir_op_mov);
174bf215546Sopenharmony_ci   mov_add->dest.write_mask = orig_add->dest.write_mask;
175bf215546Sopenharmony_ci   nir_ssa_dest_init(&mov_add->instr, &mov_add->dest.dest,
176bf215546Sopenharmony_ci                     orig_add->dest.dest.ssa.num_components,
177bf215546Sopenharmony_ci                     orig_add->dest.dest.ssa.bit_size, NULL);
178bf215546Sopenharmony_ci   mov_add->src[0].src = nir_src_for_ssa(fadd);
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci   nir_builder_instr_insert(bld, &mov_add->instr);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   nir_alu_instr *mov_cmp = nir_alu_instr_create(bld->shader, nir_op_mov);
183bf215546Sopenharmony_ci   mov_cmp->dest.write_mask = orig_cmp->dest.write_mask;
184bf215546Sopenharmony_ci   nir_ssa_dest_init(&mov_cmp->instr, &mov_cmp->dest.dest,
185bf215546Sopenharmony_ci                     orig_cmp->dest.dest.ssa.num_components,
186bf215546Sopenharmony_ci                     orig_cmp->dest.dest.ssa.bit_size, NULL);
187bf215546Sopenharmony_ci   mov_cmp->src[0].src = nir_src_for_ssa(cmp);
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   nir_builder_instr_insert(bld, &mov_cmp->instr);
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&orig_cmp->dest.dest.ssa,
192bf215546Sopenharmony_ci                            &mov_cmp->dest.dest.ssa);
193bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&orig_add->dest.dest.ssa,
194bf215546Sopenharmony_ci                            &mov_add->dest.dest.ssa);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   /* We know these have no more uses because we just rewrote them all, so we
197bf215546Sopenharmony_ci    * can remove them.
198bf215546Sopenharmony_ci    */
199bf215546Sopenharmony_ci   nir_instr_remove(&orig_cmp->instr);
200bf215546Sopenharmony_ci   nir_instr_remove(&orig_add->instr);
201bf215546Sopenharmony_ci}
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_cistatic bool
204bf215546Sopenharmony_cicomparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld)
205bf215546Sopenharmony_ci{
206bf215546Sopenharmony_ci   bool progress = false;
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   struct block_instructions *bi = push_block(bq);
209bf215546Sopenharmony_ci   if (bi == NULL)
210bf215546Sopenharmony_ci      return false;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   /* Starting with the current block, examine each instruction.  If the
213bf215546Sopenharmony_ci    * instruction is a comparison that matches the '±a cmp ±b' pattern, add it
214bf215546Sopenharmony_ci    * to the block_instructions::instructions set.  If the instruction is an
215bf215546Sopenharmony_ci    * add instruction, walk up the block queue looking at the stored
216bf215546Sopenharmony_ci    * instructions.  If a matching comparison is found, move the addition and
217bf215546Sopenharmony_ci    * replace the comparison with a different comparison based on the result
218bf215546Sopenharmony_ci    * of the addition.  All of the blocks in the queue are guaranteed to be
219bf215546Sopenharmony_ci    * dominators of the current block.
220bf215546Sopenharmony_ci    *
221bf215546Sopenharmony_ci    * After processing the current block, recurse into the blocks dominated by
222bf215546Sopenharmony_ci    * the current block.
223bf215546Sopenharmony_ci    */
224bf215546Sopenharmony_ci   nir_foreach_instr_safe(instr, block) {
225bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_alu)
226bf215546Sopenharmony_ci         continue;
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci      nir_alu_instr *const alu = nir_instr_as_alu(instr);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci      if (alu->dest.dest.ssa.num_components != 1)
231bf215546Sopenharmony_ci         continue;
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci      if (alu->dest.saturate)
234bf215546Sopenharmony_ci         continue;
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci      static const uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = {0};
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci      switch (alu->op) {
239bf215546Sopenharmony_ci      case nir_op_fadd: {
240bf215546Sopenharmony_ci         /* If the instruction is fadd, check it against comparison
241bf215546Sopenharmony_ci          * instructions that dominate it.
242bf215546Sopenharmony_ci          */
243bf215546Sopenharmony_ci         struct block_instructions *b =
244bf215546Sopenharmony_ci            (struct block_instructions *) exec_list_get_head_raw(&bq->blocks);
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci         while (b->node.next != NULL) {
247bf215546Sopenharmony_ci            nir_alu_instr **a;
248bf215546Sopenharmony_ci            bool rewrote_compare = false;
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci            u_vector_foreach(a, &b->instructions) {
251bf215546Sopenharmony_ci               nir_alu_instr *const cmp = *a;
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_ci               if (cmp == NULL)
254bf215546Sopenharmony_ci                  continue;
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci               /* The operands of both instructions are, with some liberty,
257bf215546Sopenharmony_ci                * commutative.  Check all four permutations.  The third and
258bf215546Sopenharmony_ci                * fourth permutations are negations of the first two.
259bf215546Sopenharmony_ci                */
260bf215546Sopenharmony_ci               if ((nir_alu_srcs_equal(cmp, alu, 0, 0) &&
261bf215546Sopenharmony_ci                    nir_alu_srcs_negative_equal(cmp, alu, 1, 1)) ||
262bf215546Sopenharmony_ci                   (nir_alu_srcs_equal(cmp, alu, 0, 1) &&
263bf215546Sopenharmony_ci                    nir_alu_srcs_negative_equal(cmp, alu, 1, 0))) {
264bf215546Sopenharmony_ci                  /* These are the cases where (A cmp B) matches either (A +
265bf215546Sopenharmony_ci                   * -B) or (-B + A)
266bf215546Sopenharmony_ci                   *
267bf215546Sopenharmony_ci                   *    A cmp B <=> A + -B cmp 0
268bf215546Sopenharmony_ci                   */
269bf215546Sopenharmony_ci                  rewrite_compare_instruction(bld, cmp, alu, false);
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci                  *a = NULL;
272bf215546Sopenharmony_ci                  rewrote_compare = true;
273bf215546Sopenharmony_ci                  break;
274bf215546Sopenharmony_ci               } else if ((nir_alu_srcs_equal(cmp, alu, 1, 0) &&
275bf215546Sopenharmony_ci                           nir_alu_srcs_negative_equal(cmp, alu, 0, 1)) ||
276bf215546Sopenharmony_ci                          (nir_alu_srcs_equal(cmp, alu, 1, 1) &&
277bf215546Sopenharmony_ci                           nir_alu_srcs_negative_equal(cmp, alu, 0, 0))) {
278bf215546Sopenharmony_ci                  /* This is the case where (A cmp B) matches (B + -A) or (-A
279bf215546Sopenharmony_ci                   * + B).
280bf215546Sopenharmony_ci                   *
281bf215546Sopenharmony_ci                   *    A cmp B <=> 0 cmp B + -A
282bf215546Sopenharmony_ci                   */
283bf215546Sopenharmony_ci                  rewrite_compare_instruction(bld, cmp, alu, true);
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci                  *a = NULL;
286bf215546Sopenharmony_ci                  rewrote_compare = true;
287bf215546Sopenharmony_ci                  break;
288bf215546Sopenharmony_ci               }
289bf215546Sopenharmony_ci            }
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci            /* Bail after a compare in the most dominating block is found.
292bf215546Sopenharmony_ci             * This is necessary because 'alu' has been removed from the
293bf215546Sopenharmony_ci             * instruction stream.  Should there be a matching compare in
294bf215546Sopenharmony_ci             * another block, calling rewrite_compare_instruction again will
295bf215546Sopenharmony_ci             * try to operate on a node that is not in the list as if it were
296bf215546Sopenharmony_ci             * in the list.
297bf215546Sopenharmony_ci             *
298bf215546Sopenharmony_ci             * FINISHME: There may be opportunity for additional optimization
299bf215546Sopenharmony_ci             * here.  I discovered this problem due to a shader in Guacamelee.
300bf215546Sopenharmony_ci             * It may be possible to rewrite the matching compares that are
301bf215546Sopenharmony_ci             * encountered later to reuse the result from the compare that was
302bf215546Sopenharmony_ci             * first rewritten.  It's also possible that this is just taken
303bf215546Sopenharmony_ci             * care of by calling the optimization pass repeatedly.
304bf215546Sopenharmony_ci             */
305bf215546Sopenharmony_ci            if (rewrote_compare) {
306bf215546Sopenharmony_ci               progress = true;
307bf215546Sopenharmony_ci               break;
308bf215546Sopenharmony_ci            }
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci            b = (struct block_instructions *) b->node.next;
311bf215546Sopenharmony_ci         }
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci         break;
314bf215546Sopenharmony_ci      }
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci      case nir_op_flt:
317bf215546Sopenharmony_ci      case nir_op_fge:
318bf215546Sopenharmony_ci      case nir_op_fneu:
319bf215546Sopenharmony_ci      case nir_op_feq:
320bf215546Sopenharmony_ci         /* If the instruction is a comparison that is used by an if-statement
321bf215546Sopenharmony_ci          * and neither operand is immediate value 0, add it to the set.
322bf215546Sopenharmony_ci          */
323bf215546Sopenharmony_ci         if (is_used_by_if(alu) &&
324bf215546Sopenharmony_ci             is_not_const_zero(NULL, alu, 0, 1, swizzle) &&
325bf215546Sopenharmony_ci             is_not_const_zero(NULL, alu, 1, 1, swizzle))
326bf215546Sopenharmony_ci            add_instruction_for_block(bi, alu);
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci         break;
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci      default:
331bf215546Sopenharmony_ci         break;
332bf215546Sopenharmony_ci      }
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   for (unsigned i = 0; i < block->num_dom_children; i++) {
336bf215546Sopenharmony_ci      nir_block *child = block->dom_children[i];
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci      if (comparison_pre_block(child, bq, bld))
339bf215546Sopenharmony_ci         progress = true;
340bf215546Sopenharmony_ci   }
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci   pop_block(bq, bi);
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   return progress;
345bf215546Sopenharmony_ci}
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_cibool
348bf215546Sopenharmony_cinir_opt_comparison_pre_impl(nir_function_impl *impl)
349bf215546Sopenharmony_ci{
350bf215546Sopenharmony_ci   struct block_queue bq;
351bf215546Sopenharmony_ci   nir_builder bld;
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   block_queue_init(&bq);
354bf215546Sopenharmony_ci   nir_builder_init(&bld, impl);
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   nir_metadata_require(impl, nir_metadata_dominance);
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   const bool progress =
359bf215546Sopenharmony_ci      comparison_pre_block(nir_start_block(impl), &bq, &bld);
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci   block_queue_finish(&bq);
362bf215546Sopenharmony_ci
363bf215546Sopenharmony_ci   if (progress) {
364bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index |
365bf215546Sopenharmony_ci                                  nir_metadata_dominance);
366bf215546Sopenharmony_ci   } else {
367bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
368bf215546Sopenharmony_ci   }
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci   return progress;
371bf215546Sopenharmony_ci}
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_cibool
374bf215546Sopenharmony_cinir_opt_comparison_pre(nir_shader *shader)
375bf215546Sopenharmony_ci{
376bf215546Sopenharmony_ci   bool progress = false;
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
379bf215546Sopenharmony_ci      if (function->impl)
380bf215546Sopenharmony_ci         progress |= nir_opt_comparison_pre_impl(function->impl);
381bf215546Sopenharmony_ci   }
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   return progress;
384bf215546Sopenharmony_ci}
385