1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2019 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir.h"
25bf215546Sopenharmony_ci#include "nir/nir_builder.h"
26bf215546Sopenharmony_ci#include "nir_constant_expressions.h"
27bf215546Sopenharmony_ci#include "nir_control_flow.h"
28bf215546Sopenharmony_ci#include "nir_loop_analyze.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_cistatic bool
31bf215546Sopenharmony_ciis_two_src_comparison(const nir_alu_instr *instr)
32bf215546Sopenharmony_ci{
33bf215546Sopenharmony_ci   switch (instr->op) {
34bf215546Sopenharmony_ci   case nir_op_flt:
35bf215546Sopenharmony_ci   case nir_op_flt32:
36bf215546Sopenharmony_ci   case nir_op_fge:
37bf215546Sopenharmony_ci   case nir_op_fge32:
38bf215546Sopenharmony_ci   case nir_op_feq:
39bf215546Sopenharmony_ci   case nir_op_feq32:
40bf215546Sopenharmony_ci   case nir_op_fneu:
41bf215546Sopenharmony_ci   case nir_op_fneu32:
42bf215546Sopenharmony_ci   case nir_op_ilt:
43bf215546Sopenharmony_ci   case nir_op_ilt32:
44bf215546Sopenharmony_ci   case nir_op_ult:
45bf215546Sopenharmony_ci   case nir_op_ult32:
46bf215546Sopenharmony_ci   case nir_op_ige:
47bf215546Sopenharmony_ci   case nir_op_ige32:
48bf215546Sopenharmony_ci   case nir_op_uge:
49bf215546Sopenharmony_ci   case nir_op_uge32:
50bf215546Sopenharmony_ci   case nir_op_ieq:
51bf215546Sopenharmony_ci   case nir_op_ieq32:
52bf215546Sopenharmony_ci   case nir_op_ine:
53bf215546Sopenharmony_ci   case nir_op_ine32:
54bf215546Sopenharmony_ci      return true;
55bf215546Sopenharmony_ci   default:
56bf215546Sopenharmony_ci      return false;
57bf215546Sopenharmony_ci   }
58bf215546Sopenharmony_ci}
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_cistatic bool
61bf215546Sopenharmony_ciall_srcs_are_ssa(const nir_alu_instr *instr)
62bf215546Sopenharmony_ci{
63bf215546Sopenharmony_ci   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
64bf215546Sopenharmony_ci      if (!instr->src[i].src.is_ssa)
65bf215546Sopenharmony_ci         return false;
66bf215546Sopenharmony_ci   }
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   return true;
69bf215546Sopenharmony_ci}
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_cistatic bool
73bf215546Sopenharmony_ciall_uses_are_bcsel(const nir_alu_instr *instr)
74bf215546Sopenharmony_ci{
75bf215546Sopenharmony_ci   if (!instr->dest.dest.is_ssa)
76bf215546Sopenharmony_ci      return false;
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci   nir_foreach_use(use, &instr->dest.dest.ssa) {
79bf215546Sopenharmony_ci      if (use->parent_instr->type != nir_instr_type_alu)
80bf215546Sopenharmony_ci         return false;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci      nir_alu_instr *const alu = nir_instr_as_alu(use->parent_instr);
83bf215546Sopenharmony_ci      if (alu->op != nir_op_bcsel &&
84bf215546Sopenharmony_ci          alu->op != nir_op_b32csel)
85bf215546Sopenharmony_ci         return false;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci      /* Not only must the result be used by a bcsel, but it must be used as
88bf215546Sopenharmony_ci       * the first source (the condition).
89bf215546Sopenharmony_ci       */
90bf215546Sopenharmony_ci      if (alu->src[0].src.ssa != &instr->dest.dest.ssa)
91bf215546Sopenharmony_ci         return false;
92bf215546Sopenharmony_ci   }
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   return true;
95bf215546Sopenharmony_ci}
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_cistatic bool
98bf215546Sopenharmony_cinir_opt_rematerialize_compares_impl(nir_shader *shader, nir_function_impl *impl)
99bf215546Sopenharmony_ci{
100bf215546Sopenharmony_ci   bool progress = false;
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
103bf215546Sopenharmony_ci      nir_foreach_instr(instr, block) {
104bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_alu)
105bf215546Sopenharmony_ci            continue;
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci         nir_alu_instr *const alu = nir_instr_as_alu(instr);
108bf215546Sopenharmony_ci         if (!is_two_src_comparison(alu))
109bf215546Sopenharmony_ci            continue;
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci         if (!all_srcs_are_ssa(alu))
112bf215546Sopenharmony_ci            continue;
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci         if (!all_uses_are_bcsel(alu))
115bf215546Sopenharmony_ci            continue;
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci         /* At this point it is known that alu is a comparison instruction
118bf215546Sopenharmony_ci          * that is only used by nir_op_bcsel and possibly by if-statements
119bf215546Sopenharmony_ci          * (though the latter has not been explicitly checked).
120bf215546Sopenharmony_ci          *
121bf215546Sopenharmony_ci          * Iterate through each use of the comparison.  For every use (or use
122bf215546Sopenharmony_ci          * by an if-statement) that is in a different block, emit a copy of
123bf215546Sopenharmony_ci          * the comparison.  Care must be taken here.  The original
124bf215546Sopenharmony_ci          * instruction must be duplicated only once in each block because CSE
125bf215546Sopenharmony_ci          * cannot be run after this pass.
126bf215546Sopenharmony_ci          */
127bf215546Sopenharmony_ci         nir_foreach_use_safe(use, &alu->dest.dest.ssa) {
128bf215546Sopenharmony_ci            nir_instr *const use_instr = use->parent_instr;
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci            /* If the use is in the same block as the def, don't
131bf215546Sopenharmony_ci             * rematerialize.
132bf215546Sopenharmony_ci             */
133bf215546Sopenharmony_ci            if (use_instr->block == alu->instr.block)
134bf215546Sopenharmony_ci               continue;
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci            nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci            nir_instr_insert_before(use_instr, &clone->instr);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci            nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr);
141bf215546Sopenharmony_ci            for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
142bf215546Sopenharmony_ci               if (use_alu->src[i].src.ssa == &alu->dest.dest.ssa) {
143bf215546Sopenharmony_ci                  nir_instr_rewrite_src(&use_alu->instr,
144bf215546Sopenharmony_ci                                        &use_alu->src[i].src,
145bf215546Sopenharmony_ci                                        nir_src_for_ssa(&clone->dest.dest.ssa));
146bf215546Sopenharmony_ci                  progress = true;
147bf215546Sopenharmony_ci               }
148bf215546Sopenharmony_ci            }
149bf215546Sopenharmony_ci         }
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci         nir_foreach_if_use_safe(use, &alu->dest.dest.ssa) {
152bf215546Sopenharmony_ci            nir_if *const if_stmt = use->parent_if;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci            nir_block *const prev_block =
155bf215546Sopenharmony_ci               nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci            /* If the compare is from the previous block, don't
158bf215546Sopenharmony_ci             * rematerialize.
159bf215546Sopenharmony_ci             */
160bf215546Sopenharmony_ci            if (prev_block == alu->instr.block)
161bf215546Sopenharmony_ci               continue;
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci            nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci            nir_instr_insert_after_block(prev_block, &clone->instr);
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci            nir_if_rewrite_condition(if_stmt,
168bf215546Sopenharmony_ci                                     nir_src_for_ssa(&clone->dest.dest.ssa));
169bf215546Sopenharmony_ci            progress = true;
170bf215546Sopenharmony_ci         }
171bf215546Sopenharmony_ci      }
172bf215546Sopenharmony_ci   }
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   if (progress) {
175bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index |
176bf215546Sopenharmony_ci                                  nir_metadata_dominance);
177bf215546Sopenharmony_ci   } else {
178bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
179bf215546Sopenharmony_ci   }
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   return progress;
182bf215546Sopenharmony_ci}
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_cibool
185bf215546Sopenharmony_cinir_opt_rematerialize_compares(nir_shader *shader)
186bf215546Sopenharmony_ci{
187bf215546Sopenharmony_ci   bool progress = false;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
190bf215546Sopenharmony_ci      if (function->impl == NULL)
191bf215546Sopenharmony_ci         continue;
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci      progress = nir_opt_rematerialize_compares_impl(shader, function->impl)
194bf215546Sopenharmony_ci         || progress;
195bf215546Sopenharmony_ci   }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   return progress;
198bf215546Sopenharmony_ci}
199