/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir_phi_builder.h"
25bf215546Sopenharmony_ci#include "nir/nir_vla.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_cistruct nir_phi_builder {
28bf215546Sopenharmony_ci   nir_shader *shader;
29bf215546Sopenharmony_ci   nir_function_impl *impl;
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci   /* Copied from the impl for easy access */
32bf215546Sopenharmony_ci   unsigned num_blocks;
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci   /* Array of all blocks indexed by block->index. */
35bf215546Sopenharmony_ci   nir_block **blocks;
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci   /* Hold on to the values so we can easily iterate over them. */
38bf215546Sopenharmony_ci   struct exec_list values;
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci   /* Worklist for phi adding */
41bf215546Sopenharmony_ci   unsigned iter_count;
42bf215546Sopenharmony_ci   unsigned *work;
43bf215546Sopenharmony_ci   nir_block **W;
44bf215546Sopenharmony_ci};
45bf215546Sopenharmony_ci
/* Sentinel stored in place of a real SSA def to mark a block that may need a
 * phi node for which none has been created yet.
 */
#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1)
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_cistruct nir_phi_builder_value {
49bf215546Sopenharmony_ci   struct exec_node node;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   struct nir_phi_builder *builder;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   /* Needed so we can create phis and undefs */
54bf215546Sopenharmony_ci   unsigned num_components;
55bf215546Sopenharmony_ci   unsigned bit_size;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   /* The list of phi nodes associated with this value.  Phi nodes are not
58bf215546Sopenharmony_ci    * added directly.  Instead, they are created, the instr->block pointer
59bf215546Sopenharmony_ci    * set, and then added to this list.  Later, in phi_builder_finish, we
60bf215546Sopenharmony_ci    * set up their sources and add them to the top of their respective
61bf215546Sopenharmony_ci    * blocks.
62bf215546Sopenharmony_ci    */
63bf215546Sopenharmony_ci   struct exec_list phis;
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   /* Array of SSA defs, indexed by block.  For each block, this array has has
66bf215546Sopenharmony_ci    * one of three types of values:
67bf215546Sopenharmony_ci    *
68bf215546Sopenharmony_ci    *  - NULL. Indicates that there is no known definition in this block.  If
69bf215546Sopenharmony_ci    *    you need to find one, look at the block's immediate dominator.
70bf215546Sopenharmony_ci    *
71bf215546Sopenharmony_ci    *  - NEEDS_PHI. Indicates that the block may need a phi node but none has
72bf215546Sopenharmony_ci    *    been created yet.  If a def is requested for a block, a phi will need
73bf215546Sopenharmony_ci    *    to be created.
74bf215546Sopenharmony_ci    *
75bf215546Sopenharmony_ci    *  - A regular SSA def.  This will be either the result of a phi node or
76bf215546Sopenharmony_ci    *    one of the defs provided by nir_phi_builder_value_set_blocK_def().
77bf215546Sopenharmony_ci    */
78bf215546Sopenharmony_ci   struct hash_table ht;
79bf215546Sopenharmony_ci};
80bf215546Sopenharmony_ci
/**
 * Convert a block index into a value that can be used as a key for a hash table
 *
 * The hash table functions want a pointer that is not \c NULL.
 * _mesa_hash_pointer drops the two least significant bits, but that's where
 * most of our data likely is.  Shift by 2 and add 1 to make everything happy.
 *
 * The argument is parenthesized and widened to \c uintptr_t before the shift
 * so that compound expressions expand with the intended precedence and the
 * shift is carried out at pointer width rather than at the width of the
 * index type.
 */
#define INDEX_TO_KEY(x) ((void *)((((uintptr_t)(x)) << 2) + 1))
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_cistruct nir_phi_builder *
91bf215546Sopenharmony_cinir_phi_builder_create(nir_function_impl *impl)
92bf215546Sopenharmony_ci{
93bf215546Sopenharmony_ci   struct nir_phi_builder *pb = rzalloc(NULL, struct nir_phi_builder);
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   pb->shader = impl->function->shader;
96bf215546Sopenharmony_ci   pb->impl = impl;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   assert(impl->valid_metadata & (nir_metadata_block_index |
99bf215546Sopenharmony_ci                                  nir_metadata_dominance));
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci   pb->num_blocks = impl->num_blocks;
102bf215546Sopenharmony_ci   pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks);
103bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
104bf215546Sopenharmony_ci      pb->blocks[block->index] = block;
105bf215546Sopenharmony_ci   }
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci   exec_list_make_empty(&pb->values);
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci   pb->iter_count = 0;
110bf215546Sopenharmony_ci   pb->work = rzalloc_array(pb, unsigned, pb->num_blocks);
111bf215546Sopenharmony_ci   pb->W = ralloc_array(pb, nir_block *, pb->num_blocks);
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   return pb;
114bf215546Sopenharmony_ci}
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_cistruct nir_phi_builder_value *
117bf215546Sopenharmony_cinir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
118bf215546Sopenharmony_ci                          unsigned bit_size, const BITSET_WORD *defs)
119bf215546Sopenharmony_ci{
120bf215546Sopenharmony_ci   struct nir_phi_builder_value *val;
121bf215546Sopenharmony_ci   unsigned i, w_start = 0, w_end = 0;
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   val = rzalloc_size(pb, sizeof(*val));
124bf215546Sopenharmony_ci   val->builder = pb;
125bf215546Sopenharmony_ci   val->num_components = num_components;
126bf215546Sopenharmony_ci   val->bit_size = bit_size;
127bf215546Sopenharmony_ci   exec_list_make_empty(&val->phis);
128bf215546Sopenharmony_ci   exec_list_push_tail(&pb->values, &val->node);
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci   _mesa_hash_table_init(&val->ht, pb, _mesa_hash_pointer,
131bf215546Sopenharmony_ci                         _mesa_key_pointer_equal);
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci   pb->iter_count++;
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci   BITSET_FOREACH_SET(i, defs, pb->num_blocks) {
136bf215546Sopenharmony_ci      if (pb->work[i] < pb->iter_count)
137bf215546Sopenharmony_ci         pb->W[w_end++] = pb->blocks[i];
138bf215546Sopenharmony_ci      pb->work[i] = pb->iter_count;
139bf215546Sopenharmony_ci   }
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   while (w_start != w_end) {
142bf215546Sopenharmony_ci      nir_block *cur = pb->W[w_start++];
143bf215546Sopenharmony_ci      set_foreach(cur->dom_frontier, dom_entry) {
144bf215546Sopenharmony_ci         nir_block *next = (nir_block *) dom_entry->key;
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci         /* If there's more than one return statement, then the end block
147bf215546Sopenharmony_ci          * can be a join point for some definitions. However, there are
148bf215546Sopenharmony_ci          * no instructions in the end block, so nothing would use those
149bf215546Sopenharmony_ci          * phi nodes. Of course, we couldn't place those phi nodes
150bf215546Sopenharmony_ci          * anyways due to the restriction of having no instructions in the
151bf215546Sopenharmony_ci          * end block...
152bf215546Sopenharmony_ci          */
153bf215546Sopenharmony_ci         if (next == pb->impl->end_block)
154bf215546Sopenharmony_ci            continue;
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci         if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(next->index)) == NULL) {
157bf215546Sopenharmony_ci            /* Instead of creating a phi node immediately, we simply set the
158bf215546Sopenharmony_ci             * value to the magic value NEEDS_PHI.  Later, we create phi nodes
159bf215546Sopenharmony_ci             * on demand in nir_phi_builder_value_get_block_def().
160bf215546Sopenharmony_ci             */
161bf215546Sopenharmony_ci            nir_phi_builder_value_set_block_def(val, next, NEEDS_PHI);
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci            if (pb->work[next->index] < pb->iter_count) {
164bf215546Sopenharmony_ci               pb->work[next->index] = pb->iter_count;
165bf215546Sopenharmony_ci               pb->W[w_end++] = next;
166bf215546Sopenharmony_ci            }
167bf215546Sopenharmony_ci         }
168bf215546Sopenharmony_ci      }
169bf215546Sopenharmony_ci   }
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   return val;
172bf215546Sopenharmony_ci}
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_civoid
175bf215546Sopenharmony_cinir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
176bf215546Sopenharmony_ci                                    nir_block *block, nir_ssa_def *def)
177bf215546Sopenharmony_ci{
178bf215546Sopenharmony_ci   _mesa_hash_table_insert(&val->ht, INDEX_TO_KEY(block->index), def);
179bf215546Sopenharmony_ci}
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_cinir_ssa_def *
182bf215546Sopenharmony_cinir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
183bf215546Sopenharmony_ci                                    nir_block *block)
184bf215546Sopenharmony_ci{
185bf215546Sopenharmony_ci   /* Crawl up the dominance tree and find the closest dominator for which we
186bf215546Sopenharmony_ci    * have a valid ssa_def, if any.
187bf215546Sopenharmony_ci    */
188bf215546Sopenharmony_ci   nir_block *dom = block;
189bf215546Sopenharmony_ci   struct hash_entry *he = NULL;
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   while (dom != NULL) {
192bf215546Sopenharmony_ci      he = _mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index));
193bf215546Sopenharmony_ci      if (he != NULL)
194bf215546Sopenharmony_ci         break;
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci      dom = dom->imm_dom;
197bf215546Sopenharmony_ci   }
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   /* Exactly one of (he != NULL) and (dom == NULL) must be true. */
200bf215546Sopenharmony_ci   assert((he != NULL) != (dom == NULL));
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   nir_ssa_def *def;
203bf215546Sopenharmony_ci   if (dom == NULL) {
204bf215546Sopenharmony_ci      /* No dominator means either that we crawled to the top without ever
205bf215546Sopenharmony_ci       * finding a definition or that this block is unreachable.  In either
206bf215546Sopenharmony_ci       * case, the value is undefined so we need an SSA undef.
207bf215546Sopenharmony_ci       */
208bf215546Sopenharmony_ci      nir_ssa_undef_instr *undef =
209bf215546Sopenharmony_ci         nir_ssa_undef_instr_create(val->builder->shader,
210bf215546Sopenharmony_ci                                    val->num_components,
211bf215546Sopenharmony_ci                                    val->bit_size);
212bf215546Sopenharmony_ci      nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
213bf215546Sopenharmony_ci                       &undef->instr);
214bf215546Sopenharmony_ci      def = &undef->def;
215bf215546Sopenharmony_ci   } else if (he->data == NEEDS_PHI) {
216bf215546Sopenharmony_ci      /* The magic value NEEDS_PHI indicates that the block needs a phi node
217bf215546Sopenharmony_ci       * but none has been created.  We need to create one now so we can
218bf215546Sopenharmony_ci       * return it to the caller.
219bf215546Sopenharmony_ci       *
220bf215546Sopenharmony_ci       * Because a phi node may use SSA defs that it does not dominate (this
221bf215546Sopenharmony_ci       * happens in loops), we do not yet have enough information to fully
222bf215546Sopenharmony_ci       * fill out the phi node.  Instead, the phi nodes we create here will be
223bf215546Sopenharmony_ci       * empty (have no sources) and won't actually be placed in the block's
224bf215546Sopenharmony_ci       * instruction list yet.  Later, in nir_phi_builder_finish(), we walk
225bf215546Sopenharmony_ci       * over all of the phi instructions, fill out the sources lists, and
226bf215546Sopenharmony_ci       * place them at the top of their respective block's instruction list.
227bf215546Sopenharmony_ci       *
228bf215546Sopenharmony_ci       * Creating phi nodes on-demand allows us to avoid creating dead phi
229bf215546Sopenharmony_ci       * nodes that will just get deleted later. While this probably isn't a
230bf215546Sopenharmony_ci       * big win for a full into-SSA pass, other users may use the phi builder
231bf215546Sopenharmony_ci       * to make small SSA form repairs where most of the phi nodes will never
232bf215546Sopenharmony_ci       * be used.
233bf215546Sopenharmony_ci       */
234bf215546Sopenharmony_ci      nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
235bf215546Sopenharmony_ci      nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components,
236bf215546Sopenharmony_ci                        val->bit_size, NULL);
237bf215546Sopenharmony_ci      phi->instr.block = dom;
238bf215546Sopenharmony_ci      exec_list_push_tail(&val->phis, &phi->instr.node);
239bf215546Sopenharmony_ci      def = &phi->dest.ssa;
240bf215546Sopenharmony_ci      he->data = def;
241bf215546Sopenharmony_ci   } else {
242bf215546Sopenharmony_ci      /* In this case, we have an actual SSA def.  It's either the result of a
243bf215546Sopenharmony_ci       * phi node created by the case above or one passed to us through
244bf215546Sopenharmony_ci       * nir_phi_builder_value_set_block_def().
245bf215546Sopenharmony_ci       */
246bf215546Sopenharmony_ci      def = (struct nir_ssa_def *) he->data;
247bf215546Sopenharmony_ci   }
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci   /* Walk the chain and stash the def in all of the applicable blocks.  We do
250bf215546Sopenharmony_ci    * this for two reasons:
251bf215546Sopenharmony_ci    *
252bf215546Sopenharmony_ci    *  1) To speed up lookup next time even if the next time is called from a
253bf215546Sopenharmony_ci    *     block that is not dominated by this one.
254bf215546Sopenharmony_ci    *  2) To avoid unneeded recreation of phi nodes and undefs.
255bf215546Sopenharmony_ci    */
256bf215546Sopenharmony_ci   for (dom = block; dom != NULL; dom = dom->imm_dom) {
257bf215546Sopenharmony_ci      if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index)) != NULL)
258bf215546Sopenharmony_ci         break;
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci      nir_phi_builder_value_set_block_def(val, dom, def);
261bf215546Sopenharmony_ci   }
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci   return def;
264bf215546Sopenharmony_ci}
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_civoid
267bf215546Sopenharmony_cinir_phi_builder_finish(struct nir_phi_builder *pb)
268bf215546Sopenharmony_ci{
269bf215546Sopenharmony_ci   foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
270bf215546Sopenharmony_ci      /* We treat the linked list of phi nodes like a worklist.  The list is
271bf215546Sopenharmony_ci       * pre-populated by calls to nir_phi_builder_value_get_block_def() that
272bf215546Sopenharmony_ci       * create phi nodes.  As we fill in the sources of phi nodes, more may
273bf215546Sopenharmony_ci       * be created and are added to the end of the list.
274bf215546Sopenharmony_ci       *
275bf215546Sopenharmony_ci       * Because we are adding and removing phi nodes from the list as we go,
276bf215546Sopenharmony_ci       * we can't iterate over it normally.  Instead, we just iterate until
277bf215546Sopenharmony_ci       * the list is empty.
278bf215546Sopenharmony_ci       */
279bf215546Sopenharmony_ci      while (!exec_list_is_empty(&val->phis)) {
280bf215546Sopenharmony_ci         struct exec_node *head = exec_list_get_head(&val->phis);
281bf215546Sopenharmony_ci         nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node);
282bf215546Sopenharmony_ci         assert(phi->instr.type == nir_instr_type_phi);
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci         exec_node_remove(&phi->instr.node);
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci         /* XXX: Constructing the array this many times seems expensive. */
287bf215546Sopenharmony_ci         nir_block **preds = nir_block_get_predecessors_sorted(phi->instr.block, pb);
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci         for (unsigned i = 0; i < phi->instr.block->predecessors->entries; i++) {
290bf215546Sopenharmony_ci            nir_phi_instr_add_src(phi, preds[i],
291bf215546Sopenharmony_ci                                  nir_src_for_ssa(nir_phi_builder_value_get_block_def(val, preds[i])));
292bf215546Sopenharmony_ci         }
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci         ralloc_free(preds);
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci         nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr);
297bf215546Sopenharmony_ci      }
298bf215546Sopenharmony_ci   }
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   ralloc_free(pb);
301bf215546Sopenharmony_ci}
302