1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Jason Ekstrand (jason@jlekstrand.net)
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "nir.h"
29bf215546Sopenharmony_ci#include "nir_builder.h"
30bf215546Sopenharmony_ci#include "nir_vla.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci/*
33bf215546Sopenharmony_ci * This file implements an out-of-SSA pass as described in "Revisiting
34bf215546Sopenharmony_ci * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
35bf215546Sopenharmony_ci * Boissinot et al.
36bf215546Sopenharmony_ci */
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_cistruct from_ssa_state {
39bf215546Sopenharmony_ci   nir_builder builder;
40bf215546Sopenharmony_ci   void *dead_ctx;
41bf215546Sopenharmony_ci   struct exec_list dead_instrs;
42bf215546Sopenharmony_ci   bool phi_webs_only;
43bf215546Sopenharmony_ci   struct hash_table *merge_node_table;
44bf215546Sopenharmony_ci   nir_instr *instr;
45bf215546Sopenharmony_ci   bool progress;
46bf215546Sopenharmony_ci};
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci/* Returns if def @a comes after def @b.
49bf215546Sopenharmony_ci *
50bf215546Sopenharmony_ci * The core observation that makes the Boissinot algorithm efficient
51bf215546Sopenharmony_ci * is that, given two properly sorted sets, we can check for
52bf215546Sopenharmony_ci * interference in these sets via a linear walk. This is accomplished
53bf215546Sopenharmony_ci * by doing single combined walk over union of the two sets in DFS
54bf215546Sopenharmony_ci * order. It doesn't matter what DFS we do so long as we're
55bf215546Sopenharmony_ci * consistent. Fortunately, the dominance algorithm we ran prior to
56bf215546Sopenharmony_ci * this pass did such a walk and recorded the pre- and post-indices in
57bf215546Sopenharmony_ci * the blocks.
58bf215546Sopenharmony_ci *
59bf215546Sopenharmony_ci * We treat SSA undefs as always coming before other instruction types.
60bf215546Sopenharmony_ci */
61bf215546Sopenharmony_cistatic bool
62bf215546Sopenharmony_cidef_after(nir_ssa_def *a, nir_ssa_def *b)
63bf215546Sopenharmony_ci{
64bf215546Sopenharmony_ci   if (a->parent_instr->type == nir_instr_type_ssa_undef)
65bf215546Sopenharmony_ci      return false;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   if (b->parent_instr->type == nir_instr_type_ssa_undef)
68bf215546Sopenharmony_ci      return true;
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci   /* If they're in the same block, we can rely on whichever instruction
71bf215546Sopenharmony_ci    * comes first in the block.
72bf215546Sopenharmony_ci    */
73bf215546Sopenharmony_ci   if (a->parent_instr->block == b->parent_instr->block)
74bf215546Sopenharmony_ci      return a->parent_instr->index > b->parent_instr->index;
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci   /* Otherwise, if blocks are distinct, we sort them in DFS pre-order */
77bf215546Sopenharmony_ci   return a->parent_instr->block->dom_pre_index >
78bf215546Sopenharmony_ci          b->parent_instr->block->dom_pre_index;
79bf215546Sopenharmony_ci}
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci/* Returns true if a dominates b */
82bf215546Sopenharmony_cistatic bool
83bf215546Sopenharmony_cissa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
84bf215546Sopenharmony_ci{
85bf215546Sopenharmony_ci   if (a->parent_instr->type == nir_instr_type_ssa_undef) {
86bf215546Sopenharmony_ci      /* SSA undefs always dominate */
87bf215546Sopenharmony_ci      return true;
88bf215546Sopenharmony_ci   } if (def_after(a, b)) {
89bf215546Sopenharmony_ci      return false;
90bf215546Sopenharmony_ci   } else if (a->parent_instr->block == b->parent_instr->block) {
91bf215546Sopenharmony_ci      return def_after(b, a);
92bf215546Sopenharmony_ci   } else {
93bf215546Sopenharmony_ci      return nir_block_dominates(a->parent_instr->block,
94bf215546Sopenharmony_ci                                 b->parent_instr->block);
95bf215546Sopenharmony_ci   }
96bf215546Sopenharmony_ci}
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci/* The following data structure, which I have named merge_set is a way of
100bf215546Sopenharmony_ci * representing a set registers of non-interfering registers.  This is
101bf215546Sopenharmony_ci * based on the concept of a "dominance forest" presented in "Fast Copy
102bf215546Sopenharmony_ci * Coalescing and Live-Range Identification" by Budimlic et al. but the
103bf215546Sopenharmony_ci * implementation concept is taken from  "Revisiting Out-of-SSA Translation
104bf215546Sopenharmony_ci * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
105bf215546Sopenharmony_ci *
106bf215546Sopenharmony_ci * Each SSA definition is associated with a merge_node and the association
107bf215546Sopenharmony_ci * is represented by a combination of a hash table and the "def" parameter
108bf215546Sopenharmony_ci * in the merge_node structure.  The merge_set stores a linked list of
109bf215546Sopenharmony_ci * merge_nodes, ordered by a pre-order DFS walk of the dominance tree.  (Since
110bf215546Sopenharmony_ci * the liveness analysis pass indexes the SSA values in dominance order for
111bf215546Sopenharmony_ci * us, this is an easy thing to keep up.)  It is assumed that no pair of the
112bf215546Sopenharmony_ci * nodes in a given set interfere.  Merging two sets or checking for
113bf215546Sopenharmony_ci * interference can be done in a single linear-time merge-sort walk of the
114bf215546Sopenharmony_ci * two lists of nodes.
115bf215546Sopenharmony_ci */
116bf215546Sopenharmony_cistruct merge_set;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_citypedef struct {
119bf215546Sopenharmony_ci   struct exec_node node;
120bf215546Sopenharmony_ci   struct merge_set *set;
121bf215546Sopenharmony_ci   nir_ssa_def *def;
122bf215546Sopenharmony_ci} merge_node;
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_citypedef struct merge_set {
125bf215546Sopenharmony_ci   struct exec_list nodes;
126bf215546Sopenharmony_ci   unsigned size;
127bf215546Sopenharmony_ci   bool divergent;
128bf215546Sopenharmony_ci   nir_register *reg;
129bf215546Sopenharmony_ci} merge_set;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci#if 0
132bf215546Sopenharmony_cistatic void
133bf215546Sopenharmony_cimerge_set_dump(merge_set *set, FILE *fp)
134bf215546Sopenharmony_ci{
135bf215546Sopenharmony_ci   nir_ssa_def *dom[set->size];
136bf215546Sopenharmony_ci   int dom_idx = -1;
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   foreach_list_typed(merge_node, node, node, &set->nodes) {
139bf215546Sopenharmony_ci      while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def))
140bf215546Sopenharmony_ci         dom_idx--;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci      for (int i = 0; i <= dom_idx; i++)
143bf215546Sopenharmony_ci         fprintf(fp, "  ");
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci      fprintf(fp, "ssa_%d\n", node->def->index);
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci      dom[++dom_idx] = node->def;
148bf215546Sopenharmony_ci   }
149bf215546Sopenharmony_ci}
150bf215546Sopenharmony_ci#endif
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_cistatic merge_node *
153bf215546Sopenharmony_ciget_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
154bf215546Sopenharmony_ci{
155bf215546Sopenharmony_ci   struct hash_entry *entry =
156bf215546Sopenharmony_ci      _mesa_hash_table_search(state->merge_node_table, def);
157bf215546Sopenharmony_ci   if (entry)
158bf215546Sopenharmony_ci      return entry->data;
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   merge_set *set = ralloc(state->dead_ctx, merge_set);
161bf215546Sopenharmony_ci   exec_list_make_empty(&set->nodes);
162bf215546Sopenharmony_ci   set->size = 1;
163bf215546Sopenharmony_ci   set->divergent = def->divergent;
164bf215546Sopenharmony_ci   set->reg = NULL;
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   merge_node *node = ralloc(state->dead_ctx, merge_node);
167bf215546Sopenharmony_ci   node->set = set;
168bf215546Sopenharmony_ci   node->def = def;
169bf215546Sopenharmony_ci   exec_list_push_head(&set->nodes, &node->node);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   _mesa_hash_table_insert(state->merge_node_table, def, node);
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci   return node;
174bf215546Sopenharmony_ci}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_cistatic bool
177bf215546Sopenharmony_cimerge_nodes_interfere(merge_node *a, merge_node *b)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci   /* There's no need to check for interference within the same set,
180bf215546Sopenharmony_ci    * because we assume, that sets themselves are already
181bf215546Sopenharmony_ci    * interference-free.
182bf215546Sopenharmony_ci    */
183bf215546Sopenharmony_ci   if (a->set == b->set)
184bf215546Sopenharmony_ci      return false;
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   return nir_ssa_defs_interfere(a->def, b->def);
187bf215546Sopenharmony_ci}
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci/* Merges b into a
190bf215546Sopenharmony_ci *
191bf215546Sopenharmony_ci * This algorithm uses def_after to ensure that the sets always stay in the
192bf215546Sopenharmony_ci * same order as the pre-order DFS done by the liveness algorithm.
193bf215546Sopenharmony_ci */
194bf215546Sopenharmony_cistatic merge_set *
195bf215546Sopenharmony_cimerge_merge_sets(merge_set *a, merge_set *b)
196bf215546Sopenharmony_ci{
197bf215546Sopenharmony_ci   struct exec_node *an = exec_list_get_head(&a->nodes);
198bf215546Sopenharmony_ci   struct exec_node *bn = exec_list_get_head(&b->nodes);
199bf215546Sopenharmony_ci   while (!exec_node_is_tail_sentinel(bn)) {
200bf215546Sopenharmony_ci      merge_node *a_node = exec_node_data(merge_node, an, node);
201bf215546Sopenharmony_ci      merge_node *b_node = exec_node_data(merge_node, bn, node);
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci      if (exec_node_is_tail_sentinel(an) ||
204bf215546Sopenharmony_ci          def_after(a_node->def, b_node->def)) {
205bf215546Sopenharmony_ci         struct exec_node *next = bn->next;
206bf215546Sopenharmony_ci         exec_node_remove(bn);
207bf215546Sopenharmony_ci         exec_node_insert_node_before(an, bn);
208bf215546Sopenharmony_ci         exec_node_data(merge_node, bn, node)->set = a;
209bf215546Sopenharmony_ci         bn = next;
210bf215546Sopenharmony_ci      } else {
211bf215546Sopenharmony_ci         an = an->next;
212bf215546Sopenharmony_ci      }
213bf215546Sopenharmony_ci   }
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   a->size += b->size;
216bf215546Sopenharmony_ci   b->size = 0;
217bf215546Sopenharmony_ci   a->divergent |= b->divergent;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   return a;
220bf215546Sopenharmony_ci}
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci/* Checks for any interference between two merge sets
223bf215546Sopenharmony_ci *
224bf215546Sopenharmony_ci * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
225bf215546Sopenharmony_ci * Translation for Correctness, Code Quality, and Efficiency" by
226bf215546Sopenharmony_ci * Boissinot et al.
227bf215546Sopenharmony_ci */
228bf215546Sopenharmony_cistatic bool
229bf215546Sopenharmony_cimerge_sets_interfere(merge_set *a, merge_set *b)
230bf215546Sopenharmony_ci{
231bf215546Sopenharmony_ci   /* List of all the nodes which dominate the current node, in dominance
232bf215546Sopenharmony_ci    * order.
233bf215546Sopenharmony_ci    */
234bf215546Sopenharmony_ci   NIR_VLA(merge_node *, dom, a->size + b->size);
235bf215546Sopenharmony_ci   int dom_idx = -1;
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   struct exec_node *an = exec_list_get_head(&a->nodes);
238bf215546Sopenharmony_ci   struct exec_node *bn = exec_list_get_head(&b->nodes);
239bf215546Sopenharmony_ci   while (!exec_node_is_tail_sentinel(an) ||
240bf215546Sopenharmony_ci          !exec_node_is_tail_sentinel(bn)) {
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci      /* We walk the union of the two sets in the same order as the pre-order
243bf215546Sopenharmony_ci       * DFS done by liveness analysis.
244bf215546Sopenharmony_ci       */
245bf215546Sopenharmony_ci      merge_node *current;
246bf215546Sopenharmony_ci      if (exec_node_is_tail_sentinel(an)) {
247bf215546Sopenharmony_ci         current = exec_node_data(merge_node, bn, node);
248bf215546Sopenharmony_ci         bn = bn->next;
249bf215546Sopenharmony_ci      } else if (exec_node_is_tail_sentinel(bn)) {
250bf215546Sopenharmony_ci         current = exec_node_data(merge_node, an, node);
251bf215546Sopenharmony_ci         an = an->next;
252bf215546Sopenharmony_ci      } else {
253bf215546Sopenharmony_ci         merge_node *a_node = exec_node_data(merge_node, an, node);
254bf215546Sopenharmony_ci         merge_node *b_node = exec_node_data(merge_node, bn, node);
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci         if (def_after(b_node->def, a_node->def)) {
257bf215546Sopenharmony_ci            current = a_node;
258bf215546Sopenharmony_ci            an = an->next;
259bf215546Sopenharmony_ci         } else {
260bf215546Sopenharmony_ci            current = b_node;
261bf215546Sopenharmony_ci            bn = bn->next;
262bf215546Sopenharmony_ci         }
263bf215546Sopenharmony_ci      }
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci      /* Because our walk is a pre-order DFS, we can maintain the list of
266bf215546Sopenharmony_ci       * dominating nodes as a simple stack, pushing every node onto the list
267bf215546Sopenharmony_ci       * after we visit it and popping any non-dominating nodes off before we
268bf215546Sopenharmony_ci       * visit the current node.
269bf215546Sopenharmony_ci       */
270bf215546Sopenharmony_ci      while (dom_idx >= 0 &&
271bf215546Sopenharmony_ci             !ssa_def_dominates(dom[dom_idx]->def, current->def))
272bf215546Sopenharmony_ci         dom_idx--;
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci      /* There are three invariants of this algorithm that are important here:
275bf215546Sopenharmony_ci       *
276bf215546Sopenharmony_ci       *  1. There is no interference within either set a or set b.
277bf215546Sopenharmony_ci       *  2. None of the nodes processed up until this point interfere.
278bf215546Sopenharmony_ci       *  3. All the dominators of `current` have been processed
279bf215546Sopenharmony_ci       *
280bf215546Sopenharmony_ci       * Because of these invariants, we only need to check the current node
281bf215546Sopenharmony_ci       * against its minimal dominator.  If any other node N in the union
282bf215546Sopenharmony_ci       * interferes with current, then N must dominate current because we are
283bf215546Sopenharmony_ci       * in SSA form.  If N dominates current then it must also dominate our
284bf215546Sopenharmony_ci       * minimal dominator dom[dom_idx].  Since N is live at current it must
285bf215546Sopenharmony_ci       * also be live at the minimal dominator which means N interferes with
286bf215546Sopenharmony_ci       * the minimal dominator dom[dom_idx] and, by invariants 2 and 3 above,
287bf215546Sopenharmony_ci       * the algorithm would have already terminated.  Therefore, if we got
288bf215546Sopenharmony_ci       * here, the only node that can possibly interfere with current is the
289bf215546Sopenharmony_ci       * minimal dominator dom[dom_idx].
290bf215546Sopenharmony_ci       *
291bf215546Sopenharmony_ci       * This is what allows us to do a interference check of the union of the
292bf215546Sopenharmony_ci       * two sets with a single linear-time walk.
293bf215546Sopenharmony_ci       */
294bf215546Sopenharmony_ci      if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx]))
295bf215546Sopenharmony_ci         return true;
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci      dom[++dom_idx] = current;
298bf215546Sopenharmony_ci   }
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   return false;
301bf215546Sopenharmony_ci}
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_cistatic bool
304bf215546Sopenharmony_ciadd_parallel_copy_to_end_of_block(nir_shader *shader, nir_block *block, void *dead_ctx)
305bf215546Sopenharmony_ci{
306bf215546Sopenharmony_ci   bool need_end_copy = false;
307bf215546Sopenharmony_ci   if (block->successors[0]) {
308bf215546Sopenharmony_ci      nir_instr *instr = nir_block_first_instr(block->successors[0]);
309bf215546Sopenharmony_ci      if (instr && instr->type == nir_instr_type_phi)
310bf215546Sopenharmony_ci         need_end_copy = true;
311bf215546Sopenharmony_ci   }
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   if (block->successors[1]) {
314bf215546Sopenharmony_ci      nir_instr *instr = nir_block_first_instr(block->successors[1]);
315bf215546Sopenharmony_ci      if (instr && instr->type == nir_instr_type_phi)
316bf215546Sopenharmony_ci         need_end_copy = true;
317bf215546Sopenharmony_ci   }
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   if (need_end_copy) {
320bf215546Sopenharmony_ci      /* If one of our successors has at least one phi node, we need to
321bf215546Sopenharmony_ci       * create a parallel copy at the end of the block but before the jump
322bf215546Sopenharmony_ci       * (if there is one).
323bf215546Sopenharmony_ci       */
324bf215546Sopenharmony_ci      nir_parallel_copy_instr *pcopy =
325bf215546Sopenharmony_ci         nir_parallel_copy_instr_create(shader);
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci      nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr);
328bf215546Sopenharmony_ci   }
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci   return true;
331bf215546Sopenharmony_ci}
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_cistatic nir_parallel_copy_instr *
334bf215546Sopenharmony_ciget_parallel_copy_at_end_of_block(nir_block *block)
335bf215546Sopenharmony_ci{
336bf215546Sopenharmony_ci   nir_instr *last_instr = nir_block_last_instr(block);
337bf215546Sopenharmony_ci   if (last_instr == NULL)
338bf215546Sopenharmony_ci      return NULL;
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   /* The last instruction may be a jump in which case the parallel copy is
341bf215546Sopenharmony_ci    * right before it.
342bf215546Sopenharmony_ci    */
343bf215546Sopenharmony_ci   if (last_instr->type == nir_instr_type_jump)
344bf215546Sopenharmony_ci      last_instr = nir_instr_prev(last_instr);
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   if (last_instr && last_instr->type == nir_instr_type_parallel_copy)
347bf215546Sopenharmony_ci      return nir_instr_as_parallel_copy(last_instr);
348bf215546Sopenharmony_ci   else
349bf215546Sopenharmony_ci      return NULL;
350bf215546Sopenharmony_ci}
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci/** Isolate phi nodes with parallel copies
353bf215546Sopenharmony_ci *
354bf215546Sopenharmony_ci * In order to solve the dependency problems with the sources and
355bf215546Sopenharmony_ci * destinations of phi nodes, we first isolate them by adding parallel
356bf215546Sopenharmony_ci * copies to the beginnings and ends of basic blocks.  For every block with
357bf215546Sopenharmony_ci * phi nodes, we add a parallel copy immediately following the last phi
358bf215546Sopenharmony_ci * node that copies the destinations of all of the phi nodes to new SSA
359bf215546Sopenharmony_ci * values.  We also add a parallel copy to the end of every block that has
360bf215546Sopenharmony_ci * a successor with phi nodes that, for each phi node in each successor,
361bf215546Sopenharmony_ci * copies the corresponding sorce of the phi node and adjust the phi to
362bf215546Sopenharmony_ci * used the destination of the parallel copy.
363bf215546Sopenharmony_ci *
364bf215546Sopenharmony_ci * In SSA form, each value has exactly one definition.  What this does is
365bf215546Sopenharmony_ci * ensure that each value used in a phi also has exactly one use.  The
366bf215546Sopenharmony_ci * destinations of phis are only used by the parallel copy immediately
367bf215546Sopenharmony_ci * following the phi nodes and.  Thanks to the parallel copy at the end of
368bf215546Sopenharmony_ci * the predecessor block, the sources of phi nodes are are the only use of
369bf215546Sopenharmony_ci * that value.  This allows us to immediately assign all the sources and
370bf215546Sopenharmony_ci * destinations of any given phi node to the same register without worrying
371bf215546Sopenharmony_ci * about interference at all.  We do coalescing to get rid of the parallel
372bf215546Sopenharmony_ci * copies where possible.
373bf215546Sopenharmony_ci *
374bf215546Sopenharmony_ci * Before this pass can be run, we have to iterate over the blocks with
375bf215546Sopenharmony_ci * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
376bf215546Sopenharmony_ci * the ends of blocks exist.  We can create the ones at the beginnings as
377bf215546Sopenharmony_ci * we go, but the ones at the ends of blocks need to be created ahead of
378bf215546Sopenharmony_ci * time because of potential back-edges in the CFG.
379bf215546Sopenharmony_ci */
380bf215546Sopenharmony_cistatic bool
381bf215546Sopenharmony_ciisolate_phi_nodes_block(nir_shader *shader, nir_block *block, void *dead_ctx)
382bf215546Sopenharmony_ci{
383bf215546Sopenharmony_ci   nir_instr *last_phi_instr = NULL;
384bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
385bf215546Sopenharmony_ci      /* Phi nodes only ever come at the start of a block */
386bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi)
387bf215546Sopenharmony_ci         break;
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci      last_phi_instr = instr;
390bf215546Sopenharmony_ci   }
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci   /* If we don't have any phis, then there's nothing for us to do. */
393bf215546Sopenharmony_ci   if (last_phi_instr == NULL)
394bf215546Sopenharmony_ci      return true;
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   /* If we have phi nodes, we need to create a parallel copy at the
397bf215546Sopenharmony_ci    * start of this block but after the phi nodes.
398bf215546Sopenharmony_ci    */
399bf215546Sopenharmony_ci   nir_parallel_copy_instr *block_pcopy =
400bf215546Sopenharmony_ci      nir_parallel_copy_instr_create(shader);
401bf215546Sopenharmony_ci   nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
404bf215546Sopenharmony_ci      /* Phi nodes only ever come at the start of a block */
405bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi)
406bf215546Sopenharmony_ci         break;
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci      nir_phi_instr *phi = nir_instr_as_phi(instr);
409bf215546Sopenharmony_ci      assert(phi->dest.is_ssa);
410bf215546Sopenharmony_ci      nir_foreach_phi_src(src, phi) {
411bf215546Sopenharmony_ci         if (nir_src_is_undef(src->src))
412bf215546Sopenharmony_ci            continue;
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci         nir_parallel_copy_instr *pcopy =
415bf215546Sopenharmony_ci            get_parallel_copy_at_end_of_block(src->pred);
416bf215546Sopenharmony_ci         assert(pcopy);
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci         nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
419bf215546Sopenharmony_ci                                                  nir_parallel_copy_entry);
420bf215546Sopenharmony_ci         nir_ssa_dest_init(&pcopy->instr, &entry->dest,
421bf215546Sopenharmony_ci                           phi->dest.ssa.num_components,
422bf215546Sopenharmony_ci                           phi->dest.ssa.bit_size, NULL);
423bf215546Sopenharmony_ci         entry->dest.ssa.divergent = nir_src_is_divergent(src->src);
424bf215546Sopenharmony_ci         exec_list_push_tail(&pcopy->entries, &entry->node);
425bf215546Sopenharmony_ci
426bf215546Sopenharmony_ci         assert(src->src.is_ssa);
427bf215546Sopenharmony_ci         nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src);
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci         nir_instr_rewrite_src(&phi->instr, &src->src,
430bf215546Sopenharmony_ci                               nir_src_for_ssa(&entry->dest.ssa));
431bf215546Sopenharmony_ci      }
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci      nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
434bf215546Sopenharmony_ci                                               nir_parallel_copy_entry);
435bf215546Sopenharmony_ci      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
436bf215546Sopenharmony_ci                        phi->dest.ssa.num_components, phi->dest.ssa.bit_size,
437bf215546Sopenharmony_ci                        NULL);
438bf215546Sopenharmony_ci      entry->dest.ssa.divergent = phi->dest.ssa.divergent;
439bf215546Sopenharmony_ci      exec_list_push_tail(&block_pcopy->entries, &entry->node);
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
442bf215546Sopenharmony_ci                               &entry->dest.ssa);
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci      nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
445bf215546Sopenharmony_ci                            nir_src_for_ssa(&phi->dest.ssa));
446bf215546Sopenharmony_ci   }
447bf215546Sopenharmony_ci
448bf215546Sopenharmony_ci   return true;
449bf215546Sopenharmony_ci}
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_cistatic bool
452bf215546Sopenharmony_cicoalesce_phi_nodes_block(nir_block *block, struct from_ssa_state *state)
453bf215546Sopenharmony_ci{
454bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
455bf215546Sopenharmony_ci      /* Phi nodes only ever come at the start of a block */
456bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi)
457bf215546Sopenharmony_ci         break;
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci      nir_phi_instr *phi = nir_instr_as_phi(instr);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci      assert(phi->dest.is_ssa);
462bf215546Sopenharmony_ci      merge_node *dest_node = get_merge_node(&phi->dest.ssa, state);
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci      nir_foreach_phi_src(src, phi) {
465bf215546Sopenharmony_ci         assert(src->src.is_ssa);
466bf215546Sopenharmony_ci         if (nir_src_is_undef(src->src))
467bf215546Sopenharmony_ci            continue;
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci         merge_node *src_node = get_merge_node(src->src.ssa, state);
470bf215546Sopenharmony_ci         if (src_node->set != dest_node->set)
471bf215546Sopenharmony_ci            merge_merge_sets(dest_node->set, src_node->set);
472bf215546Sopenharmony_ci      }
473bf215546Sopenharmony_ci   }
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   return true;
476bf215546Sopenharmony_ci}
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_cistatic void
479bf215546Sopenharmony_ciaggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
480bf215546Sopenharmony_ci                                 struct from_ssa_state *state)
481bf215546Sopenharmony_ci{
482bf215546Sopenharmony_ci   nir_foreach_parallel_copy_entry(entry, pcopy) {
483bf215546Sopenharmony_ci      if (!entry->src.is_ssa)
484bf215546Sopenharmony_ci         continue;
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci      /* Since load_const instructions are SSA only, we can't replace their
487bf215546Sopenharmony_ci       * destinations with registers and, therefore, can't coalesce them.
488bf215546Sopenharmony_ci       */
489bf215546Sopenharmony_ci      if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const)
490bf215546Sopenharmony_ci         continue;
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci      /* Don't try and coalesce these */
493bf215546Sopenharmony_ci      if (entry->dest.ssa.num_components != entry->src.ssa->num_components)
494bf215546Sopenharmony_ci         continue;
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci      merge_node *src_node = get_merge_node(entry->src.ssa, state);
497bf215546Sopenharmony_ci      merge_node *dest_node = get_merge_node(&entry->dest.ssa, state);
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci      if (src_node->set == dest_node->set)
500bf215546Sopenharmony_ci         continue;
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci      /* TODO: We can probably do better here but for now we should be safe if
503bf215546Sopenharmony_ci       * we just don't coalesce things with different divergence.
504bf215546Sopenharmony_ci       */
505bf215546Sopenharmony_ci      if (dest_node->set->divergent != src_node->set->divergent)
506bf215546Sopenharmony_ci         continue;
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci      if (!merge_sets_interfere(src_node->set, dest_node->set))
509bf215546Sopenharmony_ci         merge_merge_sets(src_node->set, dest_node->set);
510bf215546Sopenharmony_ci   }
511bf215546Sopenharmony_ci}
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_cistatic bool
514bf215546Sopenharmony_ciaggressive_coalesce_block(nir_block *block, struct from_ssa_state *state)
515bf215546Sopenharmony_ci{
516bf215546Sopenharmony_ci   nir_parallel_copy_instr *start_pcopy = NULL;
517bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
518bf215546Sopenharmony_ci      /* Phi nodes only ever come at the start of a block */
519bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi) {
520bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_parallel_copy)
521bf215546Sopenharmony_ci            break; /* The parallel copy must be right after the phis */
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci         start_pcopy = nir_instr_as_parallel_copy(instr);
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci         aggressive_coalesce_parallel_copy(start_pcopy, state);
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci         break;
528bf215546Sopenharmony_ci      }
529bf215546Sopenharmony_ci   }
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci   nir_parallel_copy_instr *end_pcopy =
532bf215546Sopenharmony_ci      get_parallel_copy_at_end_of_block(block);
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci   if (end_pcopy && end_pcopy != start_pcopy)
535bf215546Sopenharmony_ci      aggressive_coalesce_parallel_copy(end_pcopy, state);
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci   return true;
538bf215546Sopenharmony_ci}
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_cistatic nir_register *
541bf215546Sopenharmony_cicreate_reg_for_ssa_def(nir_ssa_def *def, nir_function_impl *impl)
542bf215546Sopenharmony_ci{
543bf215546Sopenharmony_ci   nir_register *reg = nir_local_reg_create(impl);
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   reg->num_components = def->num_components;
546bf215546Sopenharmony_ci   reg->bit_size = def->bit_size;
547bf215546Sopenharmony_ci   reg->num_array_elems = 0;
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci   return reg;
550bf215546Sopenharmony_ci}
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_cistatic bool
553bf215546Sopenharmony_cirewrite_ssa_def(nir_ssa_def *def, void *void_state)
554bf215546Sopenharmony_ci{
555bf215546Sopenharmony_ci   struct from_ssa_state *state = void_state;
556bf215546Sopenharmony_ci   nir_register *reg;
557bf215546Sopenharmony_ci
558bf215546Sopenharmony_ci   struct hash_entry *entry =
559bf215546Sopenharmony_ci      _mesa_hash_table_search(state->merge_node_table, def);
560bf215546Sopenharmony_ci   if (entry) {
561bf215546Sopenharmony_ci      /* In this case, we're part of a phi web.  Use the web's register. */
562bf215546Sopenharmony_ci      merge_node *node = (merge_node *)entry->data;
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci      /* If it doesn't have a register yet, create one.  Note that all of
565bf215546Sopenharmony_ci       * the things in the merge set should be the same so it doesn't
566bf215546Sopenharmony_ci       * matter which node's definition we use.
567bf215546Sopenharmony_ci       */
568bf215546Sopenharmony_ci      if (node->set->reg == NULL) {
569bf215546Sopenharmony_ci         node->set->reg = create_reg_for_ssa_def(def, state->builder.impl);
570bf215546Sopenharmony_ci         node->set->reg->divergent = node->set->divergent;
571bf215546Sopenharmony_ci      }
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci      reg = node->set->reg;
574bf215546Sopenharmony_ci   } else {
575bf215546Sopenharmony_ci      if (state->phi_webs_only)
576bf215546Sopenharmony_ci         return true;
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci      /* We leave load_const SSA values alone.  They act as immediates to
579bf215546Sopenharmony_ci       * the backend.  If it got coalesced into a phi, that's ok.
580bf215546Sopenharmony_ci       */
581bf215546Sopenharmony_ci      if (def->parent_instr->type == nir_instr_type_load_const)
582bf215546Sopenharmony_ci         return true;
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci      reg = create_reg_for_ssa_def(def, state->builder.impl);
585bf215546Sopenharmony_ci   }
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses_src(def, nir_src_for_reg(reg));
588bf215546Sopenharmony_ci   assert(nir_ssa_def_is_unused(def));
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci   if (def->parent_instr->type == nir_instr_type_ssa_undef) {
591bf215546Sopenharmony_ci      /* If it's an ssa_undef instruction, remove it since we know we just got
592bf215546Sopenharmony_ci       * rid of all its uses.
593bf215546Sopenharmony_ci       */
594bf215546Sopenharmony_ci      nir_instr *parent_instr = def->parent_instr;
595bf215546Sopenharmony_ci      nir_instr_remove(parent_instr);
596bf215546Sopenharmony_ci      exec_list_push_tail(&state->dead_instrs, &parent_instr->node);
597bf215546Sopenharmony_ci      state->progress = true;
598bf215546Sopenharmony_ci      return true;
599bf215546Sopenharmony_ci   }
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   assert(def->parent_instr->type != nir_instr_type_load_const);
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci   /* At this point we know a priori that this SSA def is part of a
604bf215546Sopenharmony_ci    * nir_dest.  We can use exec_node_data to get the dest pointer.
605bf215546Sopenharmony_ci    */
606bf215546Sopenharmony_ci   nir_dest *dest = exec_node_data(nir_dest, def, ssa);
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
609bf215546Sopenharmony_ci   state->progress = true;
610bf215546Sopenharmony_ci   return true;
611bf215546Sopenharmony_ci}
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_ci/* Resolves ssa definitions to registers.  While we're at it, we also
614bf215546Sopenharmony_ci * remove phi nodes.
615bf215546Sopenharmony_ci */
616bf215546Sopenharmony_cistatic void
617bf215546Sopenharmony_ciresolve_registers_block(nir_block *block, struct from_ssa_state *state)
618bf215546Sopenharmony_ci{
619bf215546Sopenharmony_ci   nir_foreach_instr_safe(instr, block) {
620bf215546Sopenharmony_ci      state->instr = instr;
621bf215546Sopenharmony_ci      nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci      if (instr->type == nir_instr_type_phi) {
624bf215546Sopenharmony_ci         nir_instr_remove(instr);
625bf215546Sopenharmony_ci         exec_list_push_tail(&state->dead_instrs, &instr->node);
626bf215546Sopenharmony_ci         state->progress = true;
627bf215546Sopenharmony_ci      }
628bf215546Sopenharmony_ci   }
629bf215546Sopenharmony_ci   state->instr = NULL;
630bf215546Sopenharmony_ci}
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_cistatic void
633bf215546Sopenharmony_ciemit_copy(nir_builder *b, nir_src src, nir_src dest_src)
634bf215546Sopenharmony_ci{
635bf215546Sopenharmony_ci   assert(!dest_src.is_ssa &&
636bf215546Sopenharmony_ci          dest_src.reg.indirect == NULL &&
637bf215546Sopenharmony_ci          dest_src.reg.base_offset == 0);
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   assert(!nir_src_is_divergent(src) || nir_src_is_divergent(dest_src));
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_ci   if (src.is_ssa)
642bf215546Sopenharmony_ci      assert(src.ssa->num_components >= dest_src.reg.reg->num_components);
643bf215546Sopenharmony_ci   else
644bf215546Sopenharmony_ci      assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
647bf215546Sopenharmony_ci   nir_src_copy(&mov->src[0].src, &src);
648bf215546Sopenharmony_ci   mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
649bf215546Sopenharmony_ci   mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
650bf215546Sopenharmony_ci
651bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &mov->instr);
652bf215546Sopenharmony_ci}
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci/* Resolves a single parallel copy operation into a sequence of movs
655bf215546Sopenharmony_ci *
656bf215546Sopenharmony_ci * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
657bf215546Sopenharmony_ci * Correctness, Code Quality, and Efficiency" by Boissinot et al.
658bf215546Sopenharmony_ci * However, I never got the algorithm to work as written, so this version
659bf215546Sopenharmony_ci * is slightly modified.
660bf215546Sopenharmony_ci *
661bf215546Sopenharmony_ci * The algorithm works by playing this little shell game with the values.
662bf215546Sopenharmony_ci * We start by recording where every source value is and which source value
663bf215546Sopenharmony_ci * each destination value should receive.  We then grab any copy whose
664bf215546Sopenharmony_ci * destination is "empty", i.e. not used as a source, and do the following:
665bf215546Sopenharmony_ci *  - Find where its source value currently lives
666bf215546Sopenharmony_ci *  - Emit the move instruction
667bf215546Sopenharmony_ci *  - Set the location of the source value to the destination
668bf215546Sopenharmony_ci *  - Mark the location containing the source value
669bf215546Sopenharmony_ci *  - Mark the destination as no longer needing to be copied
670bf215546Sopenharmony_ci *
671bf215546Sopenharmony_ci * When we run out of "empty" destinations, we have a cycle and so we
672bf215546Sopenharmony_ci * create a temporary register, copy to that register, and mark the value
673bf215546Sopenharmony_ci * we copied as living in that temporary.  Now, the cycle is broken, so we
674bf215546Sopenharmony_ci * can continue with the above steps.
675bf215546Sopenharmony_ci */
676bf215546Sopenharmony_cistatic void
677bf215546Sopenharmony_ciresolve_parallel_copy(nir_parallel_copy_instr *pcopy,
678bf215546Sopenharmony_ci                      struct from_ssa_state *state)
679bf215546Sopenharmony_ci{
680bf215546Sopenharmony_ci   unsigned num_copies = 0;
681bf215546Sopenharmony_ci   nir_foreach_parallel_copy_entry(entry, pcopy) {
682bf215546Sopenharmony_ci      /* Sources may be SSA */
683bf215546Sopenharmony_ci      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
684bf215546Sopenharmony_ci         continue;
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci      num_copies++;
687bf215546Sopenharmony_ci   }
688bf215546Sopenharmony_ci
689bf215546Sopenharmony_ci   if (num_copies == 0) {
690bf215546Sopenharmony_ci      /* Hooray, we don't need any copies! */
691bf215546Sopenharmony_ci      nir_instr_remove(&pcopy->instr);
692bf215546Sopenharmony_ci      exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
693bf215546Sopenharmony_ci      return;
694bf215546Sopenharmony_ci   }
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci   /* The register/source corresponding to the given index */
697bf215546Sopenharmony_ci   NIR_VLA_ZERO(nir_src, values, num_copies * 2);
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci   /* The current location of a given piece of data.  We will use -1 for "null" */
700bf215546Sopenharmony_ci   NIR_VLA_FILL(int, loc, num_copies * 2, -1);
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci   /* The piece of data that the given piece of data is to be copied from.  We will use -1 for "null" */
703bf215546Sopenharmony_ci   NIR_VLA_FILL(int, pred, num_copies * 2, -1);
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci   /* The destinations we have yet to properly fill */
706bf215546Sopenharmony_ci   NIR_VLA(int, to_do, num_copies * 2);
707bf215546Sopenharmony_ci   int to_do_idx = -1;
708bf215546Sopenharmony_ci
709bf215546Sopenharmony_ci   state->builder.cursor = nir_before_instr(&pcopy->instr);
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci   /* Now we set everything up:
712bf215546Sopenharmony_ci    *  - All values get assigned a temporary index
713bf215546Sopenharmony_ci    *  - Current locations are set from sources
714bf215546Sopenharmony_ci    *  - Predicessors are recorded from sources and destinations
715bf215546Sopenharmony_ci    */
716bf215546Sopenharmony_ci   int num_vals = 0;
717bf215546Sopenharmony_ci   nir_foreach_parallel_copy_entry(entry, pcopy) {
718bf215546Sopenharmony_ci      /* Sources may be SSA */
719bf215546Sopenharmony_ci      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
720bf215546Sopenharmony_ci         continue;
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci      int src_idx = -1;
723bf215546Sopenharmony_ci      for (int i = 0; i < num_vals; ++i) {
724bf215546Sopenharmony_ci         if (nir_srcs_equal(values[i], entry->src))
725bf215546Sopenharmony_ci            src_idx = i;
726bf215546Sopenharmony_ci      }
727bf215546Sopenharmony_ci      if (src_idx < 0) {
728bf215546Sopenharmony_ci         src_idx = num_vals++;
729bf215546Sopenharmony_ci         values[src_idx] = entry->src;
730bf215546Sopenharmony_ci      }
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci      nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci      int dest_idx = -1;
735bf215546Sopenharmony_ci      for (int i = 0; i < num_vals; ++i) {
736bf215546Sopenharmony_ci         if (nir_srcs_equal(values[i], dest_src)) {
737bf215546Sopenharmony_ci            /* Each destination of a parallel copy instruction should be
738bf215546Sopenharmony_ci             * unique.  A destination may get used as a source, so we still
739bf215546Sopenharmony_ci             * have to walk the list.  However, the predecessor should not,
740bf215546Sopenharmony_ci             * at this point, be set yet, so we should have -1 here.
741bf215546Sopenharmony_ci             */
742bf215546Sopenharmony_ci            assert(pred[i] == -1);
743bf215546Sopenharmony_ci            dest_idx = i;
744bf215546Sopenharmony_ci         }
745bf215546Sopenharmony_ci      }
746bf215546Sopenharmony_ci      if (dest_idx < 0) {
747bf215546Sopenharmony_ci         dest_idx = num_vals++;
748bf215546Sopenharmony_ci         values[dest_idx] = dest_src;
749bf215546Sopenharmony_ci      }
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci      loc[src_idx] = src_idx;
752bf215546Sopenharmony_ci      pred[dest_idx] = src_idx;
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci      to_do[++to_do_idx] = dest_idx;
755bf215546Sopenharmony_ci   }
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci   /* Currently empty destinations we can go ahead and fill */
758bf215546Sopenharmony_ci   NIR_VLA(int, ready, num_copies * 2);
759bf215546Sopenharmony_ci   int ready_idx = -1;
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   /* Mark the ones that are ready for copying.  We know an index is a
762bf215546Sopenharmony_ci    * destination if it has a predecessor and it's ready for copying if
763bf215546Sopenharmony_ci    * it's not marked as containing data.
764bf215546Sopenharmony_ci    */
765bf215546Sopenharmony_ci   for (int i = 0; i < num_vals; i++) {
766bf215546Sopenharmony_ci      if (pred[i] != -1 && loc[i] == -1)
767bf215546Sopenharmony_ci         ready[++ready_idx] = i;
768bf215546Sopenharmony_ci   }
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci   while (to_do_idx >= 0) {
771bf215546Sopenharmony_ci      while (ready_idx >= 0) {
772bf215546Sopenharmony_ci         int b = ready[ready_idx--];
773bf215546Sopenharmony_ci         int a = pred[b];
774bf215546Sopenharmony_ci         emit_copy(&state->builder, values[loc[a]], values[b]);
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci         /* b has been filled, mark it as not needing to be copied */
777bf215546Sopenharmony_ci         pred[b] = -1;
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_ci         /* The next bit only applies if the source and destination have the
780bf215546Sopenharmony_ci          * same divergence.  If they differ (it must be convergent ->
781bf215546Sopenharmony_ci          * divergent), then we can't guarantee we won't need the convergent
782bf215546Sopenharmony_ci          * version of again.
783bf215546Sopenharmony_ci          */
784bf215546Sopenharmony_ci         if (nir_src_is_divergent(values[a]) ==
785bf215546Sopenharmony_ci             nir_src_is_divergent(values[b])) {
786bf215546Sopenharmony_ci            /* If any other copies want a they can find it at b but only if the
787bf215546Sopenharmony_ci             * two have the same divergence.
788bf215546Sopenharmony_ci             */
789bf215546Sopenharmony_ci            loc[a] = b;
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci            /* If a needs to be filled... */
792bf215546Sopenharmony_ci            if (pred[a] != -1) {
793bf215546Sopenharmony_ci               /* If any other copies want a they can find it at b */
794bf215546Sopenharmony_ci               loc[a] = b;
795bf215546Sopenharmony_ci
796bf215546Sopenharmony_ci               /* It's ready for copying now */
797bf215546Sopenharmony_ci               ready[++ready_idx] = a;
798bf215546Sopenharmony_ci            }
799bf215546Sopenharmony_ci         }
800bf215546Sopenharmony_ci      }
801bf215546Sopenharmony_ci      int b = to_do[to_do_idx--];
802bf215546Sopenharmony_ci      if (pred[b] == -1)
803bf215546Sopenharmony_ci         continue;
804bf215546Sopenharmony_ci
805bf215546Sopenharmony_ci      /* If we got here, then we don't have any more trivial copies that we
806bf215546Sopenharmony_ci       * can do.  We have to break a cycle, so we create a new temporary
807bf215546Sopenharmony_ci       * register for that purpose.  Normally, if going out of SSA after
808bf215546Sopenharmony_ci       * register allocation, you would want to avoid creating temporary
809bf215546Sopenharmony_ci       * registers.  However, we are going out of SSA before register
810bf215546Sopenharmony_ci       * allocation, so we would rather not create extra register
811bf215546Sopenharmony_ci       * dependencies for the backend to deal with.  If it wants, the
812bf215546Sopenharmony_ci       * backend can coalesce the (possibly multiple) temporaries.
813bf215546Sopenharmony_ci       */
814bf215546Sopenharmony_ci      assert(num_vals < num_copies * 2);
815bf215546Sopenharmony_ci      nir_register *reg = nir_local_reg_create(state->builder.impl);
816bf215546Sopenharmony_ci      reg->num_array_elems = 0;
817bf215546Sopenharmony_ci      if (values[b].is_ssa) {
818bf215546Sopenharmony_ci         reg->num_components = values[b].ssa->num_components;
819bf215546Sopenharmony_ci         reg->bit_size = values[b].ssa->bit_size;
820bf215546Sopenharmony_ci      } else {
821bf215546Sopenharmony_ci         reg->num_components = values[b].reg.reg->num_components;
822bf215546Sopenharmony_ci         reg->bit_size = values[b].reg.reg->bit_size;
823bf215546Sopenharmony_ci      }
824bf215546Sopenharmony_ci      reg->divergent = nir_src_is_divergent(values[b]);
825bf215546Sopenharmony_ci      values[num_vals].is_ssa = false;
826bf215546Sopenharmony_ci      values[num_vals].reg.reg = reg;
827bf215546Sopenharmony_ci
828bf215546Sopenharmony_ci      emit_copy(&state->builder, values[b], values[num_vals]);
829bf215546Sopenharmony_ci      loc[b] = num_vals;
830bf215546Sopenharmony_ci      ready[++ready_idx] = b;
831bf215546Sopenharmony_ci      num_vals++;
832bf215546Sopenharmony_ci   }
833bf215546Sopenharmony_ci
834bf215546Sopenharmony_ci   nir_instr_remove(&pcopy->instr);
835bf215546Sopenharmony_ci   exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
836bf215546Sopenharmony_ci}
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci/* Resolves the parallel copies in a block.  Each block can have at most
839bf215546Sopenharmony_ci * two:  One at the beginning, right after all the phi noces, and one at
840bf215546Sopenharmony_ci * the end (or right before the final jump if it exists).
841bf215546Sopenharmony_ci */
842bf215546Sopenharmony_cistatic bool
843bf215546Sopenharmony_ciresolve_parallel_copies_block(nir_block *block, struct from_ssa_state *state)
844bf215546Sopenharmony_ci{
845bf215546Sopenharmony_ci   /* At this point, we have removed all of the phi nodes.  If a parallel
846bf215546Sopenharmony_ci    * copy existed right after the phi nodes in this block, it is now the
847bf215546Sopenharmony_ci    * first instruction.
848bf215546Sopenharmony_ci    */
849bf215546Sopenharmony_ci   nir_instr *first_instr = nir_block_first_instr(block);
850bf215546Sopenharmony_ci   if (first_instr == NULL)
851bf215546Sopenharmony_ci      return true; /* Empty, nothing to do. */
852bf215546Sopenharmony_ci
853bf215546Sopenharmony_ci   if (first_instr->type == nir_instr_type_parallel_copy) {
854bf215546Sopenharmony_ci      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci      resolve_parallel_copy(pcopy, state);
857bf215546Sopenharmony_ci   }
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci   /* It's possible that the above code already cleaned up the end parallel
860bf215546Sopenharmony_ci    * copy.  However, doing so removed it form the instructions list so we
861bf215546Sopenharmony_ci    * won't find it here.  Therefore, it's safe to go ahead and just look
862bf215546Sopenharmony_ci    * for one and clean it up if it exists.
863bf215546Sopenharmony_ci    */
864bf215546Sopenharmony_ci   nir_parallel_copy_instr *end_pcopy =
865bf215546Sopenharmony_ci      get_parallel_copy_at_end_of_block(block);
866bf215546Sopenharmony_ci   if (end_pcopy)
867bf215546Sopenharmony_ci      resolve_parallel_copy(end_pcopy, state);
868bf215546Sopenharmony_ci
869bf215546Sopenharmony_ci   return true;
870bf215546Sopenharmony_ci}
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_cistatic bool
873bf215546Sopenharmony_cinir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
874bf215546Sopenharmony_ci{
875bf215546Sopenharmony_ci   nir_shader *shader = impl->function->shader;
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci   struct from_ssa_state state;
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci   nir_builder_init(&state.builder, impl);
880bf215546Sopenharmony_ci   state.dead_ctx = ralloc_context(NULL);
881bf215546Sopenharmony_ci   state.phi_webs_only = phi_webs_only;
882bf215546Sopenharmony_ci   state.merge_node_table = _mesa_pointer_hash_table_create(NULL);
883bf215546Sopenharmony_ci   state.progress = false;
884bf215546Sopenharmony_ci   exec_list_make_empty(&state.dead_instrs);
885bf215546Sopenharmony_ci
886bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
887bf215546Sopenharmony_ci      add_parallel_copy_to_end_of_block(shader, block, state.dead_ctx);
888bf215546Sopenharmony_ci   }
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
891bf215546Sopenharmony_ci      isolate_phi_nodes_block(shader, block, state.dead_ctx);
892bf215546Sopenharmony_ci   }
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_ci   /* Mark metadata as dirty before we ask for liveness analysis */
895bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_block_index |
896bf215546Sopenharmony_ci                               nir_metadata_dominance);
897bf215546Sopenharmony_ci
898bf215546Sopenharmony_ci   nir_metadata_require(impl, nir_metadata_instr_index |
899bf215546Sopenharmony_ci                              nir_metadata_live_ssa_defs |
900bf215546Sopenharmony_ci                              nir_metadata_dominance);
901bf215546Sopenharmony_ci
902bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
903bf215546Sopenharmony_ci      coalesce_phi_nodes_block(block, &state);
904bf215546Sopenharmony_ci   }
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
907bf215546Sopenharmony_ci      aggressive_coalesce_block(block, &state);
908bf215546Sopenharmony_ci   }
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
911bf215546Sopenharmony_ci      resolve_registers_block(block, &state);
912bf215546Sopenharmony_ci   }
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
915bf215546Sopenharmony_ci      resolve_parallel_copies_block(block, &state);
916bf215546Sopenharmony_ci   }
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_block_index |
919bf215546Sopenharmony_ci                               nir_metadata_dominance);
920bf215546Sopenharmony_ci
921bf215546Sopenharmony_ci   /* Clean up dead instructions and the hash tables */
922bf215546Sopenharmony_ci   nir_instr_free_list(&state.dead_instrs);
923bf215546Sopenharmony_ci   _mesa_hash_table_destroy(state.merge_node_table, NULL);
924bf215546Sopenharmony_ci   ralloc_free(state.dead_ctx);
925bf215546Sopenharmony_ci   return state.progress;
926bf215546Sopenharmony_ci}
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_cibool
929bf215546Sopenharmony_cinir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
930bf215546Sopenharmony_ci{
931bf215546Sopenharmony_ci   bool progress = false;
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
934bf215546Sopenharmony_ci      if (function->impl)
935bf215546Sopenharmony_ci         progress |= nir_convert_from_ssa_impl(function->impl, phi_webs_only);
936bf215546Sopenharmony_ci   }
937bf215546Sopenharmony_ci
938bf215546Sopenharmony_ci   return progress;
939bf215546Sopenharmony_ci}
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci
942bf215546Sopenharmony_cistatic void
943bf215546Sopenharmony_ciplace_phi_read(nir_builder *b, nir_register *reg,
944bf215546Sopenharmony_ci               nir_ssa_def *def, nir_block *block, struct set *visited_blocks)
945bf215546Sopenharmony_ci{
946bf215546Sopenharmony_ci  /* Search already visited blocks to avoid back edges in tree */
947bf215546Sopenharmony_ci  if (_mesa_set_search(visited_blocks, block) == NULL) {
948bf215546Sopenharmony_ci      /* Try to go up the single-successor tree */
949bf215546Sopenharmony_ci      bool all_single_successors = true;
950bf215546Sopenharmony_ci      set_foreach(block->predecessors, entry) {
951bf215546Sopenharmony_ci         nir_block *pred = (nir_block *)entry->key;
952bf215546Sopenharmony_ci         if (pred->successors[0] && pred->successors[1]) {
953bf215546Sopenharmony_ci            all_single_successors = false;
954bf215546Sopenharmony_ci            break;
955bf215546Sopenharmony_ci         }
956bf215546Sopenharmony_ci      }
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci      if (all_single_successors) {
959bf215546Sopenharmony_ci         /* All predecessors of this block have exactly one successor and it
960bf215546Sopenharmony_ci          * is this block so they must eventually lead here without
961bf215546Sopenharmony_ci          * intersecting each other.  Place the reads in the predecessors
962bf215546Sopenharmony_ci          * instead of this block.
963bf215546Sopenharmony_ci          */
964bf215546Sopenharmony_ci         _mesa_set_add(visited_blocks, block);
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci         set_foreach(block->predecessors, entry) {
967bf215546Sopenharmony_ci            place_phi_read(b, reg, def, (nir_block *)entry->key, visited_blocks);
968bf215546Sopenharmony_ci         }
969bf215546Sopenharmony_ci         return;
970bf215546Sopenharmony_ci      }
971bf215546Sopenharmony_ci   }
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci   b->cursor = nir_after_block_before_jump(block);
974bf215546Sopenharmony_ci   nir_store_reg(b, reg, def, ~0);
975bf215546Sopenharmony_ci}
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_ci/** Lower all of the phi nodes in a block to movs to and from a register
978bf215546Sopenharmony_ci *
979bf215546Sopenharmony_ci * This provides a very quick-and-dirty out-of-SSA pass that you can run on a
980bf215546Sopenharmony_ci * single block to convert all of its phis to a register and some movs.
981bf215546Sopenharmony_ci * The code that is generated, while not optimal for actual codegen in a
982bf215546Sopenharmony_ci * back-end, is easy to generate, correct, and will turn into the same set of
983bf215546Sopenharmony_ci * phis after you call regs_to_ssa and do some copy propagation.  For each phi
984bf215546Sopenharmony_ci * node we do the following:
985bf215546Sopenharmony_ci *
986bf215546Sopenharmony_ci *  1. For each phi instruction in the block, create a new nir_register
987bf215546Sopenharmony_ci *
988bf215546Sopenharmony_ci *  2. Insert movs at the top of the destination block for each phi and
989bf215546Sopenharmony_ci *     rewrite all uses of the phi to use the mov.
990bf215546Sopenharmony_ci *
991bf215546Sopenharmony_ci *  3. For each phi source, insert movs in the predecessor block from the phi
992bf215546Sopenharmony_ci *     source to the register associated with the phi.
993bf215546Sopenharmony_ci *
994bf215546Sopenharmony_ci * Correctness is guaranteed by the fact that we create a new register for
995bf215546Sopenharmony_ci * each phi and emit movs on both sides of the control-flow edge.  Because all
996bf215546Sopenharmony_ci * the phis have SSA destinations (we assert this) and there is a separate
997bf215546Sopenharmony_ci * temporary for each phi, all movs inserted in any particular block have
998bf215546Sopenharmony_ci * unique destinations so the order of operations does not matter.
999bf215546Sopenharmony_ci *
1000bf215546Sopenharmony_ci * The one intelligent thing this pass does is that it places the moves from
1001bf215546Sopenharmony_ci * the phi sources as high up the predecessor tree as possible instead of in
1002bf215546Sopenharmony_ci * the exact predecessor.  This means that, in particular, it will crawl into
1003bf215546Sopenharmony_ci * the deepest nesting of any if-ladders.  In order to ensure that doing so is
1004bf215546Sopenharmony_ci * safe, it stops as soon as one of the predecessors has multiple successors.
1005bf215546Sopenharmony_ci */
1006bf215546Sopenharmony_cibool
1007bf215546Sopenharmony_cinir_lower_phis_to_regs_block(nir_block *block)
1008bf215546Sopenharmony_ci{
1009bf215546Sopenharmony_ci   nir_builder b;
1010bf215546Sopenharmony_ci   nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node));
1011bf215546Sopenharmony_ci   struct set *visited_blocks = _mesa_set_create(NULL, _mesa_hash_pointer,
1012bf215546Sopenharmony_ci                                                 _mesa_key_pointer_equal);
1013bf215546Sopenharmony_ci
1014bf215546Sopenharmony_ci   bool progress = false;
1015bf215546Sopenharmony_ci   nir_foreach_instr_safe(instr, block) {
1016bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi)
1017bf215546Sopenharmony_ci         break;
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci      nir_phi_instr *phi = nir_instr_as_phi(instr);
1020bf215546Sopenharmony_ci      assert(phi->dest.is_ssa);
1021bf215546Sopenharmony_ci
1022bf215546Sopenharmony_ci      nir_register *reg = create_reg_for_ssa_def(&phi->dest.ssa, b.impl);
1023bf215546Sopenharmony_ci
1024bf215546Sopenharmony_ci      b.cursor = nir_after_instr(&phi->instr);
1025bf215546Sopenharmony_ci      nir_ssa_def *def = nir_load_reg(&b, reg);
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci      nir_foreach_phi_src(src, phi) {
1030bf215546Sopenharmony_ci         if (src->src.is_ssa) {
1031bf215546Sopenharmony_ci            _mesa_set_add(visited_blocks, src->src.ssa->parent_instr->block);
1032bf215546Sopenharmony_ci            place_phi_read(&b, reg, src->src.ssa, src->pred, visited_blocks);
1033bf215546Sopenharmony_ci            _mesa_set_clear(visited_blocks, NULL);
1034bf215546Sopenharmony_ci         } else {
1035bf215546Sopenharmony_ci            b.cursor = nir_after_block_before_jump(src->pred);
1036bf215546Sopenharmony_ci            nir_ssa_def *src_ssa =
1037bf215546Sopenharmony_ci               nir_ssa_for_src(&b, src->src, phi->dest.ssa.num_components);
1038bf215546Sopenharmony_ci            nir_store_reg(&b, reg, src_ssa, ~0);
1039bf215546Sopenharmony_ci         }
1040bf215546Sopenharmony_ci      }
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_ci      nir_instr_remove(&phi->instr);
1043bf215546Sopenharmony_ci
1044bf215546Sopenharmony_ci      progress = true;
1045bf215546Sopenharmony_ci   }
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci   _mesa_set_destroy(visited_blocks, NULL);
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_ci   return progress;
1050bf215546Sopenharmony_ci}
1051bf215546Sopenharmony_ci
1052bf215546Sopenharmony_cistruct ssa_def_to_reg_state {
1053bf215546Sopenharmony_ci   nir_function_impl *impl;
1054bf215546Sopenharmony_ci   bool progress;
1055bf215546Sopenharmony_ci};
1056bf215546Sopenharmony_ci
1057bf215546Sopenharmony_cistatic bool
1058bf215546Sopenharmony_cidest_replace_ssa_with_reg(nir_dest *dest, void *void_state)
1059bf215546Sopenharmony_ci{
1060bf215546Sopenharmony_ci   struct ssa_def_to_reg_state *state = void_state;
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_ci   if (!dest->is_ssa)
1063bf215546Sopenharmony_ci      return true;
1064bf215546Sopenharmony_ci
1065bf215546Sopenharmony_ci   nir_register *reg = create_reg_for_ssa_def(&dest->ssa, state->impl);
1066bf215546Sopenharmony_ci
1067bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses_src(&dest->ssa, nir_src_for_reg(reg));
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_ci   nir_instr *instr = dest->ssa.parent_instr;
1070bf215546Sopenharmony_ci   *dest = nir_dest_for_reg(reg);
1071bf215546Sopenharmony_ci   dest->reg.parent_instr = instr;
1072bf215546Sopenharmony_ci   list_addtail(&dest->reg.def_link, &reg->defs);
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci   state->progress = true;
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_ci   return true;
1077bf215546Sopenharmony_ci}
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_cistatic bool
1080bf215546Sopenharmony_cissa_def_is_local_to_block(nir_ssa_def *def, UNUSED void *state)
1081bf215546Sopenharmony_ci{
1082bf215546Sopenharmony_ci   nir_block *block = def->parent_instr->block;
1083bf215546Sopenharmony_ci   nir_foreach_use(use_src, def) {
1084bf215546Sopenharmony_ci      if (use_src->parent_instr->block != block ||
1085bf215546Sopenharmony_ci          use_src->parent_instr->type == nir_instr_type_phi) {
1086bf215546Sopenharmony_ci         return false;
1087bf215546Sopenharmony_ci      }
1088bf215546Sopenharmony_ci   }
1089bf215546Sopenharmony_ci
1090bf215546Sopenharmony_ci   if (!list_is_empty(&def->if_uses))
1091bf215546Sopenharmony_ci      return false;
1092bf215546Sopenharmony_ci
1093bf215546Sopenharmony_ci   return true;
1094bf215546Sopenharmony_ci}
1095bf215546Sopenharmony_ci
1096bf215546Sopenharmony_ci/** Lower all of the SSA defs in a block to registers
1097bf215546Sopenharmony_ci *
1098bf215546Sopenharmony_ci * This performs the very simple operation of blindly replacing all of the SSA
1099bf215546Sopenharmony_ci * defs in the given block with registers.  If not used carefully, this may
1100bf215546Sopenharmony_ci * result in phi nodes with register sources which is technically invalid.
1101bf215546Sopenharmony_ci * Fortunately, the register-based into-SSA pass handles them anyway.
1102bf215546Sopenharmony_ci */
1103bf215546Sopenharmony_cibool
1104bf215546Sopenharmony_cinir_lower_ssa_defs_to_regs_block(nir_block *block)
1105bf215546Sopenharmony_ci{
1106bf215546Sopenharmony_ci   nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
1107bf215546Sopenharmony_ci   nir_shader *shader = impl->function->shader;
1108bf215546Sopenharmony_ci
1109bf215546Sopenharmony_ci   struct ssa_def_to_reg_state state = {
1110bf215546Sopenharmony_ci      .impl = impl,
1111bf215546Sopenharmony_ci      .progress = false,
1112bf215546Sopenharmony_ci   };
1113bf215546Sopenharmony_ci
1114bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
1115bf215546Sopenharmony_ci      if (instr->type == nir_instr_type_ssa_undef) {
1116bf215546Sopenharmony_ci         /* Undefs are just a read of something never written. */
1117bf215546Sopenharmony_ci         nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(instr);
1118bf215546Sopenharmony_ci         nir_register *reg = create_reg_for_ssa_def(&undef->def, state.impl);
1119bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses_src(&undef->def, nir_src_for_reg(reg));
1120bf215546Sopenharmony_ci      } else if (instr->type == nir_instr_type_load_const) {
1121bf215546Sopenharmony_ci         /* Constant loads are SSA-only, we need to insert a move */
1122bf215546Sopenharmony_ci         nir_load_const_instr *load = nir_instr_as_load_const(instr);
1123bf215546Sopenharmony_ci         nir_register *reg = create_reg_for_ssa_def(&load->def, state.impl);
1124bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses_src(&load->def, nir_src_for_reg(reg));
1125bf215546Sopenharmony_ci
1126bf215546Sopenharmony_ci         nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
1127bf215546Sopenharmony_ci         mov->src[0].src = nir_src_for_ssa(&load->def);
1128bf215546Sopenharmony_ci         mov->dest.dest = nir_dest_for_reg(reg);
1129bf215546Sopenharmony_ci         mov->dest.write_mask = (1 << reg->num_components) - 1;
1130bf215546Sopenharmony_ci         nir_instr_insert(nir_after_instr(&load->instr), &mov->instr);
1131bf215546Sopenharmony_ci      } else if (nir_foreach_ssa_def(instr, ssa_def_is_local_to_block, NULL)) {
1132bf215546Sopenharmony_ci         /* If the SSA def produced by this instruction is only in the block
1133bf215546Sopenharmony_ci          * in which it is defined and is not used by ifs or phis, then we
1134bf215546Sopenharmony_ci          * don't have a reason to convert it to a register.
1135bf215546Sopenharmony_ci          */
1136bf215546Sopenharmony_ci      } else {
1137bf215546Sopenharmony_ci         nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state);
1138bf215546Sopenharmony_ci      }
1139bf215546Sopenharmony_ci   }
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_ci   return state.progress;
1142bf215546Sopenharmony_ci}
1143