1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2019 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "aco_builder.h"
26bf215546Sopenharmony_ci#include "aco_ir.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include <algorithm>
29bf215546Sopenharmony_ci#include <map>
30bf215546Sopenharmony_ci#include <unordered_map>
31bf215546Sopenharmony_ci#include <vector>
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci/*
34bf215546Sopenharmony_ci * Implements an algorithm to lower to Concentional SSA Form (CSSA).
35bf215546Sopenharmony_ci * After "Revisiting Out-of-SSA Translation for Correctness, CodeQuality, and Efficiency"
36bf215546Sopenharmony_ci * by B. Boissinot, A. Darte, F. Rastello, B. Dupont de Dinechin, C. Guillon,
37bf215546Sopenharmony_ci *
38bf215546Sopenharmony_ci * By lowering the IR to CSSA, the insertion of parallelcopies is separated from
39bf215546Sopenharmony_ci * the register coalescing problem. Additionally, correctness is ensured w.r.t. spilling.
40bf215546Sopenharmony_ci * The algorithm coalesces non-interfering phi-resources while taking value-equality
41bf215546Sopenharmony_ci * into account. Re-indexes the SSA-defs.
42bf215546Sopenharmony_ci */
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_cinamespace aco {
45bf215546Sopenharmony_cinamespace {
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_citypedef std::vector<Temp> merge_set;
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_cistruct copy {
50bf215546Sopenharmony_ci   Definition def;
51bf215546Sopenharmony_ci   Operand op;
52bf215546Sopenharmony_ci};
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_cistruct merge_node {
55bf215546Sopenharmony_ci   Operand value = Operand(); /* original value: can be an SSA-def or constant value */
56bf215546Sopenharmony_ci   uint32_t index = -1u;      /* index into the vector of merge sets */
57bf215546Sopenharmony_ci   uint32_t defined_at = -1u; /* defining block */
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   /* we also remember two dominating defs with the same value: */
60bf215546Sopenharmony_ci   Temp equal_anc_in = Temp();  /* within the same merge set */
61bf215546Sopenharmony_ci   Temp equal_anc_out = Temp(); /* from a different set */
62bf215546Sopenharmony_ci};
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_cistruct cssa_ctx {
65bf215546Sopenharmony_ci   Program* program;
66bf215546Sopenharmony_ci   std::vector<IDSet>& live_out;                  /* live-out sets per block */
67bf215546Sopenharmony_ci   std::vector<std::vector<copy>> parallelcopies; /* copies per block */
68bf215546Sopenharmony_ci   std::vector<merge_set> merge_sets;             /* each vector is one (ordered) merge set */
69bf215546Sopenharmony_ci   std::unordered_map<uint32_t, merge_node> merge_node_table; /* tempid -> merge node */
70bf215546Sopenharmony_ci};
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci/* create (virtual) parallelcopies for each phi instruction and
73bf215546Sopenharmony_ci * already merge copy-definitions with phi-defs into merge sets */
74bf215546Sopenharmony_civoid
75bf215546Sopenharmony_cicollect_parallelcopies(cssa_ctx& ctx)
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci   ctx.parallelcopies.resize(ctx.program->blocks.size());
78bf215546Sopenharmony_ci   Builder bld(ctx.program);
79bf215546Sopenharmony_ci   for (Block& block : ctx.program->blocks) {
80bf215546Sopenharmony_ci      for (aco_ptr<Instruction>& phi : block.instructions) {
81bf215546Sopenharmony_ci         if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi)
82bf215546Sopenharmony_ci            break;
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci         const Definition& def = phi->definitions[0];
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci         /* if the definition is not temp, it is the exec mask.
87bf215546Sopenharmony_ci          * We can reload the exec mask directly from the spill slot.
88bf215546Sopenharmony_ci          */
89bf215546Sopenharmony_ci         if (!def.isTemp())
90bf215546Sopenharmony_ci            continue;
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci         std::vector<unsigned>& preds =
93bf215546Sopenharmony_ci            phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
94bf215546Sopenharmony_ci         uint32_t index = ctx.merge_sets.size();
95bf215546Sopenharmony_ci         merge_set set;
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci         bool has_preheader_copy = false;
98bf215546Sopenharmony_ci         for (unsigned i = 0; i < phi->operands.size(); i++) {
99bf215546Sopenharmony_ci            Operand op = phi->operands[i];
100bf215546Sopenharmony_ci            if (op.isUndefined())
101bf215546Sopenharmony_ci               continue;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci            if (def.regClass().type() == RegType::sgpr && !op.isTemp()) {
104bf215546Sopenharmony_ci               /* SGPR inline constants and literals on GFX10+ can be spilled
105bf215546Sopenharmony_ci                * and reloaded directly (without intermediate register) */
106bf215546Sopenharmony_ci               if (op.isConstant()) {
107bf215546Sopenharmony_ci                  if (ctx.program->gfx_level >= GFX10)
108bf215546Sopenharmony_ci                     continue;
109bf215546Sopenharmony_ci                  if (op.size() == 1 && !op.isLiteral())
110bf215546Sopenharmony_ci                     continue;
111bf215546Sopenharmony_ci               } else {
112bf215546Sopenharmony_ci                  assert(op.isFixed() && op.physReg() == exec);
113bf215546Sopenharmony_ci                  continue;
114bf215546Sopenharmony_ci               }
115bf215546Sopenharmony_ci            }
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci            /* create new temporary and rename operands */
118bf215546Sopenharmony_ci            Temp tmp = bld.tmp(def.regClass());
119bf215546Sopenharmony_ci            ctx.parallelcopies[preds[i]].emplace_back(copy{Definition(tmp), op});
120bf215546Sopenharmony_ci            phi->operands[i] = Operand(tmp);
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci            /* place the new operands in the same merge set */
123bf215546Sopenharmony_ci            set.emplace_back(tmp);
124bf215546Sopenharmony_ci            ctx.merge_node_table[tmp.id()] = {op, index, preds[i]};
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci            /* update the liveness information */
127bf215546Sopenharmony_ci            if (op.isKill())
128bf215546Sopenharmony_ci               ctx.live_out[preds[i]].erase(op.tempId());
129bf215546Sopenharmony_ci            ctx.live_out[preds[i]].insert(tmp.id());
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci            has_preheader_copy |= i == 0 && block.kind & block_kind_loop_header;
132bf215546Sopenharmony_ci         }
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci         if (set.empty())
135bf215546Sopenharmony_ci            continue;
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci         /* place the definition in dominance-order */
138bf215546Sopenharmony_ci         if (def.isTemp()) {
139bf215546Sopenharmony_ci            if (has_preheader_copy)
140bf215546Sopenharmony_ci               set.emplace(std::next(set.begin()), def.getTemp());
141bf215546Sopenharmony_ci            else if (block.kind & block_kind_loop_header)
142bf215546Sopenharmony_ci               set.emplace(set.begin(), def.getTemp());
143bf215546Sopenharmony_ci            else
144bf215546Sopenharmony_ci               set.emplace_back(def.getTemp());
145bf215546Sopenharmony_ci            ctx.merge_node_table[def.tempId()] = {Operand(def.getTemp()), index, block.index};
146bf215546Sopenharmony_ci         }
147bf215546Sopenharmony_ci         ctx.merge_sets.emplace_back(set);
148bf215546Sopenharmony_ci      }
149bf215546Sopenharmony_ci   }
150bf215546Sopenharmony_ci}
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci/* check whether the definition of a comes after b. */
153bf215546Sopenharmony_ciinline bool
154bf215546Sopenharmony_cidefined_after(cssa_ctx& ctx, Temp a, Temp b)
155bf215546Sopenharmony_ci{
156bf215546Sopenharmony_ci   merge_node& node_a = ctx.merge_node_table[a.id()];
157bf215546Sopenharmony_ci   merge_node& node_b = ctx.merge_node_table[b.id()];
158bf215546Sopenharmony_ci   if (node_a.defined_at == node_b.defined_at)
159bf215546Sopenharmony_ci      return a.id() > b.id();
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   return node_a.defined_at > node_b.defined_at;
162bf215546Sopenharmony_ci}
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci/* check whether a dominates b where b is defined after a */
165bf215546Sopenharmony_ciinline bool
166bf215546Sopenharmony_cidominates(cssa_ctx& ctx, Temp a, Temp b)
167bf215546Sopenharmony_ci{
168bf215546Sopenharmony_ci   assert(defined_after(ctx, b, a));
169bf215546Sopenharmony_ci   merge_node& node_a = ctx.merge_node_table[a.id()];
170bf215546Sopenharmony_ci   merge_node& node_b = ctx.merge_node_table[b.id()];
171bf215546Sopenharmony_ci   unsigned idom = node_b.defined_at;
172bf215546Sopenharmony_ci   while (idom > node_a.defined_at)
173bf215546Sopenharmony_ci      idom = b.regClass().type() == RegType::vgpr ? ctx.program->blocks[idom].logical_idom
174bf215546Sopenharmony_ci                                                  : ctx.program->blocks[idom].linear_idom;
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci   return idom == node_a.defined_at;
177bf215546Sopenharmony_ci}
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci/* check intersection between var and parent:
180bf215546Sopenharmony_ci * We already know that parent dominates var. */
181bf215546Sopenharmony_ciinline bool
182bf215546Sopenharmony_ciintersects(cssa_ctx& ctx, Temp var, Temp parent)
183bf215546Sopenharmony_ci{
184bf215546Sopenharmony_ci   merge_node& node_var = ctx.merge_node_table[var.id()];
185bf215546Sopenharmony_ci   merge_node& node_parent = ctx.merge_node_table[parent.id()];
186bf215546Sopenharmony_ci   assert(node_var.index != node_parent.index);
187bf215546Sopenharmony_ci   uint32_t block_idx = node_var.defined_at;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   /* if the parent is live-out at the definition block of var, they intersect */
190bf215546Sopenharmony_ci   bool parent_live = ctx.live_out[block_idx].count(parent.id());
191bf215546Sopenharmony_ci   if (parent_live)
192bf215546Sopenharmony_ci      return true;
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci   /* parent is defined in a different block than var */
195bf215546Sopenharmony_ci   if (node_parent.defined_at < node_var.defined_at) {
196bf215546Sopenharmony_ci      /* if the parent is not live-in, they don't interfere */
197bf215546Sopenharmony_ci      std::vector<uint32_t>& preds = var.type() == RegType::vgpr
198bf215546Sopenharmony_ci                                        ? ctx.program->blocks[block_idx].logical_preds
199bf215546Sopenharmony_ci                                        : ctx.program->blocks[block_idx].linear_preds;
200bf215546Sopenharmony_ci      for (uint32_t pred : preds) {
201bf215546Sopenharmony_ci         if (!ctx.live_out[pred].count(parent.id()))
202bf215546Sopenharmony_ci            return false;
203bf215546Sopenharmony_ci      }
204bf215546Sopenharmony_ci   }
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   for (const copy& cp : ctx.parallelcopies[block_idx]) {
207bf215546Sopenharmony_ci      /* if var is defined at the edge, they don't intersect */
208bf215546Sopenharmony_ci      if (cp.def.getTemp() == var)
209bf215546Sopenharmony_ci         return false;
210bf215546Sopenharmony_ci      if (cp.op.isTemp() && cp.op.getTemp() == parent)
211bf215546Sopenharmony_ci         parent_live = true;
212bf215546Sopenharmony_ci   }
213bf215546Sopenharmony_ci   /* if the parent is live at the edge, they intersect */
214bf215546Sopenharmony_ci   if (parent_live)
215bf215546Sopenharmony_ci      return true;
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   /* both, parent and var, are present in the same block */
218bf215546Sopenharmony_ci   const Block& block = ctx.program->blocks[block_idx];
219bf215546Sopenharmony_ci   for (auto it = block.instructions.crbegin(); it != block.instructions.crend(); ++it) {
220bf215546Sopenharmony_ci      /* if the parent was not encountered yet, it can only be used by a phi */
221bf215546Sopenharmony_ci      if (is_phi(it->get()))
222bf215546Sopenharmony_ci         break;
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci      for (const Definition& def : (*it)->definitions) {
225bf215546Sopenharmony_ci         if (!def.isTemp())
226bf215546Sopenharmony_ci            continue;
227bf215546Sopenharmony_ci         /* if parent was not found yet, they don't intersect */
228bf215546Sopenharmony_ci         if (def.getTemp() == var)
229bf215546Sopenharmony_ci            return false;
230bf215546Sopenharmony_ci      }
231bf215546Sopenharmony_ci
232bf215546Sopenharmony_ci      for (const Operand& op : (*it)->operands) {
233bf215546Sopenharmony_ci         if (!op.isTemp())
234bf215546Sopenharmony_ci            continue;
235bf215546Sopenharmony_ci         /* if the var was defined before this point, they intersect */
236bf215546Sopenharmony_ci         if (op.getTemp() == parent)
237bf215546Sopenharmony_ci            return true;
238bf215546Sopenharmony_ci      }
239bf215546Sopenharmony_ci   }
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci   return false;
242bf215546Sopenharmony_ci}
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci/* check interference between var and parent:
245bf215546Sopenharmony_ci * i.e. they have different values and intersect.
246bf215546Sopenharmony_ci * If parent and var share the same value, also updates the equal ancestor. */
247bf215546Sopenharmony_ciinline bool
248bf215546Sopenharmony_ciinterference(cssa_ctx& ctx, Temp var, Temp parent)
249bf215546Sopenharmony_ci{
250bf215546Sopenharmony_ci   assert(var != parent);
251bf215546Sopenharmony_ci   merge_node& node_var = ctx.merge_node_table[var.id()];
252bf215546Sopenharmony_ci   node_var.equal_anc_out = Temp();
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   if (node_var.index == ctx.merge_node_table[parent.id()].index) {
255bf215546Sopenharmony_ci      /* check/update in other set */
256bf215546Sopenharmony_ci      parent = ctx.merge_node_table[parent.id()].equal_anc_out;
257bf215546Sopenharmony_ci   }
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   Temp tmp = parent;
260bf215546Sopenharmony_ci   /* check if var intersects with parent or any equal-valued ancestor */
261bf215546Sopenharmony_ci   while (tmp != Temp() && !intersects(ctx, var, tmp)) {
262bf215546Sopenharmony_ci      merge_node& node_tmp = ctx.merge_node_table[tmp.id()];
263bf215546Sopenharmony_ci      tmp = node_tmp.equal_anc_in;
264bf215546Sopenharmony_ci   }
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   /* no intersection found */
267bf215546Sopenharmony_ci   if (tmp == Temp())
268bf215546Sopenharmony_ci      return false;
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci   /* var and parent, same value, but in different sets */
271bf215546Sopenharmony_ci   if (node_var.value == ctx.merge_node_table[parent.id()].value) {
272bf215546Sopenharmony_ci      node_var.equal_anc_out = tmp;
273bf215546Sopenharmony_ci      return false;
274bf215546Sopenharmony_ci   }
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci   /* var and parent, different values and intersect */
277bf215546Sopenharmony_ci   return true;
278bf215546Sopenharmony_ci}
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci/* tries to merge set_b into set_a of given temporary and
281bf215546Sopenharmony_ci * drops that temporary as it is being coalesced */
282bf215546Sopenharmony_cibool
283bf215546Sopenharmony_citry_merge_merge_set(cssa_ctx& ctx, Temp dst, merge_set& set_b)
284bf215546Sopenharmony_ci{
285bf215546Sopenharmony_ci   auto def_node_it = ctx.merge_node_table.find(dst.id());
286bf215546Sopenharmony_ci   uint32_t index = def_node_it->second.index;
287bf215546Sopenharmony_ci   merge_set& set_a = ctx.merge_sets[index];
288bf215546Sopenharmony_ci   std::vector<Temp> dom; /* stack of the traversal */
289bf215546Sopenharmony_ci   merge_set union_set;   /* the new merged merge-set */
290bf215546Sopenharmony_ci   uint32_t i_a = 0;
291bf215546Sopenharmony_ci   uint32_t i_b = 0;
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   while (i_a < set_a.size() || i_b < set_b.size()) {
294bf215546Sopenharmony_ci      Temp current;
295bf215546Sopenharmony_ci      if (i_a == set_a.size())
296bf215546Sopenharmony_ci         current = set_b[i_b++];
297bf215546Sopenharmony_ci      else if (i_b == set_b.size())
298bf215546Sopenharmony_ci         current = set_a[i_a++];
299bf215546Sopenharmony_ci      /* else pick the one defined first */
300bf215546Sopenharmony_ci      else if (defined_after(ctx, set_a[i_a], set_b[i_b]))
301bf215546Sopenharmony_ci         current = set_b[i_b++];
302bf215546Sopenharmony_ci      else
303bf215546Sopenharmony_ci         current = set_a[i_a++];
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci      while (!dom.empty() && !dominates(ctx, dom.back(), current))
306bf215546Sopenharmony_ci         dom.pop_back(); /* not the desired parent, remove */
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci      if (!dom.empty() && interference(ctx, current, dom.back()))
309bf215546Sopenharmony_ci         return false; /* intersection detected */
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci      dom.emplace_back(current); /* otherwise, keep checking */
312bf215546Sopenharmony_ci      if (current != dst)
313bf215546Sopenharmony_ci         union_set.emplace_back(current); /* maintain the new merge-set sorted */
314bf215546Sopenharmony_ci   }
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci   /* update hashmap */
317bf215546Sopenharmony_ci   for (Temp t : union_set) {
318bf215546Sopenharmony_ci      merge_node& node = ctx.merge_node_table[t.id()];
319bf215546Sopenharmony_ci      /* update the equal ancestors:
320bf215546Sopenharmony_ci       * i.e. the 'closest' dominating def with the same value */
321bf215546Sopenharmony_ci      Temp in = node.equal_anc_in;
322bf215546Sopenharmony_ci      Temp out = node.equal_anc_out;
323bf215546Sopenharmony_ci      if (in == Temp() || (out != Temp() && defined_after(ctx, out, in)))
324bf215546Sopenharmony_ci         node.equal_anc_in = out;
325bf215546Sopenharmony_ci      node.equal_anc_out = Temp();
326bf215546Sopenharmony_ci      /* update merge-set index */
327bf215546Sopenharmony_ci      node.index = index;
328bf215546Sopenharmony_ci   }
329bf215546Sopenharmony_ci   set_b = merge_set(); /* free the old set_b */
330bf215546Sopenharmony_ci   ctx.merge_sets[index] = union_set;
331bf215546Sopenharmony_ci   ctx.merge_node_table.erase(dst.id()); /* remove the temporary */
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci   return true;
334bf215546Sopenharmony_ci}
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci/* returns true if the copy can safely be omitted */
337bf215546Sopenharmony_cibool
338bf215546Sopenharmony_citry_coalesce_copy(cssa_ctx& ctx, copy copy, uint32_t block_idx)
339bf215546Sopenharmony_ci{
340bf215546Sopenharmony_ci   /* we can only coalesce temporaries */
341bf215546Sopenharmony_ci   if (!copy.op.isTemp())
342bf215546Sopenharmony_ci      return false;
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   /* try emplace a merge_node for the copy operand */
345bf215546Sopenharmony_ci   merge_node& op_node = ctx.merge_node_table[copy.op.tempId()];
346bf215546Sopenharmony_ci   if (op_node.defined_at == -1u) {
347bf215546Sopenharmony_ci      /* find defining block of operand */
348bf215546Sopenharmony_ci      uint32_t pred = block_idx;
349bf215546Sopenharmony_ci      do {
350bf215546Sopenharmony_ci         block_idx = pred;
351bf215546Sopenharmony_ci         pred = copy.op.regClass().type() == RegType::vgpr ? ctx.program->blocks[pred].logical_idom
352bf215546Sopenharmony_ci                                                           : ctx.program->blocks[pred].linear_idom;
353bf215546Sopenharmony_ci      } while (block_idx != pred && ctx.live_out[pred].count(copy.op.tempId()));
354bf215546Sopenharmony_ci      op_node.defined_at = block_idx;
355bf215546Sopenharmony_ci      op_node.value = copy.op;
356bf215546Sopenharmony_ci   }
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   /* we can only coalesce copies of the same register class */
359bf215546Sopenharmony_ci   if (copy.op.regClass() != copy.def.regClass())
360bf215546Sopenharmony_ci      return false;
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   /* check if this operand has not yet been coalesced */
363bf215546Sopenharmony_ci   if (op_node.index == -1u) {
364bf215546Sopenharmony_ci      merge_set op_set = merge_set{copy.op.getTemp()};
365bf215546Sopenharmony_ci      return try_merge_merge_set(ctx, copy.def.getTemp(), op_set);
366bf215546Sopenharmony_ci   }
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci   /* check if this operand has been coalesced into the same set */
369bf215546Sopenharmony_ci   assert(ctx.merge_node_table.count(copy.def.tempId()));
370bf215546Sopenharmony_ci   if (op_node.index == ctx.merge_node_table[copy.def.tempId()].index)
371bf215546Sopenharmony_ci      return true;
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci   /* otherwise, try to coalesce both merge sets */
374bf215546Sopenharmony_ci   return try_merge_merge_set(ctx, copy.def.getTemp(), ctx.merge_sets[op_node.index]);
375bf215546Sopenharmony_ci}
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci/* node in the location-transfer-graph */
378bf215546Sopenharmony_cistruct ltg_node {
379bf215546Sopenharmony_ci   copy cp;
380bf215546Sopenharmony_ci   uint32_t read_idx;
381bf215546Sopenharmony_ci   uint32_t num_uses = 0;
382bf215546Sopenharmony_ci};
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci/* emit the copies in an order that does not
385bf215546Sopenharmony_ci * create interferences within a merge-set */
386bf215546Sopenharmony_civoid
387bf215546Sopenharmony_ciemit_copies_block(Builder& bld, std::map<uint32_t, ltg_node>& ltg, RegType type)
388bf215546Sopenharmony_ci{
389bf215546Sopenharmony_ci   auto&& it = ltg.begin();
390bf215546Sopenharmony_ci   while (it != ltg.end()) {
391bf215546Sopenharmony_ci      const copy& cp = it->second.cp;
392bf215546Sopenharmony_ci      /* wrong regclass or still needed as operand */
393bf215546Sopenharmony_ci      if (cp.def.regClass().type() != type || it->second.num_uses > 0) {
394bf215546Sopenharmony_ci         ++it;
395bf215546Sopenharmony_ci         continue;
396bf215546Sopenharmony_ci      }
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci      /* emit the copy */
399bf215546Sopenharmony_ci      bld.copy(cp.def, it->second.cp.op);
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci      /* update the location transfer graph */
402bf215546Sopenharmony_ci      if (it->second.read_idx != -1u) {
403bf215546Sopenharmony_ci         auto&& other = ltg.find(it->second.read_idx);
404bf215546Sopenharmony_ci         if (other != ltg.end())
405bf215546Sopenharmony_ci            other->second.num_uses--;
406bf215546Sopenharmony_ci      }
407bf215546Sopenharmony_ci      ltg.erase(it);
408bf215546Sopenharmony_ci      it = ltg.begin();
409bf215546Sopenharmony_ci   }
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   /* count the number of remaining circular dependencies */
412bf215546Sopenharmony_ci   unsigned num = std::count_if(ltg.begin(), ltg.end(),
413bf215546Sopenharmony_ci                                [&](auto& n) { return n.second.cp.def.regClass().type() == type; });
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   /* if there are circular dependencies, we just emit them as single parallelcopy */
416bf215546Sopenharmony_ci   if (num) {
417bf215546Sopenharmony_ci      // TODO: this should be restricted to a feasible number of registers
418bf215546Sopenharmony_ci      // and otherwise use a temporary to avoid having to reload more (spilled)
419bf215546Sopenharmony_ci      // variables than we have registers.
420bf215546Sopenharmony_ci      aco_ptr<Pseudo_instruction> copy{create_instruction<Pseudo_instruction>(
421bf215546Sopenharmony_ci         aco_opcode::p_parallelcopy, Format::PSEUDO, num, num)};
422bf215546Sopenharmony_ci      it = ltg.begin();
423bf215546Sopenharmony_ci      for (unsigned i = 0; i < num; i++) {
424bf215546Sopenharmony_ci         while (it->second.cp.def.regClass().type() != type)
425bf215546Sopenharmony_ci            ++it;
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci         copy->definitions[i] = it->second.cp.def;
428bf215546Sopenharmony_ci         copy->operands[i] = it->second.cp.op;
429bf215546Sopenharmony_ci         it = ltg.erase(it);
430bf215546Sopenharmony_ci      }
431bf215546Sopenharmony_ci      bld.insert(std::move(copy));
432bf215546Sopenharmony_ci   }
433bf215546Sopenharmony_ci}
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci/* either emits or coalesces all parallelcopies and
436bf215546Sopenharmony_ci * renames the phi-operands accordingly. */
437bf215546Sopenharmony_civoid
438bf215546Sopenharmony_ciemit_parallelcopies(cssa_ctx& ctx)
439bf215546Sopenharmony_ci{
440bf215546Sopenharmony_ci   std::unordered_map<uint32_t, Operand> renames;
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci   /* we iterate backwards to prioritize coalescing in else-blocks */
443bf215546Sopenharmony_ci   for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
444bf215546Sopenharmony_ci      if (ctx.parallelcopies[i].empty())
445bf215546Sopenharmony_ci         continue;
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci      std::map<uint32_t, ltg_node> ltg;
448bf215546Sopenharmony_ci      bool has_vgpr_copy = false;
449bf215546Sopenharmony_ci      bool has_sgpr_copy = false;
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci      /* first, try to coalesce all parallelcopies */
452bf215546Sopenharmony_ci      for (const copy& cp : ctx.parallelcopies[i]) {
453bf215546Sopenharmony_ci         if (try_coalesce_copy(ctx, cp, i)) {
454bf215546Sopenharmony_ci            renames.emplace(cp.def.tempId(), cp.op);
455bf215546Sopenharmony_ci            /* update liveness info */
456bf215546Sopenharmony_ci            ctx.live_out[i].erase(cp.def.tempId());
457bf215546Sopenharmony_ci            ctx.live_out[i].insert(cp.op.tempId());
458bf215546Sopenharmony_ci         } else {
459bf215546Sopenharmony_ci            uint32_t read_idx = -1u;
460bf215546Sopenharmony_ci            if (cp.op.isTemp())
461bf215546Sopenharmony_ci               read_idx = ctx.merge_node_table[cp.op.tempId()].index;
462bf215546Sopenharmony_ci            uint32_t write_idx = ctx.merge_node_table[cp.def.tempId()].index;
463bf215546Sopenharmony_ci            assert(write_idx != -1u);
464bf215546Sopenharmony_ci            ltg[write_idx] = {cp, read_idx};
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci            bool is_vgpr = cp.def.regClass().type() == RegType::vgpr;
467bf215546Sopenharmony_ci            has_vgpr_copy |= is_vgpr;
468bf215546Sopenharmony_ci            has_sgpr_copy |= !is_vgpr;
469bf215546Sopenharmony_ci         }
470bf215546Sopenharmony_ci      }
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci      /* build location-transfer-graph */
473bf215546Sopenharmony_ci      for (auto& pair : ltg) {
474bf215546Sopenharmony_ci         if (pair.second.read_idx == -1u)
475bf215546Sopenharmony_ci            continue;
476bf215546Sopenharmony_ci         auto&& it = ltg.find(pair.second.read_idx);
477bf215546Sopenharmony_ci         if (it != ltg.end())
478bf215546Sopenharmony_ci            it->second.num_uses++;
479bf215546Sopenharmony_ci      }
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci      /* emit parallelcopies ordered */
482bf215546Sopenharmony_ci      Builder bld(ctx.program);
483bf215546Sopenharmony_ci      Block& block = ctx.program->blocks[i];
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci      if (has_vgpr_copy) {
486bf215546Sopenharmony_ci         /* emit VGPR copies */
487bf215546Sopenharmony_ci         auto IsLogicalEnd = [](const aco_ptr<Instruction>& inst) -> bool
488bf215546Sopenharmony_ci         { return inst->opcode == aco_opcode::p_logical_end; };
489bf215546Sopenharmony_ci         auto it =
490bf215546Sopenharmony_ci            std::find_if(block.instructions.rbegin(), block.instructions.rend(), IsLogicalEnd);
491bf215546Sopenharmony_ci         bld.reset(&block.instructions, std::prev(it.base()));
492bf215546Sopenharmony_ci         emit_copies_block(bld, ltg, RegType::vgpr);
493bf215546Sopenharmony_ci      }
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci      if (has_sgpr_copy) {
496bf215546Sopenharmony_ci         /* emit SGPR copies */
497bf215546Sopenharmony_ci         aco_ptr<Instruction> branch = std::move(block.instructions.back());
498bf215546Sopenharmony_ci         block.instructions.pop_back();
499bf215546Sopenharmony_ci         bld.reset(&block.instructions);
500bf215546Sopenharmony_ci         emit_copies_block(bld, ltg, RegType::sgpr);
501bf215546Sopenharmony_ci         bld.insert(std::move(branch));
502bf215546Sopenharmony_ci      }
503bf215546Sopenharmony_ci   }
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci   /* finally, rename coalesced phi operands */
506bf215546Sopenharmony_ci   for (Block& block : ctx.program->blocks) {
507bf215546Sopenharmony_ci      for (aco_ptr<Instruction>& phi : block.instructions) {
508bf215546Sopenharmony_ci         if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi)
509bf215546Sopenharmony_ci            break;
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci         for (Operand& op : phi->operands) {
512bf215546Sopenharmony_ci            if (!op.isTemp())
513bf215546Sopenharmony_ci               continue;
514bf215546Sopenharmony_ci            auto&& it = renames.find(op.tempId());
515bf215546Sopenharmony_ci            if (it != renames.end()) {
516bf215546Sopenharmony_ci               op = it->second;
517bf215546Sopenharmony_ci               renames.erase(it);
518bf215546Sopenharmony_ci            }
519bf215546Sopenharmony_ci         }
520bf215546Sopenharmony_ci      }
521bf215546Sopenharmony_ci   }
522bf215546Sopenharmony_ci   assert(renames.empty());
523bf215546Sopenharmony_ci}
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci} /* end namespace */
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_civoid
528bf215546Sopenharmony_cilower_to_cssa(Program* program, live& live_vars)
529bf215546Sopenharmony_ci{
530bf215546Sopenharmony_ci   reindex_ssa(program, live_vars.live_out);
531bf215546Sopenharmony_ci   cssa_ctx ctx = {program, live_vars.live_out};
532bf215546Sopenharmony_ci   collect_parallelcopies(ctx);
533bf215546Sopenharmony_ci   emit_parallelcopies(ctx);
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci   /* update live variable information */
536bf215546Sopenharmony_ci   live_vars = live_var_analysis(program);
537bf215546Sopenharmony_ci}
538bf215546Sopenharmony_ci} // namespace aco
539