1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "aco_builder.h" 26bf215546Sopenharmony_ci#include "aco_ir.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include <algorithm> 29bf215546Sopenharmony_ci#include <map> 30bf215546Sopenharmony_ci#include <unordered_map> 31bf215546Sopenharmony_ci#include <vector> 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci/* 34bf215546Sopenharmony_ci * Implements an algorithm to lower to Concentional SSA Form (CSSA). 35bf215546Sopenharmony_ci * After "Revisiting Out-of-SSA Translation for Correctness, CodeQuality, and Efficiency" 36bf215546Sopenharmony_ci * by B. Boissinot, A. Darte, F. Rastello, B. Dupont de Dinechin, C. Guillon, 37bf215546Sopenharmony_ci * 38bf215546Sopenharmony_ci * By lowering the IR to CSSA, the insertion of parallelcopies is separated from 39bf215546Sopenharmony_ci * the register coalescing problem. Additionally, correctness is ensured w.r.t. spilling. 40bf215546Sopenharmony_ci * The algorithm coalesces non-interfering phi-resources while taking value-equality 41bf215546Sopenharmony_ci * into account. Re-indexes the SSA-defs. 42bf215546Sopenharmony_ci */ 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_cinamespace aco { 45bf215546Sopenharmony_cinamespace { 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_citypedef std::vector<Temp> merge_set; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_cistruct copy { 50bf215546Sopenharmony_ci Definition def; 51bf215546Sopenharmony_ci Operand op; 52bf215546Sopenharmony_ci}; 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_cistruct merge_node { 55bf215546Sopenharmony_ci Operand value = Operand(); /* original value: can be an SSA-def or constant value */ 56bf215546Sopenharmony_ci uint32_t index = -1u; /* index into the vector of merge sets */ 57bf215546Sopenharmony_ci uint32_t defined_at = -1u; /* defining block */ 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci /* we also remember two dominating defs with the same value: */ 60bf215546Sopenharmony_ci Temp equal_anc_in = Temp(); /* within the same merge set */ 61bf215546Sopenharmony_ci Temp equal_anc_out = Temp(); /* from a different set */ 62bf215546Sopenharmony_ci}; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_cistruct cssa_ctx { 65bf215546Sopenharmony_ci Program* program; 66bf215546Sopenharmony_ci std::vector<IDSet>& live_out; /* live-out sets per block */ 67bf215546Sopenharmony_ci std::vector<std::vector<copy>> parallelcopies; /* copies per block */ 68bf215546Sopenharmony_ci std::vector<merge_set> merge_sets; /* each vector is one (ordered) merge set */ 69bf215546Sopenharmony_ci std::unordered_map<uint32_t, merge_node> merge_node_table; /* tempid -> merge node */ 70bf215546Sopenharmony_ci}; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci/* create (virtual) parallelcopies for each phi instruction and 73bf215546Sopenharmony_ci * already merge copy-definitions with phi-defs into merge sets */ 74bf215546Sopenharmony_civoid 75bf215546Sopenharmony_cicollect_parallelcopies(cssa_ctx& ctx) 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci ctx.parallelcopies.resize(ctx.program->blocks.size()); 78bf215546Sopenharmony_ci Builder bld(ctx.program); 79bf215546Sopenharmony_ci for (Block& block : ctx.program->blocks) { 80bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : block.instructions) { 81bf215546Sopenharmony_ci if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi) 82bf215546Sopenharmony_ci break; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci const Definition& def = phi->definitions[0]; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci /* if the definition is not temp, it is the exec mask. 87bf215546Sopenharmony_ci * We can reload the exec mask directly from the spill slot. 88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_ci if (!def.isTemp()) 90bf215546Sopenharmony_ci continue; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci std::vector<unsigned>& preds = 93bf215546Sopenharmony_ci phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds; 94bf215546Sopenharmony_ci uint32_t index = ctx.merge_sets.size(); 95bf215546Sopenharmony_ci merge_set set; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci bool has_preheader_copy = false; 98bf215546Sopenharmony_ci for (unsigned i = 0; i < phi->operands.size(); i++) { 99bf215546Sopenharmony_ci Operand op = phi->operands[i]; 100bf215546Sopenharmony_ci if (op.isUndefined()) 101bf215546Sopenharmony_ci continue; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci if (def.regClass().type() == RegType::sgpr && !op.isTemp()) { 104bf215546Sopenharmony_ci /* SGPR inline constants and literals on GFX10+ can be spilled 105bf215546Sopenharmony_ci * and reloaded directly (without intermediate register) */ 106bf215546Sopenharmony_ci if (op.isConstant()) { 107bf215546Sopenharmony_ci if (ctx.program->gfx_level >= GFX10) 108bf215546Sopenharmony_ci continue; 109bf215546Sopenharmony_ci if (op.size() == 1 && !op.isLiteral()) 110bf215546Sopenharmony_ci continue; 111bf215546Sopenharmony_ci } else { 112bf215546Sopenharmony_ci assert(op.isFixed() && op.physReg() == exec); 113bf215546Sopenharmony_ci continue; 114bf215546Sopenharmony_ci } 115bf215546Sopenharmony_ci } 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci /* create new temporary and rename operands */ 118bf215546Sopenharmony_ci Temp tmp = bld.tmp(def.regClass()); 119bf215546Sopenharmony_ci ctx.parallelcopies[preds[i]].emplace_back(copy{Definition(tmp), op}); 120bf215546Sopenharmony_ci phi->operands[i] = Operand(tmp); 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci /* place the new operands in the same merge set */ 123bf215546Sopenharmony_ci set.emplace_back(tmp); 124bf215546Sopenharmony_ci ctx.merge_node_table[tmp.id()] = {op, index, preds[i]}; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci /* update the liveness information */ 127bf215546Sopenharmony_ci if (op.isKill()) 128bf215546Sopenharmony_ci ctx.live_out[preds[i]].erase(op.tempId()); 129bf215546Sopenharmony_ci ctx.live_out[preds[i]].insert(tmp.id()); 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci has_preheader_copy |= i == 0 && block.kind & block_kind_loop_header; 132bf215546Sopenharmony_ci } 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci if (set.empty()) 135bf215546Sopenharmony_ci continue; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci /* place the definition in dominance-order */ 138bf215546Sopenharmony_ci if (def.isTemp()) { 139bf215546Sopenharmony_ci if (has_preheader_copy) 140bf215546Sopenharmony_ci set.emplace(std::next(set.begin()), def.getTemp()); 141bf215546Sopenharmony_ci else if (block.kind & block_kind_loop_header) 142bf215546Sopenharmony_ci set.emplace(set.begin(), def.getTemp()); 143bf215546Sopenharmony_ci else 144bf215546Sopenharmony_ci set.emplace_back(def.getTemp()); 145bf215546Sopenharmony_ci ctx.merge_node_table[def.tempId()] = {Operand(def.getTemp()), index, block.index}; 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci ctx.merge_sets.emplace_back(set); 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci} 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci/* check whether the definition of a comes after b. */ 153bf215546Sopenharmony_ciinline bool 154bf215546Sopenharmony_cidefined_after(cssa_ctx& ctx, Temp a, Temp b) 155bf215546Sopenharmony_ci{ 156bf215546Sopenharmony_ci merge_node& node_a = ctx.merge_node_table[a.id()]; 157bf215546Sopenharmony_ci merge_node& node_b = ctx.merge_node_table[b.id()]; 158bf215546Sopenharmony_ci if (node_a.defined_at == node_b.defined_at) 159bf215546Sopenharmony_ci return a.id() > b.id(); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci return node_a.defined_at > node_b.defined_at; 162bf215546Sopenharmony_ci} 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci/* check whether a dominates b where b is defined after a */ 165bf215546Sopenharmony_ciinline bool 166bf215546Sopenharmony_cidominates(cssa_ctx& ctx, Temp a, Temp b) 167bf215546Sopenharmony_ci{ 168bf215546Sopenharmony_ci assert(defined_after(ctx, b, a)); 169bf215546Sopenharmony_ci merge_node& node_a = ctx.merge_node_table[a.id()]; 170bf215546Sopenharmony_ci merge_node& node_b = ctx.merge_node_table[b.id()]; 171bf215546Sopenharmony_ci unsigned idom = node_b.defined_at; 172bf215546Sopenharmony_ci while (idom > node_a.defined_at) 173bf215546Sopenharmony_ci idom = b.regClass().type() == RegType::vgpr ? ctx.program->blocks[idom].logical_idom 174bf215546Sopenharmony_ci : ctx.program->blocks[idom].linear_idom; 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci return idom == node_a.defined_at; 177bf215546Sopenharmony_ci} 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci/* check intersection between var and parent: 180bf215546Sopenharmony_ci * We already know that parent dominates var. */ 181bf215546Sopenharmony_ciinline bool 182bf215546Sopenharmony_ciintersects(cssa_ctx& ctx, Temp var, Temp parent) 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci merge_node& node_var = ctx.merge_node_table[var.id()]; 185bf215546Sopenharmony_ci merge_node& node_parent = ctx.merge_node_table[parent.id()]; 186bf215546Sopenharmony_ci assert(node_var.index != node_parent.index); 187bf215546Sopenharmony_ci uint32_t block_idx = node_var.defined_at; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci /* if the parent is live-out at the definition block of var, they intersect */ 190bf215546Sopenharmony_ci bool parent_live = ctx.live_out[block_idx].count(parent.id()); 191bf215546Sopenharmony_ci if (parent_live) 192bf215546Sopenharmony_ci return true; 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci /* parent is defined in a different block than var */ 195bf215546Sopenharmony_ci if (node_parent.defined_at < node_var.defined_at) { 196bf215546Sopenharmony_ci /* if the parent is not live-in, they don't interfere */ 197bf215546Sopenharmony_ci std::vector<uint32_t>& preds = var.type() == RegType::vgpr 198bf215546Sopenharmony_ci ? ctx.program->blocks[block_idx].logical_preds 199bf215546Sopenharmony_ci : ctx.program->blocks[block_idx].linear_preds; 200bf215546Sopenharmony_ci for (uint32_t pred : preds) { 201bf215546Sopenharmony_ci if (!ctx.live_out[pred].count(parent.id())) 202bf215546Sopenharmony_ci return false; 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci for (const copy& cp : ctx.parallelcopies[block_idx]) { 207bf215546Sopenharmony_ci /* if var is defined at the edge, they don't intersect */ 208bf215546Sopenharmony_ci if (cp.def.getTemp() == var) 209bf215546Sopenharmony_ci return false; 210bf215546Sopenharmony_ci if (cp.op.isTemp() && cp.op.getTemp() == parent) 211bf215546Sopenharmony_ci parent_live = true; 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci /* if the parent is live at the edge, they intersect */ 214bf215546Sopenharmony_ci if (parent_live) 215bf215546Sopenharmony_ci return true; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci /* both, parent and var, are present in the same block */ 218bf215546Sopenharmony_ci const Block& block = ctx.program->blocks[block_idx]; 219bf215546Sopenharmony_ci for (auto it = block.instructions.crbegin(); it != block.instructions.crend(); ++it) { 220bf215546Sopenharmony_ci /* if the parent was not encountered yet, it can only be used by a phi */ 221bf215546Sopenharmony_ci if (is_phi(it->get())) 222bf215546Sopenharmony_ci break; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci for (const Definition& def : (*it)->definitions) { 225bf215546Sopenharmony_ci if (!def.isTemp()) 226bf215546Sopenharmony_ci continue; 227bf215546Sopenharmony_ci /* if parent was not found yet, they don't intersect */ 228bf215546Sopenharmony_ci if (def.getTemp() == var) 229bf215546Sopenharmony_ci return false; 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci for (const Operand& op : (*it)->operands) { 233bf215546Sopenharmony_ci if (!op.isTemp()) 234bf215546Sopenharmony_ci continue; 235bf215546Sopenharmony_ci /* if the var was defined before this point, they intersect */ 236bf215546Sopenharmony_ci if (op.getTemp() == parent) 237bf215546Sopenharmony_ci return true; 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci return false; 242bf215546Sopenharmony_ci} 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci/* check interference between var and parent: 245bf215546Sopenharmony_ci * i.e. they have different values and intersect. 246bf215546Sopenharmony_ci * If parent and var share the same value, also updates the equal ancestor. */ 247bf215546Sopenharmony_ciinline bool 248bf215546Sopenharmony_ciinterference(cssa_ctx& ctx, Temp var, Temp parent) 249bf215546Sopenharmony_ci{ 250bf215546Sopenharmony_ci assert(var != parent); 251bf215546Sopenharmony_ci merge_node& node_var = ctx.merge_node_table[var.id()]; 252bf215546Sopenharmony_ci node_var.equal_anc_out = Temp(); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci if (node_var.index == ctx.merge_node_table[parent.id()].index) { 255bf215546Sopenharmony_ci /* check/update in other set */ 256bf215546Sopenharmony_ci parent = ctx.merge_node_table[parent.id()].equal_anc_out; 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci Temp tmp = parent; 260bf215546Sopenharmony_ci /* check if var intersects with parent or any equal-valued ancestor */ 261bf215546Sopenharmony_ci while (tmp != Temp() && !intersects(ctx, var, tmp)) { 262bf215546Sopenharmony_ci merge_node& node_tmp = ctx.merge_node_table[tmp.id()]; 263bf215546Sopenharmony_ci tmp = node_tmp.equal_anc_in; 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci /* no intersection found */ 267bf215546Sopenharmony_ci if (tmp == Temp()) 268bf215546Sopenharmony_ci return false; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci /* var and parent, same value, but in different sets */ 271bf215546Sopenharmony_ci if (node_var.value == ctx.merge_node_table[parent.id()].value) { 272bf215546Sopenharmony_ci node_var.equal_anc_out = tmp; 273bf215546Sopenharmony_ci return false; 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci /* var and parent, different values and intersect */ 277bf215546Sopenharmony_ci return true; 278bf215546Sopenharmony_ci} 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci/* tries to merge set_b into set_a of given temporary and 281bf215546Sopenharmony_ci * drops that temporary as it is being coalesced */ 282bf215546Sopenharmony_cibool 283bf215546Sopenharmony_citry_merge_merge_set(cssa_ctx& ctx, Temp dst, merge_set& set_b) 284bf215546Sopenharmony_ci{ 285bf215546Sopenharmony_ci auto def_node_it = ctx.merge_node_table.find(dst.id()); 286bf215546Sopenharmony_ci uint32_t index = def_node_it->second.index; 287bf215546Sopenharmony_ci merge_set& set_a = ctx.merge_sets[index]; 288bf215546Sopenharmony_ci std::vector<Temp> dom; /* stack of the traversal */ 289bf215546Sopenharmony_ci merge_set union_set; /* the new merged merge-set */ 290bf215546Sopenharmony_ci uint32_t i_a = 0; 291bf215546Sopenharmony_ci uint32_t i_b = 0; 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci while (i_a < set_a.size() || i_b < set_b.size()) { 294bf215546Sopenharmony_ci Temp current; 295bf215546Sopenharmony_ci if (i_a == set_a.size()) 296bf215546Sopenharmony_ci current = set_b[i_b++]; 297bf215546Sopenharmony_ci else if (i_b == set_b.size()) 298bf215546Sopenharmony_ci current = set_a[i_a++]; 299bf215546Sopenharmony_ci /* else pick the one defined first */ 300bf215546Sopenharmony_ci else if (defined_after(ctx, set_a[i_a], set_b[i_b])) 301bf215546Sopenharmony_ci current = set_b[i_b++]; 302bf215546Sopenharmony_ci else 303bf215546Sopenharmony_ci current = set_a[i_a++]; 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci while (!dom.empty() && !dominates(ctx, dom.back(), current)) 306bf215546Sopenharmony_ci dom.pop_back(); /* not the desired parent, remove */ 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci if (!dom.empty() && interference(ctx, current, dom.back())) 309bf215546Sopenharmony_ci return false; /* intersection detected */ 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci dom.emplace_back(current); /* otherwise, keep checking */ 312bf215546Sopenharmony_ci if (current != dst) 313bf215546Sopenharmony_ci union_set.emplace_back(current); /* maintain the new merge-set sorted */ 314bf215546Sopenharmony_ci } 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci /* update hashmap */ 317bf215546Sopenharmony_ci for (Temp t : union_set) { 318bf215546Sopenharmony_ci merge_node& node = ctx.merge_node_table[t.id()]; 319bf215546Sopenharmony_ci /* update the equal ancestors: 320bf215546Sopenharmony_ci * i.e. the 'closest' dominating def with the same value */ 321bf215546Sopenharmony_ci Temp in = node.equal_anc_in; 322bf215546Sopenharmony_ci Temp out = node.equal_anc_out; 323bf215546Sopenharmony_ci if (in == Temp() || (out != Temp() && defined_after(ctx, out, in))) 324bf215546Sopenharmony_ci node.equal_anc_in = out; 325bf215546Sopenharmony_ci node.equal_anc_out = Temp(); 326bf215546Sopenharmony_ci /* update merge-set index */ 327bf215546Sopenharmony_ci node.index = index; 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci set_b = merge_set(); /* free the old set_b */ 330bf215546Sopenharmony_ci ctx.merge_sets[index] = union_set; 331bf215546Sopenharmony_ci ctx.merge_node_table.erase(dst.id()); /* remove the temporary */ 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci return true; 334bf215546Sopenharmony_ci} 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci/* returns true if the copy can safely be omitted */ 337bf215546Sopenharmony_cibool 338bf215546Sopenharmony_citry_coalesce_copy(cssa_ctx& ctx, copy copy, uint32_t block_idx) 339bf215546Sopenharmony_ci{ 340bf215546Sopenharmony_ci /* we can only coalesce temporaries */ 341bf215546Sopenharmony_ci if (!copy.op.isTemp()) 342bf215546Sopenharmony_ci return false; 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci /* try emplace a merge_node for the copy operand */ 345bf215546Sopenharmony_ci merge_node& op_node = ctx.merge_node_table[copy.op.tempId()]; 346bf215546Sopenharmony_ci if (op_node.defined_at == -1u) { 347bf215546Sopenharmony_ci /* find defining block of operand */ 348bf215546Sopenharmony_ci uint32_t pred = block_idx; 349bf215546Sopenharmony_ci do { 350bf215546Sopenharmony_ci block_idx = pred; 351bf215546Sopenharmony_ci pred = copy.op.regClass().type() == RegType::vgpr ? ctx.program->blocks[pred].logical_idom 352bf215546Sopenharmony_ci : ctx.program->blocks[pred].linear_idom; 353bf215546Sopenharmony_ci } while (block_idx != pred && ctx.live_out[pred].count(copy.op.tempId())); 354bf215546Sopenharmony_ci op_node.defined_at = block_idx; 355bf215546Sopenharmony_ci op_node.value = copy.op; 356bf215546Sopenharmony_ci } 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci /* we can only coalesce copies of the same register class */ 359bf215546Sopenharmony_ci if (copy.op.regClass() != copy.def.regClass()) 360bf215546Sopenharmony_ci return false; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci /* check if this operand has not yet been coalesced */ 363bf215546Sopenharmony_ci if (op_node.index == -1u) { 364bf215546Sopenharmony_ci merge_set op_set = merge_set{copy.op.getTemp()}; 365bf215546Sopenharmony_ci return try_merge_merge_set(ctx, copy.def.getTemp(), op_set); 366bf215546Sopenharmony_ci } 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci /* check if this operand has been coalesced into the same set */ 369bf215546Sopenharmony_ci assert(ctx.merge_node_table.count(copy.def.tempId())); 370bf215546Sopenharmony_ci if (op_node.index == ctx.merge_node_table[copy.def.tempId()].index) 371bf215546Sopenharmony_ci return true; 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci /* otherwise, try to coalesce both merge sets */ 374bf215546Sopenharmony_ci return try_merge_merge_set(ctx, copy.def.getTemp(), ctx.merge_sets[op_node.index]); 375bf215546Sopenharmony_ci} 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci/* node in the location-transfer-graph */ 378bf215546Sopenharmony_cistruct ltg_node { 379bf215546Sopenharmony_ci copy cp; 380bf215546Sopenharmony_ci uint32_t read_idx; 381bf215546Sopenharmony_ci uint32_t num_uses = 0; 382bf215546Sopenharmony_ci}; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci/* emit the copies in an order that does not 385bf215546Sopenharmony_ci * create interferences within a merge-set */ 386bf215546Sopenharmony_civoid 387bf215546Sopenharmony_ciemit_copies_block(Builder& bld, std::map<uint32_t, ltg_node>& ltg, RegType type) 388bf215546Sopenharmony_ci{ 389bf215546Sopenharmony_ci auto&& it = ltg.begin(); 390bf215546Sopenharmony_ci while (it != ltg.end()) { 391bf215546Sopenharmony_ci const copy& cp = it->second.cp; 392bf215546Sopenharmony_ci /* wrong regclass or still needed as operand */ 393bf215546Sopenharmony_ci if (cp.def.regClass().type() != type || it->second.num_uses > 0) { 394bf215546Sopenharmony_ci ++it; 395bf215546Sopenharmony_ci continue; 396bf215546Sopenharmony_ci } 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci /* emit the copy */ 399bf215546Sopenharmony_ci bld.copy(cp.def, it->second.cp.op); 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci /* update the location transfer graph */ 402bf215546Sopenharmony_ci if (it->second.read_idx != -1u) { 403bf215546Sopenharmony_ci auto&& other = ltg.find(it->second.read_idx); 404bf215546Sopenharmony_ci if (other != ltg.end()) 405bf215546Sopenharmony_ci other->second.num_uses--; 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci ltg.erase(it); 408bf215546Sopenharmony_ci it = ltg.begin(); 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci /* count the number of remaining circular dependencies */ 412bf215546Sopenharmony_ci unsigned num = std::count_if(ltg.begin(), ltg.end(), 413bf215546Sopenharmony_ci [&](auto& n) { return n.second.cp.def.regClass().type() == type; }); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci /* if there are circular dependencies, we just emit them as single parallelcopy */ 416bf215546Sopenharmony_ci if (num) { 417bf215546Sopenharmony_ci // TODO: this should be restricted to a feasible number of registers 418bf215546Sopenharmony_ci // and otherwise use a temporary to avoid having to reload more (spilled) 419bf215546Sopenharmony_ci // variables than we have registers. 420bf215546Sopenharmony_ci aco_ptr<Pseudo_instruction> copy{create_instruction<Pseudo_instruction>( 421bf215546Sopenharmony_ci aco_opcode::p_parallelcopy, Format::PSEUDO, num, num)}; 422bf215546Sopenharmony_ci it = ltg.begin(); 423bf215546Sopenharmony_ci for (unsigned i = 0; i < num; i++) { 424bf215546Sopenharmony_ci while (it->second.cp.def.regClass().type() != type) 425bf215546Sopenharmony_ci ++it; 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci copy->definitions[i] = it->second.cp.def; 428bf215546Sopenharmony_ci copy->operands[i] = it->second.cp.op; 429bf215546Sopenharmony_ci it = ltg.erase(it); 430bf215546Sopenharmony_ci } 431bf215546Sopenharmony_ci bld.insert(std::move(copy)); 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci} 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci/* either emits or coalesces all parallelcopies and 436bf215546Sopenharmony_ci * renames the phi-operands accordingly. */ 437bf215546Sopenharmony_civoid 438bf215546Sopenharmony_ciemit_parallelcopies(cssa_ctx& ctx) 439bf215546Sopenharmony_ci{ 440bf215546Sopenharmony_ci std::unordered_map<uint32_t, Operand> renames; 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci /* we iterate backwards to prioritize coalescing in else-blocks */ 443bf215546Sopenharmony_ci for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) { 444bf215546Sopenharmony_ci if (ctx.parallelcopies[i].empty()) 445bf215546Sopenharmony_ci continue; 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci std::map<uint32_t, ltg_node> ltg; 448bf215546Sopenharmony_ci bool has_vgpr_copy = false; 449bf215546Sopenharmony_ci bool has_sgpr_copy = false; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci /* first, try to coalesce all parallelcopies */ 452bf215546Sopenharmony_ci for (const copy& cp : ctx.parallelcopies[i]) { 453bf215546Sopenharmony_ci if (try_coalesce_copy(ctx, cp, i)) { 454bf215546Sopenharmony_ci renames.emplace(cp.def.tempId(), cp.op); 455bf215546Sopenharmony_ci /* update liveness info */ 456bf215546Sopenharmony_ci ctx.live_out[i].erase(cp.def.tempId()); 457bf215546Sopenharmony_ci ctx.live_out[i].insert(cp.op.tempId()); 458bf215546Sopenharmony_ci } else { 459bf215546Sopenharmony_ci uint32_t read_idx = -1u; 460bf215546Sopenharmony_ci if (cp.op.isTemp()) 461bf215546Sopenharmony_ci read_idx = ctx.merge_node_table[cp.op.tempId()].index; 462bf215546Sopenharmony_ci uint32_t write_idx = ctx.merge_node_table[cp.def.tempId()].index; 463bf215546Sopenharmony_ci assert(write_idx != -1u); 464bf215546Sopenharmony_ci ltg[write_idx] = {cp, read_idx}; 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci bool is_vgpr = cp.def.regClass().type() == RegType::vgpr; 467bf215546Sopenharmony_ci has_vgpr_copy |= is_vgpr; 468bf215546Sopenharmony_ci has_sgpr_copy |= !is_vgpr; 469bf215546Sopenharmony_ci } 470bf215546Sopenharmony_ci } 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci /* build location-transfer-graph */ 473bf215546Sopenharmony_ci for (auto& pair : ltg) { 474bf215546Sopenharmony_ci if (pair.second.read_idx == -1u) 475bf215546Sopenharmony_ci continue; 476bf215546Sopenharmony_ci auto&& it = ltg.find(pair.second.read_idx); 477bf215546Sopenharmony_ci if (it != ltg.end()) 478bf215546Sopenharmony_ci it->second.num_uses++; 479bf215546Sopenharmony_ci } 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci /* emit parallelcopies ordered */ 482bf215546Sopenharmony_ci Builder bld(ctx.program); 483bf215546Sopenharmony_ci Block& block = ctx.program->blocks[i]; 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci if (has_vgpr_copy) { 486bf215546Sopenharmony_ci /* emit VGPR copies */ 487bf215546Sopenharmony_ci auto IsLogicalEnd = [](const aco_ptr<Instruction>& inst) -> bool 488bf215546Sopenharmony_ci { return inst->opcode == aco_opcode::p_logical_end; }; 489bf215546Sopenharmony_ci auto it = 490bf215546Sopenharmony_ci std::find_if(block.instructions.rbegin(), block.instructions.rend(), IsLogicalEnd); 491bf215546Sopenharmony_ci bld.reset(&block.instructions, std::prev(it.base())); 492bf215546Sopenharmony_ci emit_copies_block(bld, ltg, RegType::vgpr); 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci if (has_sgpr_copy) { 496bf215546Sopenharmony_ci /* emit SGPR copies */ 497bf215546Sopenharmony_ci aco_ptr<Instruction> branch = std::move(block.instructions.back()); 498bf215546Sopenharmony_ci block.instructions.pop_back(); 499bf215546Sopenharmony_ci bld.reset(&block.instructions); 500bf215546Sopenharmony_ci emit_copies_block(bld, ltg, RegType::sgpr); 501bf215546Sopenharmony_ci bld.insert(std::move(branch)); 502bf215546Sopenharmony_ci } 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci /* finally, rename coalesced phi operands */ 506bf215546Sopenharmony_ci for (Block& block : ctx.program->blocks) { 507bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : block.instructions) { 508bf215546Sopenharmony_ci if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi) 509bf215546Sopenharmony_ci break; 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci for (Operand& op : phi->operands) { 512bf215546Sopenharmony_ci if (!op.isTemp()) 513bf215546Sopenharmony_ci continue; 514bf215546Sopenharmony_ci auto&& it = renames.find(op.tempId()); 515bf215546Sopenharmony_ci if (it != renames.end()) { 516bf215546Sopenharmony_ci op = it->second; 517bf215546Sopenharmony_ci renames.erase(it); 518bf215546Sopenharmony_ci } 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci } 521bf215546Sopenharmony_ci } 522bf215546Sopenharmony_ci assert(renames.empty()); 523bf215546Sopenharmony_ci} 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci} /* end namespace */ 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_civoid 528bf215546Sopenharmony_cilower_to_cssa(Program* program, live& live_vars) 529bf215546Sopenharmony_ci{ 530bf215546Sopenharmony_ci reindex_ssa(program, live_vars.live_out); 531bf215546Sopenharmony_ci cssa_ctx ctx = {program, live_vars.live_out}; 532bf215546Sopenharmony_ci collect_parallelcopies(ctx); 533bf215546Sopenharmony_ci emit_parallelcopies(ctx); 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci /* update live variable information */ 536bf215546Sopenharmony_ci live_vars = live_var_analysis(program); 537bf215546Sopenharmony_ci} 538bf215546Sopenharmony_ci} // namespace aco 539