1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "aco_ir.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <algorithm> 28bf215546Sopenharmony_ci#include <array> 29bf215546Sopenharmony_ci#include <bitset> 30bf215546Sopenharmony_ci#include <map> 31bf215546Sopenharmony_ci#include <set> 32bf215546Sopenharmony_ci#include <unordered_map> 33bf215546Sopenharmony_ci#include <vector> 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cinamespace aco { 36bf215546Sopenharmony_cinamespace { 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistruct ra_ctx; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ciunsigned get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, 41bf215546Sopenharmony_ci unsigned idx, RegClass rc); 42bf215546Sopenharmony_civoid add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte, 43bf215546Sopenharmony_ci RegClass rc); 44bf215546Sopenharmony_cistd::pair<unsigned, unsigned> 45bf215546Sopenharmony_ciget_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc); 46bf215546Sopenharmony_civoid add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg); 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistruct assignment { 49bf215546Sopenharmony_ci PhysReg reg; 50bf215546Sopenharmony_ci RegClass rc; 51bf215546Sopenharmony_ci union { 52bf215546Sopenharmony_ci struct { 53bf215546Sopenharmony_ci bool assigned : 1; 54bf215546Sopenharmony_ci bool vcc : 1; 55bf215546Sopenharmony_ci }; 56bf215546Sopenharmony_ci uint8_t _ = 0; 57bf215546Sopenharmony_ci }; 58bf215546Sopenharmony_ci uint32_t affinity = 0; 59bf215546Sopenharmony_ci assignment() = default; 60bf215546Sopenharmony_ci assignment(PhysReg reg_, RegClass rc_) : reg(reg_), rc(rc_), assigned(-1) {} 61bf215546Sopenharmony_ci void set(const Definition& def) 62bf215546Sopenharmony_ci { 63bf215546Sopenharmony_ci assigned = true; 64bf215546Sopenharmony_ci reg = def.physReg(); 65bf215546Sopenharmony_ci rc = def.regClass(); 66bf215546Sopenharmony_ci } 67bf215546Sopenharmony_ci}; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_cistruct ra_ctx { 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci Program* program; 72bf215546Sopenharmony_ci Block* block = NULL; 73bf215546Sopenharmony_ci std::vector<assignment> assignments; 74bf215546Sopenharmony_ci std::vector<std::unordered_map<unsigned, Temp>> renames; 75bf215546Sopenharmony_ci std::vector<uint32_t> loop_header; 76bf215546Sopenharmony_ci std::unordered_map<unsigned, Temp> orig_names; 77bf215546Sopenharmony_ci std::unordered_map<unsigned, Instruction*> vectors; 78bf215546Sopenharmony_ci std::unordered_map<unsigned, Instruction*> split_vectors; 79bf215546Sopenharmony_ci aco_ptr<Instruction> pseudo_dummy; 80bf215546Sopenharmony_ci aco_ptr<Instruction> phi_dummy; 81bf215546Sopenharmony_ci uint16_t max_used_sgpr = 0; 82bf215546Sopenharmony_ci uint16_t max_used_vgpr = 0; 83bf215546Sopenharmony_ci uint16_t sgpr_limit; 84bf215546Sopenharmony_ci uint16_t vgpr_limit; 85bf215546Sopenharmony_ci std::bitset<512> war_hint; 86bf215546Sopenharmony_ci std::bitset<64> defs_done; /* see MAX_ARGS in aco_instruction_selection_setup.cpp */ 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci ra_test_policy policy; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci ra_ctx(Program* program_, ra_test_policy policy_) 91bf215546Sopenharmony_ci : program(program_), assignments(program->peekAllocationId()), 92bf215546Sopenharmony_ci renames(program->blocks.size()), policy(policy_) 93bf215546Sopenharmony_ci { 94bf215546Sopenharmony_ci pseudo_dummy.reset( 95bf215546Sopenharmony_ci create_instruction<Instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 0, 0)); 96bf215546Sopenharmony_ci phi_dummy.reset( 97bf215546Sopenharmony_ci create_instruction<Instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, 0, 0)); 98bf215546Sopenharmony_ci sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves); 99bf215546Sopenharmony_ci vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves); 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci}; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci/* Iterator type for making PhysRegInterval compatible with range-based for */ 104bf215546Sopenharmony_cistruct PhysRegIterator { 105bf215546Sopenharmony_ci using difference_type = int; 106bf215546Sopenharmony_ci using value_type = unsigned; 107bf215546Sopenharmony_ci using reference = const unsigned&; 108bf215546Sopenharmony_ci using pointer = const unsigned*; 109bf215546Sopenharmony_ci using iterator_category = std::bidirectional_iterator_tag; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci PhysReg reg; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci PhysReg operator*() const { return reg; } 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci PhysRegIterator& operator++() 116bf215546Sopenharmony_ci { 117bf215546Sopenharmony_ci reg.reg_b += 4; 118bf215546Sopenharmony_ci return *this; 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci PhysRegIterator& operator--() 122bf215546Sopenharmony_ci { 123bf215546Sopenharmony_ci reg.reg_b -= 4; 124bf215546Sopenharmony_ci return *this; 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci bool operator==(PhysRegIterator oth) const { return reg == oth.reg; } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci bool operator!=(PhysRegIterator oth) const { return reg != oth.reg; } 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci bool operator<(PhysRegIterator oth) const { return reg < oth.reg; } 132bf215546Sopenharmony_ci}; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci/* Half-open register interval used in "sliding window"-style for-loops */ 135bf215546Sopenharmony_cistruct PhysRegInterval { 136bf215546Sopenharmony_ci PhysReg lo_; 137bf215546Sopenharmony_ci unsigned size; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci /* Inclusive lower bound */ 140bf215546Sopenharmony_ci PhysReg lo() const { return lo_; } 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci /* Exclusive upper bound */ 143bf215546Sopenharmony_ci PhysReg hi() const { return PhysReg{lo() + size}; } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci PhysRegInterval& operator+=(uint32_t stride) 146bf215546Sopenharmony_ci { 147bf215546Sopenharmony_ci lo_ = PhysReg{lo_.reg() + stride}; 148bf215546Sopenharmony_ci return *this; 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci bool operator!=(const PhysRegInterval& oth) const { return lo_ != oth.lo_ || size != oth.size; } 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci /* Construct a half-open interval, excluding the end register */ 154bf215546Sopenharmony_ci static PhysRegInterval from_until(PhysReg first, PhysReg end) { return {first, end - first}; } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci bool contains(PhysReg reg) const { return lo() <= reg && reg < hi(); } 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci bool contains(const PhysRegInterval& needle) const 159bf215546Sopenharmony_ci { 160bf215546Sopenharmony_ci return needle.lo() >= lo() && needle.hi() <= hi(); 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci PhysRegIterator begin() const { return {lo_}; } 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci PhysRegIterator end() const { return {PhysReg{lo_ + size}}; } 166bf215546Sopenharmony_ci}; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_cibool 169bf215546Sopenharmony_ciintersects(const PhysRegInterval& a, const PhysRegInterval& b) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci return a.hi() > b.lo() && b.hi() > a.lo(); 172bf215546Sopenharmony_ci} 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci/* Gets the stride for full (non-subdword) registers */ 175bf215546Sopenharmony_ciuint32_t 176bf215546Sopenharmony_ciget_stride(RegClass rc) 177bf215546Sopenharmony_ci{ 178bf215546Sopenharmony_ci if (rc.type() == RegType::vgpr) { 179bf215546Sopenharmony_ci return 1; 180bf215546Sopenharmony_ci } else { 181bf215546Sopenharmony_ci uint32_t size = rc.size(); 182bf215546Sopenharmony_ci if (size == 2) { 183bf215546Sopenharmony_ci return 2; 184bf215546Sopenharmony_ci } else if (size >= 4) { 185bf215546Sopenharmony_ci return 4; 186bf215546Sopenharmony_ci } else { 187bf215546Sopenharmony_ci return 1; 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci} 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ciPhysRegInterval 193bf215546Sopenharmony_ciget_reg_bounds(Program* program, RegType type) 194bf215546Sopenharmony_ci{ 195bf215546Sopenharmony_ci if (type == RegType::vgpr) { 196bf215546Sopenharmony_ci return {PhysReg{256}, (unsigned)program->max_reg_demand.vgpr}; 197bf215546Sopenharmony_ci } else { 198bf215546Sopenharmony_ci return {PhysReg{0}, (unsigned)program->max_reg_demand.sgpr}; 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci} 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_cistruct DefInfo { 203bf215546Sopenharmony_ci PhysRegInterval bounds; 204bf215546Sopenharmony_ci uint8_t size; 205bf215546Sopenharmony_ci uint8_t stride; 206bf215546Sopenharmony_ci RegClass rc; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci DefInfo(ra_ctx& ctx, aco_ptr<Instruction>& instr, RegClass rc_, int operand) : rc(rc_) 209bf215546Sopenharmony_ci { 210bf215546Sopenharmony_ci size = rc.size(); 211bf215546Sopenharmony_ci stride = get_stride(rc); 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci bounds = get_reg_bounds(ctx.program, rc.type()); 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci if (rc.is_subdword() && operand >= 0) { 216bf215546Sopenharmony_ci /* stride in bytes */ 217bf215546Sopenharmony_ci stride = get_subdword_operand_stride(ctx.program->gfx_level, instr, operand, rc); 218bf215546Sopenharmony_ci } else if (rc.is_subdword()) { 219bf215546Sopenharmony_ci std::pair<unsigned, unsigned> info = get_subdword_definition_info(ctx.program, instr, rc); 220bf215546Sopenharmony_ci stride = info.first; 221bf215546Sopenharmony_ci if (info.second > rc.bytes()) { 222bf215546Sopenharmony_ci rc = RegClass::get(rc.type(), info.second); 223bf215546Sopenharmony_ci size = rc.size(); 224bf215546Sopenharmony_ci /* we might still be able to put the definition in the high half, 225bf215546Sopenharmony_ci * but that's only useful for affinities and this information isn't 226bf215546Sopenharmony_ci * used for them */ 227bf215546Sopenharmony_ci stride = align(stride, info.second); 228bf215546Sopenharmony_ci if (!rc.is_subdword()) 229bf215546Sopenharmony_ci stride = DIV_ROUND_UP(stride, 4); 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci assert(stride > 0); 232bf215546Sopenharmony_ci } else if (instr->isMIMG() && instr->mimg().d16 && ctx.program->gfx_level <= GFX9) { 233bf215546Sopenharmony_ci /* Workaround GFX9 hardware bug for D16 image instructions: FeatureImageGather4D16Bug 234bf215546Sopenharmony_ci * 235bf215546Sopenharmony_ci * The register use is not calculated correctly, and the hardware assumes a 236bf215546Sopenharmony_ci * full dword per component. Don't use the last registers of the register file. 237bf215546Sopenharmony_ci * Otherwise, the instruction will be skipped. 238bf215546Sopenharmony_ci * 239bf215546Sopenharmony_ci * https://reviews.llvm.org/D81172 240bf215546Sopenharmony_ci */ 241bf215546Sopenharmony_ci bool imageGather4D16Bug = operand == -1 && rc == v2 && instr->mimg().dmask != 0xF; 242bf215546Sopenharmony_ci assert(ctx.program->gfx_level == GFX9 && "Image D16 on GFX8 not supported."); 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci if (imageGather4D16Bug) 245bf215546Sopenharmony_ci bounds.size -= rc.bytes() / 4; 246bf215546Sopenharmony_ci } 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci}; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ciclass RegisterFile { 251bf215546Sopenharmony_cipublic: 252bf215546Sopenharmony_ci RegisterFile() { regs.fill(0); } 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci std::array<uint32_t, 512> regs; 255bf215546Sopenharmony_ci std::map<uint32_t, std::array<uint32_t, 4>> subdword_regs; 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci const uint32_t& operator[](PhysReg index) const { return regs[index]; } 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci uint32_t& operator[](PhysReg index) { return regs[index]; } 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci unsigned count_zero(PhysRegInterval reg_interval) 262bf215546Sopenharmony_ci { 263bf215546Sopenharmony_ci unsigned res = 0; 264bf215546Sopenharmony_ci for (PhysReg reg : reg_interval) 265bf215546Sopenharmony_ci res += !regs[reg]; 266bf215546Sopenharmony_ci return res; 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci /* Returns true if any of the bytes in the given range are allocated or blocked */ 270bf215546Sopenharmony_ci bool test(PhysReg start, unsigned num_bytes) 271bf215546Sopenharmony_ci { 272bf215546Sopenharmony_ci for (PhysReg i = start; i.reg_b < start.reg_b + num_bytes; i = PhysReg(i + 1)) { 273bf215546Sopenharmony_ci assert(i <= 511); 274bf215546Sopenharmony_ci if (regs[i] & 0x0FFFFFFF) 275bf215546Sopenharmony_ci return true; 276bf215546Sopenharmony_ci if (regs[i] == 0xF0000000) { 277bf215546Sopenharmony_ci assert(subdword_regs.find(i) != subdword_regs.end()); 278bf215546Sopenharmony_ci for (unsigned j = i.byte(); i * 4 + j < start.reg_b + num_bytes && j < 4; j++) { 279bf215546Sopenharmony_ci if (subdword_regs[i][j]) 280bf215546Sopenharmony_ci return true; 281bf215546Sopenharmony_ci } 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci return false; 285bf215546Sopenharmony_ci } 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci void block(PhysReg start, RegClass rc) 288bf215546Sopenharmony_ci { 289bf215546Sopenharmony_ci if (rc.is_subdword()) 290bf215546Sopenharmony_ci fill_subdword(start, rc.bytes(), 0xFFFFFFFF); 291bf215546Sopenharmony_ci else 292bf215546Sopenharmony_ci fill(start, rc.size(), 0xFFFFFFFF); 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci bool is_blocked(PhysReg start) 296bf215546Sopenharmony_ci { 297bf215546Sopenharmony_ci if (regs[start] == 0xFFFFFFFF) 298bf215546Sopenharmony_ci return true; 299bf215546Sopenharmony_ci if (regs[start] == 0xF0000000) { 300bf215546Sopenharmony_ci for (unsigned i = start.byte(); i < 4; i++) 301bf215546Sopenharmony_ci if (subdword_regs[start][i] == 0xFFFFFFFF) 302bf215546Sopenharmony_ci return true; 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci return false; 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci bool is_empty_or_blocked(PhysReg start) 308bf215546Sopenharmony_ci { 309bf215546Sopenharmony_ci /* Empty is 0, blocked is 0xFFFFFFFF, so to check both we compare the 310bf215546Sopenharmony_ci * incremented value to 1 */ 311bf215546Sopenharmony_ci if (regs[start] == 0xF0000000) { 312bf215546Sopenharmony_ci return subdword_regs[start][start.byte()] + 1 <= 1; 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci return regs[start] + 1 <= 1; 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci void clear(PhysReg start, RegClass rc) 318bf215546Sopenharmony_ci { 319bf215546Sopenharmony_ci if (rc.is_subdword()) 320bf215546Sopenharmony_ci fill_subdword(start, rc.bytes(), 0); 321bf215546Sopenharmony_ci else 322bf215546Sopenharmony_ci fill(start, rc.size(), 0); 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci void fill(Operand op) 326bf215546Sopenharmony_ci { 327bf215546Sopenharmony_ci if (op.regClass().is_subdword()) 328bf215546Sopenharmony_ci fill_subdword(op.physReg(), op.bytes(), op.tempId()); 329bf215546Sopenharmony_ci else 330bf215546Sopenharmony_ci fill(op.physReg(), op.size(), op.tempId()); 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci void clear(Operand op) { clear(op.physReg(), op.regClass()); } 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci void fill(Definition def) 336bf215546Sopenharmony_ci { 337bf215546Sopenharmony_ci if (def.regClass().is_subdword()) 338bf215546Sopenharmony_ci fill_subdword(def.physReg(), def.bytes(), def.tempId()); 339bf215546Sopenharmony_ci else 340bf215546Sopenharmony_ci fill(def.physReg(), def.size(), def.tempId()); 341bf215546Sopenharmony_ci } 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci void clear(Definition def) { clear(def.physReg(), def.regClass()); } 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci unsigned get_id(PhysReg reg) 346bf215546Sopenharmony_ci { 347bf215546Sopenharmony_ci return regs[reg] == 0xF0000000 ? subdword_regs[reg][reg.byte()] : regs[reg]; 348bf215546Sopenharmony_ci } 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ciprivate: 351bf215546Sopenharmony_ci void fill(PhysReg start, unsigned size, uint32_t val) 352bf215546Sopenharmony_ci { 353bf215546Sopenharmony_ci for (unsigned i = 0; i < size; i++) 354bf215546Sopenharmony_ci regs[start + i] = val; 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci void fill_subdword(PhysReg start, unsigned num_bytes, uint32_t val) 358bf215546Sopenharmony_ci { 359bf215546Sopenharmony_ci fill(start, DIV_ROUND_UP(num_bytes, 4), 0xF0000000); 360bf215546Sopenharmony_ci for (PhysReg i = start; i.reg_b < start.reg_b + num_bytes; i = PhysReg(i + 1)) { 361bf215546Sopenharmony_ci /* emplace or get */ 362bf215546Sopenharmony_ci std::array<uint32_t, 4>& sub = 363bf215546Sopenharmony_ci subdword_regs.emplace(i, std::array<uint32_t, 4>{0, 0, 0, 0}).first->second; 364bf215546Sopenharmony_ci for (unsigned j = i.byte(); i * 4 + j < start.reg_b + num_bytes && j < 4; j++) 365bf215546Sopenharmony_ci sub[j] = val; 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci if (sub == std::array<uint32_t, 4>{0, 0, 0, 0}) { 368bf215546Sopenharmony_ci subdword_regs.erase(i); 369bf215546Sopenharmony_ci regs[i] = 0; 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci } 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci}; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_cistd::vector<unsigned> find_vars(ra_ctx& ctx, RegisterFile& reg_file, 376bf215546Sopenharmony_ci const PhysRegInterval reg_interval); 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci/* helper function for debugging */ 379bf215546Sopenharmony_ciUNUSED void 380bf215546Sopenharmony_ciprint_reg(const RegisterFile& reg_file, PhysReg reg, bool has_adjacent_variable) 381bf215546Sopenharmony_ci{ 382bf215546Sopenharmony_ci if (reg_file[reg] == 0xFFFFFFFF) { 383bf215546Sopenharmony_ci printf("☐"); 384bf215546Sopenharmony_ci } else if (reg_file[reg]) { 385bf215546Sopenharmony_ci const bool show_subdword_alloc = (reg_file[reg] == 0xF0000000); 386bf215546Sopenharmony_ci if (show_subdword_alloc) { 387bf215546Sopenharmony_ci const char* block_chars[] = { 388bf215546Sopenharmony_ci // clang-format off 389bf215546Sopenharmony_ci "?", "▘", "▝", "▀", 390bf215546Sopenharmony_ci "▖", "▌", "▞", "▛", 391bf215546Sopenharmony_ci "▗", "▚", "▐", "▜", 392bf215546Sopenharmony_ci "▄", "▙", "▟", "▉" 393bf215546Sopenharmony_ci // clang-format on 394bf215546Sopenharmony_ci }; 395bf215546Sopenharmony_ci unsigned index = 0; 396bf215546Sopenharmony_ci for (int i = 0; i < 4; ++i) { 397bf215546Sopenharmony_ci if (reg_file.subdword_regs.at(reg)[i]) { 398bf215546Sopenharmony_ci index |= 1 << i; 399bf215546Sopenharmony_ci } 400bf215546Sopenharmony_ci } 401bf215546Sopenharmony_ci printf("%s", block_chars[index]); 402bf215546Sopenharmony_ci } else { 403bf215546Sopenharmony_ci /* Indicate filled register slot */ 404bf215546Sopenharmony_ci if (!has_adjacent_variable) { 405bf215546Sopenharmony_ci printf("█"); 406bf215546Sopenharmony_ci } else { 407bf215546Sopenharmony_ci /* Use a slightly shorter box to leave a small gap between adjacent variables */ 408bf215546Sopenharmony_ci printf("▉"); 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci } else { 412bf215546Sopenharmony_ci printf("·"); 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci} 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci/* helper function for debugging */ 417bf215546Sopenharmony_ciUNUSED void 418bf215546Sopenharmony_ciprint_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file) 419bf215546Sopenharmony_ci{ 420bf215546Sopenharmony_ci PhysRegInterval regs = get_reg_bounds(ctx.program, vgprs ? RegType::vgpr : RegType::sgpr); 421bf215546Sopenharmony_ci char reg_char = vgprs ? 'v' : 's'; 422bf215546Sopenharmony_ci const int max_regs_per_line = 64; 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci /* print markers */ 425bf215546Sopenharmony_ci printf(" "); 426bf215546Sopenharmony_ci for (int i = 0; i < std::min<int>(max_regs_per_line, ROUND_DOWN_TO(regs.size, 4)); i += 4) { 427bf215546Sopenharmony_ci printf("%-3.2u ", i); 428bf215546Sopenharmony_ci } 429bf215546Sopenharmony_ci printf("\n"); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci /* print usage */ 432bf215546Sopenharmony_ci auto line_begin_it = regs.begin(); 433bf215546Sopenharmony_ci while (line_begin_it != regs.end()) { 434bf215546Sopenharmony_ci const int regs_in_line = 435bf215546Sopenharmony_ci std::min<int>(max_regs_per_line, std::distance(line_begin_it, regs.end())); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci if (line_begin_it == regs.begin()) { 438bf215546Sopenharmony_ci printf("%cgprs: ", reg_char); 439bf215546Sopenharmony_ci } else { 440bf215546Sopenharmony_ci printf(" %+4d ", std::distance(regs.begin(), line_begin_it)); 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci const auto line_end_it = std::next(line_begin_it, regs_in_line); 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci for (auto reg_it = line_begin_it; reg_it != line_end_it; ++reg_it) { 445bf215546Sopenharmony_ci bool has_adjacent_variable = 446bf215546Sopenharmony_ci (std::next(reg_it) != line_end_it && 447bf215546Sopenharmony_ci reg_file[*reg_it] != reg_file[*std::next(reg_it)] && reg_file[*std::next(reg_it)]); 448bf215546Sopenharmony_ci print_reg(reg_file, *reg_it, has_adjacent_variable); 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci line_begin_it = line_end_it; 452bf215546Sopenharmony_ci printf("\n"); 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci const unsigned free_regs = 456bf215546Sopenharmony_ci std::count_if(regs.begin(), regs.end(), [&](auto reg) { return !reg_file[reg]; }); 457bf215546Sopenharmony_ci printf("%u/%u used, %u/%u free\n", regs.size - free_regs, regs.size, free_regs, regs.size); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci /* print assignments ordered by registers */ 460bf215546Sopenharmony_ci std::map<PhysReg, std::pair<unsigned, unsigned>> 461bf215546Sopenharmony_ci regs_to_vars; /* maps to byte size and temp id */ 462bf215546Sopenharmony_ci for (unsigned id : find_vars(ctx, reg_file, regs)) { 463bf215546Sopenharmony_ci const assignment& var = ctx.assignments[id]; 464bf215546Sopenharmony_ci PhysReg reg = var.reg; 465bf215546Sopenharmony_ci ASSERTED auto inserted = regs_to_vars.emplace(reg, std::make_pair(var.rc.bytes(), id)); 466bf215546Sopenharmony_ci assert(inserted.second); 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci for (const auto& reg_and_var : regs_to_vars) { 470bf215546Sopenharmony_ci const auto& first_reg = reg_and_var.first; 471bf215546Sopenharmony_ci const auto& size_id = reg_and_var.second; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci printf("%%%u ", size_id.second); 474bf215546Sopenharmony_ci if (ctx.orig_names.count(size_id.second) && 475bf215546Sopenharmony_ci ctx.orig_names[size_id.second].id() != size_id.second) { 476bf215546Sopenharmony_ci printf("(was %%%d) ", ctx.orig_names[size_id.second].id()); 477bf215546Sopenharmony_ci } 478bf215546Sopenharmony_ci printf("= %c[%d", reg_char, first_reg.reg() - regs.lo()); 479bf215546Sopenharmony_ci PhysReg last_reg = first_reg.advance(size_id.first - 1); 480bf215546Sopenharmony_ci if (first_reg.reg() != last_reg.reg()) { 481bf215546Sopenharmony_ci assert(first_reg.byte() == 0 && last_reg.byte() == 3); 482bf215546Sopenharmony_ci printf("-%d", last_reg.reg() - regs.lo()); 483bf215546Sopenharmony_ci } 484bf215546Sopenharmony_ci printf("]"); 485bf215546Sopenharmony_ci if (first_reg.byte() != 0 || last_reg.byte() != 3) { 486bf215546Sopenharmony_ci printf("[%d:%d]", first_reg.byte() * 8, (last_reg.byte() + 1) * 8); 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci printf("\n"); 489bf215546Sopenharmony_ci } 490bf215546Sopenharmony_ci} 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ciunsigned 493bf215546Sopenharmony_ciget_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, 494bf215546Sopenharmony_ci unsigned idx, RegClass rc) 495bf215546Sopenharmony_ci{ 496bf215546Sopenharmony_ci if (instr->isPseudo()) { 497bf215546Sopenharmony_ci /* v_readfirstlane_b32 cannot use SDWA */ 498bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_as_uniform) 499bf215546Sopenharmony_ci return 4; 500bf215546Sopenharmony_ci else if (gfx_level >= GFX8) 501bf215546Sopenharmony_ci return rc.bytes() % 2 == 0 ? 2 : 1; 502bf215546Sopenharmony_ci else 503bf215546Sopenharmony_ci return 4; 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci assert(rc.bytes() <= 2); 507bf215546Sopenharmony_ci if (instr->isVALU()) { 508bf215546Sopenharmony_ci if (can_use_SDWA(gfx_level, instr, false)) 509bf215546Sopenharmony_ci return rc.bytes(); 510bf215546Sopenharmony_ci if (can_use_opsel(gfx_level, instr->opcode, idx)) 511bf215546Sopenharmony_ci return 2; 512bf215546Sopenharmony_ci if (instr->format == Format::VOP3P) 513bf215546Sopenharmony_ci return 2; 514bf215546Sopenharmony_ci } 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci switch (instr->opcode) { 517bf215546Sopenharmony_ci case aco_opcode::v_cvt_f32_ubyte0: return 1; 518bf215546Sopenharmony_ci case aco_opcode::ds_write_b8: 519bf215546Sopenharmony_ci case aco_opcode::ds_write_b16: return gfx_level >= GFX9 ? 2 : 4; 520bf215546Sopenharmony_ci case aco_opcode::buffer_store_byte: 521bf215546Sopenharmony_ci case aco_opcode::buffer_store_short: 522bf215546Sopenharmony_ci case aco_opcode::buffer_store_format_d16_x: 523bf215546Sopenharmony_ci case aco_opcode::flat_store_byte: 524bf215546Sopenharmony_ci case aco_opcode::flat_store_short: 525bf215546Sopenharmony_ci case aco_opcode::scratch_store_byte: 526bf215546Sopenharmony_ci case aco_opcode::scratch_store_short: 527bf215546Sopenharmony_ci case aco_opcode::global_store_byte: 528bf215546Sopenharmony_ci case aco_opcode::global_store_short: return gfx_level >= GFX9 ? 2 : 4; 529bf215546Sopenharmony_ci default: return 4; 530bf215546Sopenharmony_ci } 531bf215546Sopenharmony_ci} 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_civoid 534bf215546Sopenharmony_ciadd_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte, 535bf215546Sopenharmony_ci RegClass rc) 536bf215546Sopenharmony_ci{ 537bf215546Sopenharmony_ci amd_gfx_level gfx_level = ctx.program->gfx_level; 538bf215546Sopenharmony_ci if (instr->isPseudo() || byte == 0) 539bf215546Sopenharmony_ci return; 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci assert(rc.bytes() <= 2); 542bf215546Sopenharmony_ci if (instr->isVALU()) { 543bf215546Sopenharmony_ci /* check if we can use opsel */ 544bf215546Sopenharmony_ci if (instr->format == Format::VOP3) { 545bf215546Sopenharmony_ci assert(byte == 2); 546bf215546Sopenharmony_ci instr->vop3().opsel |= 1 << idx; 547bf215546Sopenharmony_ci return; 548bf215546Sopenharmony_ci } 549bf215546Sopenharmony_ci if (instr->isVOP3P()) { 550bf215546Sopenharmony_ci assert(byte == 2 && !(instr->vop3p().opsel_lo & (1 << idx))); 551bf215546Sopenharmony_ci instr->vop3p().opsel_lo |= 1 << idx; 552bf215546Sopenharmony_ci instr->vop3p().opsel_hi |= 1 << idx; 553bf215546Sopenharmony_ci return; 554bf215546Sopenharmony_ci } 555bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) { 556bf215546Sopenharmony_ci switch (byte) { 557bf215546Sopenharmony_ci case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break; 558bf215546Sopenharmony_ci case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break; 559bf215546Sopenharmony_ci case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break; 560bf215546Sopenharmony_ci case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break; 561bf215546Sopenharmony_ci } 562bf215546Sopenharmony_ci return; 563bf215546Sopenharmony_ci } 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci /* use SDWA */ 566bf215546Sopenharmony_ci assert(can_use_SDWA(gfx_level, instr, false)); 567bf215546Sopenharmony_ci convert_to_SDWA(gfx_level, instr); 568bf215546Sopenharmony_ci return; 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci assert(byte == 2); 572bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::ds_write_b8) 573bf215546Sopenharmony_ci instr->opcode = aco_opcode::ds_write_b8_d16_hi; 574bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::ds_write_b16) 575bf215546Sopenharmony_ci instr->opcode = aco_opcode::ds_write_b16_d16_hi; 576bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::buffer_store_byte) 577bf215546Sopenharmony_ci instr->opcode = aco_opcode::buffer_store_byte_d16_hi; 578bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::buffer_store_short) 579bf215546Sopenharmony_ci instr->opcode = aco_opcode::buffer_store_short_d16_hi; 580bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::buffer_store_format_d16_x) 581bf215546Sopenharmony_ci instr->opcode = aco_opcode::buffer_store_format_d16_hi_x; 582bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::flat_store_byte) 583bf215546Sopenharmony_ci instr->opcode = aco_opcode::flat_store_byte_d16_hi; 584bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::flat_store_short) 585bf215546Sopenharmony_ci instr->opcode = aco_opcode::flat_store_short_d16_hi; 586bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::scratch_store_byte) 587bf215546Sopenharmony_ci instr->opcode = aco_opcode::scratch_store_byte_d16_hi; 588bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::scratch_store_short) 589bf215546Sopenharmony_ci instr->opcode = aco_opcode::scratch_store_short_d16_hi; 590bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::global_store_byte) 591bf215546Sopenharmony_ci instr->opcode = aco_opcode::global_store_byte_d16_hi; 592bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::global_store_short) 593bf215546Sopenharmony_ci instr->opcode = aco_opcode::global_store_short_d16_hi; 594bf215546Sopenharmony_ci else 595bf215546Sopenharmony_ci unreachable("Something went wrong: Impossible register assignment."); 596bf215546Sopenharmony_ci return; 597bf215546Sopenharmony_ci} 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci/* minimum_stride, bytes_written */ 600bf215546Sopenharmony_cistd::pair<unsigned, unsigned> 601bf215546Sopenharmony_ciget_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc) 602bf215546Sopenharmony_ci{ 603bf215546Sopenharmony_ci amd_gfx_level gfx_level = program->gfx_level; 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci if (instr->isPseudo()) { 606bf215546Sopenharmony_ci if (gfx_level >= GFX8) 607bf215546Sopenharmony_ci return std::make_pair(rc.bytes() % 2 == 0 ? 2 : 1, rc.bytes()); 608bf215546Sopenharmony_ci else 609bf215546Sopenharmony_ci return std::make_pair(4, rc.size() * 4u); 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci if (instr->isVALU() || instr->isVINTRP()) { 613bf215546Sopenharmony_ci assert(rc.bytes() <= 2); 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci if (can_use_SDWA(gfx_level, instr, false)) 616bf215546Sopenharmony_ci return std::make_pair(rc.bytes(), rc.bytes()); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci unsigned bytes_written = 4u; 619bf215546Sopenharmony_ci if (instr_is_16bit(gfx_level, instr->opcode)) 620bf215546Sopenharmony_ci bytes_written = 2u; 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci unsigned stride = 4u; 623bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_fma_mixlo_f16 || 624bf215546Sopenharmony_ci can_use_opsel(gfx_level, instr->opcode, -1)) 625bf215546Sopenharmony_ci stride = 2u; 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci return std::make_pair(stride, bytes_written); 628bf215546Sopenharmony_ci } 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci switch (instr->opcode) { 631bf215546Sopenharmony_ci /* D16 loads with _hi version */ 632bf215546Sopenharmony_ci case aco_opcode::ds_read_u8_d16: 633bf215546Sopenharmony_ci case aco_opcode::ds_read_i8_d16: 634bf215546Sopenharmony_ci case aco_opcode::ds_read_u16_d16: 635bf215546Sopenharmony_ci case aco_opcode::flat_load_ubyte_d16: 636bf215546Sopenharmony_ci case aco_opcode::flat_load_sbyte_d16: 637bf215546Sopenharmony_ci case aco_opcode::flat_load_short_d16: 638bf215546Sopenharmony_ci case aco_opcode::global_load_ubyte_d16: 639bf215546Sopenharmony_ci case aco_opcode::global_load_sbyte_d16: 640bf215546Sopenharmony_ci case aco_opcode::global_load_short_d16: 641bf215546Sopenharmony_ci case aco_opcode::scratch_load_ubyte_d16: 642bf215546Sopenharmony_ci case aco_opcode::scratch_load_sbyte_d16: 643bf215546Sopenharmony_ci case aco_opcode::scratch_load_short_d16: 644bf215546Sopenharmony_ci case aco_opcode::buffer_load_ubyte_d16: 645bf215546Sopenharmony_ci case aco_opcode::buffer_load_sbyte_d16: 646bf215546Sopenharmony_ci case aco_opcode::buffer_load_short_d16: 647bf215546Sopenharmony_ci case aco_opcode::buffer_load_format_d16_x: { 648bf215546Sopenharmony_ci assert(gfx_level >= GFX9); 649bf215546Sopenharmony_ci if (!program->dev.sram_ecc_enabled) 650bf215546Sopenharmony_ci return std::make_pair(2u, 2u); 651bf215546Sopenharmony_ci else 652bf215546Sopenharmony_ci return std::make_pair(2u, 4u); 653bf215546Sopenharmony_ci } 654bf215546Sopenharmony_ci /* 3-component D16 loads */ 655bf215546Sopenharmony_ci case aco_opcode::buffer_load_format_d16_xyz: 656bf215546Sopenharmony_ci case aco_opcode::tbuffer_load_format_d16_xyz: { 657bf215546Sopenharmony_ci assert(gfx_level >= GFX9); 658bf215546Sopenharmony_ci if (!program->dev.sram_ecc_enabled) 659bf215546Sopenharmony_ci return std::make_pair(4u, 6u); 660bf215546Sopenharmony_ci break; 661bf215546Sopenharmony_ci } 662bf215546Sopenharmony_ci 663bf215546Sopenharmony_ci default: break; 664bf215546Sopenharmony_ci } 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci if (instr->isMIMG() && instr->mimg().d16 && !program->dev.sram_ecc_enabled) { 667bf215546Sopenharmony_ci assert(gfx_level >= GFX9); 668bf215546Sopenharmony_ci return std::make_pair(4u, rc.bytes()); 669bf215546Sopenharmony_ci } 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci return std::make_pair(4, rc.size() * 4u); 672bf215546Sopenharmony_ci} 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_civoid 675bf215546Sopenharmony_ciadd_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg) 676bf215546Sopenharmony_ci{ 677bf215546Sopenharmony_ci if (instr->isPseudo()) 678bf215546Sopenharmony_ci return; 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci if (instr->isVALU()) { 681bf215546Sopenharmony_ci amd_gfx_level gfx_level = program->gfx_level; 682bf215546Sopenharmony_ci assert(instr->definitions[0].bytes() <= 2); 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci if (reg.byte() == 0 && instr_is_16bit(gfx_level, instr->opcode)) 685bf215546Sopenharmony_ci return; 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci /* check if we can use opsel */ 688bf215546Sopenharmony_ci if (instr->format == Format::VOP3) { 689bf215546Sopenharmony_ci assert(reg.byte() == 2); 690bf215546Sopenharmony_ci assert(can_use_opsel(gfx_level, instr->opcode, -1)); 691bf215546Sopenharmony_ci instr->vop3().opsel |= (1 << 3); /* dst in high half */ 692bf215546Sopenharmony_ci return; 693bf215546Sopenharmony_ci } 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_fma_mixlo_f16) { 696bf215546Sopenharmony_ci instr->opcode = aco_opcode::v_fma_mixhi_f16; 697bf215546Sopenharmony_ci return; 698bf215546Sopenharmony_ci } 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci /* use SDWA */ 701bf215546Sopenharmony_ci assert(can_use_SDWA(gfx_level, instr, false)); 702bf215546Sopenharmony_ci convert_to_SDWA(gfx_level, instr); 703bf215546Sopenharmony_ci return; 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci if (reg.byte() == 0) 707bf215546Sopenharmony_ci return; 708bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::buffer_load_ubyte_d16) 709bf215546Sopenharmony_ci instr->opcode = aco_opcode::buffer_load_ubyte_d16_hi; 710bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::buffer_load_sbyte_d16) 711bf215546Sopenharmony_ci instr->opcode = aco_opcode::buffer_load_sbyte_d16_hi; 712bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::buffer_load_short_d16) 713bf215546Sopenharmony_ci instr->opcode = aco_opcode::buffer_load_short_d16_hi; 714bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::buffer_load_format_d16_x) 715bf215546Sopenharmony_ci instr->opcode = aco_opcode::buffer_load_format_d16_hi_x; 716bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::flat_load_ubyte_d16) 717bf215546Sopenharmony_ci instr->opcode = aco_opcode::flat_load_ubyte_d16_hi; 718bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::flat_load_sbyte_d16) 719bf215546Sopenharmony_ci instr->opcode = aco_opcode::flat_load_sbyte_d16_hi; 720bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::flat_load_short_d16) 721bf215546Sopenharmony_ci instr->opcode = aco_opcode::flat_load_short_d16_hi; 722bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::scratch_load_ubyte_d16) 723bf215546Sopenharmony_ci instr->opcode = aco_opcode::scratch_load_ubyte_d16_hi; 724bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::scratch_load_sbyte_d16) 725bf215546Sopenharmony_ci instr->opcode = aco_opcode::scratch_load_sbyte_d16_hi; 726bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::scratch_load_short_d16) 727bf215546Sopenharmony_ci instr->opcode = aco_opcode::scratch_load_short_d16_hi; 728bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::global_load_ubyte_d16) 729bf215546Sopenharmony_ci instr->opcode = aco_opcode::global_load_ubyte_d16_hi; 730bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::global_load_sbyte_d16) 731bf215546Sopenharmony_ci instr->opcode = aco_opcode::global_load_sbyte_d16_hi; 732bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::global_load_short_d16) 733bf215546Sopenharmony_ci instr->opcode = aco_opcode::global_load_short_d16_hi; 734bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::ds_read_u8_d16) 735bf215546Sopenharmony_ci instr->opcode = aco_opcode::ds_read_u8_d16_hi; 736bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::ds_read_i8_d16) 737bf215546Sopenharmony_ci instr->opcode = aco_opcode::ds_read_i8_d16_hi; 738bf215546Sopenharmony_ci else if (instr->opcode == aco_opcode::ds_read_u16_d16) 739bf215546Sopenharmony_ci instr->opcode = aco_opcode::ds_read_u16_d16_hi; 740bf215546Sopenharmony_ci else 741bf215546Sopenharmony_ci unreachable("Something went wrong: Impossible register assignment."); 742bf215546Sopenharmony_ci} 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_civoid 745bf215546Sopenharmony_ciadjust_max_used_regs(ra_ctx& ctx, RegClass rc, unsigned reg) 746bf215546Sopenharmony_ci{ 747bf215546Sopenharmony_ci uint16_t max_addressible_sgpr = ctx.sgpr_limit; 748bf215546Sopenharmony_ci unsigned size = rc.size(); 749bf215546Sopenharmony_ci if (rc.type() == RegType::vgpr) { 750bf215546Sopenharmony_ci assert(reg >= 256); 751bf215546Sopenharmony_ci uint16_t hi = reg - 256 + size - 1; 752bf215546Sopenharmony_ci ctx.max_used_vgpr = std::max(ctx.max_used_vgpr, hi); 753bf215546Sopenharmony_ci } else if (reg + rc.size() <= max_addressible_sgpr) { 754bf215546Sopenharmony_ci uint16_t hi = reg + size - 1; 755bf215546Sopenharmony_ci ctx.max_used_sgpr = std::max(ctx.max_used_sgpr, std::min(hi, max_addressible_sgpr)); 756bf215546Sopenharmony_ci } 757bf215546Sopenharmony_ci} 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_cienum UpdateRenames { 760bf215546Sopenharmony_ci rename_not_killed_ops = 0x1, 761bf215546Sopenharmony_ci fill_killed_ops = 0x2, 762bf215546Sopenharmony_ci}; 763bf215546Sopenharmony_ciMESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(UpdateRenames); 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_civoid 766bf215546Sopenharmony_ciupdate_renames(ra_ctx& ctx, RegisterFile& reg_file, 767bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopies, 768bf215546Sopenharmony_ci aco_ptr<Instruction>& instr, UpdateRenames flags) 769bf215546Sopenharmony_ci{ 770bf215546Sopenharmony_ci /* clear operands */ 771bf215546Sopenharmony_ci for (std::pair<Operand, Definition>& copy : parallelcopies) { 772bf215546Sopenharmony_ci /* the definitions with id are not from this function and already handled */ 773bf215546Sopenharmony_ci if (copy.second.isTemp()) 774bf215546Sopenharmony_ci continue; 775bf215546Sopenharmony_ci reg_file.clear(copy.first); 776bf215546Sopenharmony_ci } 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci /* allocate id's and rename operands: this is done transparently here */ 779bf215546Sopenharmony_ci auto it = parallelcopies.begin(); 780bf215546Sopenharmony_ci while (it != parallelcopies.end()) { 781bf215546Sopenharmony_ci if (it->second.isTemp()) { 782bf215546Sopenharmony_ci ++it; 783bf215546Sopenharmony_ci continue; 784bf215546Sopenharmony_ci } 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci /* check if we moved a definition: change the register and remove copy */ 787bf215546Sopenharmony_ci bool is_def = false; 788bf215546Sopenharmony_ci for (Definition& def : instr->definitions) { 789bf215546Sopenharmony_ci if (def.isTemp() && def.getTemp() == it->first.getTemp()) { 790bf215546Sopenharmony_ci // FIXME: ensure that the definition can use this reg 791bf215546Sopenharmony_ci def.setFixed(it->second.physReg()); 792bf215546Sopenharmony_ci reg_file.fill(def); 793bf215546Sopenharmony_ci ctx.assignments[def.tempId()].reg = def.physReg(); 794bf215546Sopenharmony_ci it = parallelcopies.erase(it); 795bf215546Sopenharmony_ci is_def = true; 796bf215546Sopenharmony_ci break; 797bf215546Sopenharmony_ci } 798bf215546Sopenharmony_ci } 799bf215546Sopenharmony_ci if (is_def) 800bf215546Sopenharmony_ci continue; 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci /* check if we moved another parallelcopy definition */ 803bf215546Sopenharmony_ci for (std::pair<Operand, Definition>& other : parallelcopies) { 804bf215546Sopenharmony_ci if (!other.second.isTemp()) 805bf215546Sopenharmony_ci continue; 806bf215546Sopenharmony_ci if (it->first.getTemp() == other.second.getTemp()) { 807bf215546Sopenharmony_ci other.second.setFixed(it->second.physReg()); 808bf215546Sopenharmony_ci ctx.assignments[other.second.tempId()].reg = other.second.physReg(); 809bf215546Sopenharmony_ci it = parallelcopies.erase(it); 810bf215546Sopenharmony_ci is_def = true; 811bf215546Sopenharmony_ci /* check if we moved an operand, again */ 812bf215546Sopenharmony_ci bool fill = true; 813bf215546Sopenharmony_ci for (Operand& op : instr->operands) { 814bf215546Sopenharmony_ci if (op.isTemp() && op.tempId() == other.second.tempId()) { 815bf215546Sopenharmony_ci // FIXME: ensure that the operand can use this reg 816bf215546Sopenharmony_ci op.setFixed(other.second.physReg()); 817bf215546Sopenharmony_ci fill = (flags & fill_killed_ops) || !op.isKillBeforeDef(); 818bf215546Sopenharmony_ci } 819bf215546Sopenharmony_ci } 820bf215546Sopenharmony_ci if (fill) 821bf215546Sopenharmony_ci reg_file.fill(other.second); 822bf215546Sopenharmony_ci break; 823bf215546Sopenharmony_ci } 824bf215546Sopenharmony_ci } 825bf215546Sopenharmony_ci if (is_def) 826bf215546Sopenharmony_ci continue; 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci std::pair<Operand, Definition>& copy = *it; 829bf215546Sopenharmony_ci copy.second.setTemp(ctx.program->allocateTmp(copy.second.regClass())); 830bf215546Sopenharmony_ci ctx.assignments.emplace_back(copy.second.physReg(), copy.second.regClass()); 831bf215546Sopenharmony_ci assert(ctx.assignments.size() == ctx.program->peekAllocationId()); 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci /* check if we moved an operand */ 834bf215546Sopenharmony_ci bool first = true; 835bf215546Sopenharmony_ci bool fill = true; 836bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 837bf215546Sopenharmony_ci Operand& op = instr->operands[i]; 838bf215546Sopenharmony_ci if (!op.isTemp()) 839bf215546Sopenharmony_ci continue; 840bf215546Sopenharmony_ci if (op.tempId() == copy.first.tempId()) { 841bf215546Sopenharmony_ci bool omit_renaming = !(flags & rename_not_killed_ops) && !op.isKillBeforeDef(); 842bf215546Sopenharmony_ci for (std::pair<Operand, Definition>& pc : parallelcopies) { 843bf215546Sopenharmony_ci PhysReg def_reg = pc.second.physReg(); 844bf215546Sopenharmony_ci omit_renaming &= def_reg > copy.first.physReg() 845bf215546Sopenharmony_ci ? (copy.first.physReg() + copy.first.size() <= def_reg.reg()) 846bf215546Sopenharmony_ci : (def_reg + pc.second.size() <= copy.first.physReg().reg()); 847bf215546Sopenharmony_ci } 848bf215546Sopenharmony_ci if (omit_renaming) { 849bf215546Sopenharmony_ci if (first) 850bf215546Sopenharmony_ci op.setFirstKill(true); 851bf215546Sopenharmony_ci else 852bf215546Sopenharmony_ci op.setKill(true); 853bf215546Sopenharmony_ci first = false; 854bf215546Sopenharmony_ci continue; 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci op.setTemp(copy.second.getTemp()); 857bf215546Sopenharmony_ci op.setFixed(copy.second.physReg()); 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci fill = (flags & fill_killed_ops) || !op.isKillBeforeDef(); 860bf215546Sopenharmony_ci } 861bf215546Sopenharmony_ci } 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_ci if (fill) 864bf215546Sopenharmony_ci reg_file.fill(copy.second); 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci ++it; 867bf215546Sopenharmony_ci } 868bf215546Sopenharmony_ci} 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_cistd::pair<PhysReg, bool> 871bf215546Sopenharmony_ciget_reg_simple(ra_ctx& ctx, RegisterFile& reg_file, DefInfo info) 872bf215546Sopenharmony_ci{ 873bf215546Sopenharmony_ci const PhysRegInterval& bounds = info.bounds; 874bf215546Sopenharmony_ci uint32_t size = info.size; 875bf215546Sopenharmony_ci uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride; 876bf215546Sopenharmony_ci RegClass rc = info.rc; 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_ci DefInfo new_info = info; 879bf215546Sopenharmony_ci new_info.rc = RegClass(rc.type(), size); 880bf215546Sopenharmony_ci for (unsigned new_stride = 16; new_stride > stride; new_stride /= 2) { 881bf215546Sopenharmony_ci if (size % new_stride) 882bf215546Sopenharmony_ci continue; 883bf215546Sopenharmony_ci new_info.stride = new_stride; 884bf215546Sopenharmony_ci std::pair<PhysReg, bool> res = get_reg_simple(ctx, reg_file, new_info); 885bf215546Sopenharmony_ci if (res.second) 886bf215546Sopenharmony_ci return res; 887bf215546Sopenharmony_ci } 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci auto is_free = [&](PhysReg reg_index) 890bf215546Sopenharmony_ci { return reg_file[reg_index] == 0 && !ctx.war_hint[reg_index]; }; 891bf215546Sopenharmony_ci 892bf215546Sopenharmony_ci if (stride == 1) { 893bf215546Sopenharmony_ci /* best fit algorithm: find the smallest gap to fit in the variable */ 894bf215546Sopenharmony_ci PhysRegInterval best_gap{PhysReg{0}, UINT_MAX}; 895bf215546Sopenharmony_ci const unsigned max_gpr = 896bf215546Sopenharmony_ci (rc.type() == RegType::vgpr) ? (256 + ctx.max_used_vgpr) : ctx.max_used_sgpr; 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci PhysRegIterator reg_it = bounds.begin(); 899bf215546Sopenharmony_ci const PhysRegIterator end_it = 900bf215546Sopenharmony_ci std::min(bounds.end(), std::max(PhysRegIterator{PhysReg{max_gpr + 1}}, reg_it)); 901bf215546Sopenharmony_ci while (reg_it != bounds.end()) { 902bf215546Sopenharmony_ci /* Find the next chunk of available register slots */ 903bf215546Sopenharmony_ci reg_it = std::find_if(reg_it, end_it, is_free); 904bf215546Sopenharmony_ci auto next_nonfree_it = std::find_if_not(reg_it, end_it, is_free); 905bf215546Sopenharmony_ci if (reg_it == bounds.end()) { 906bf215546Sopenharmony_ci break; 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci if (next_nonfree_it == end_it) { 910bf215546Sopenharmony_ci /* All registers past max_used_gpr are free */ 911bf215546Sopenharmony_ci next_nonfree_it = bounds.end(); 912bf215546Sopenharmony_ci } 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci PhysRegInterval gap = PhysRegInterval::from_until(*reg_it, *next_nonfree_it); 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci /* early return on exact matches */ 917bf215546Sopenharmony_ci if (size == gap.size) { 918bf215546Sopenharmony_ci adjust_max_used_regs(ctx, rc, gap.lo()); 919bf215546Sopenharmony_ci return {gap.lo(), true}; 920bf215546Sopenharmony_ci } 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci /* check if it fits and the gap size is smaller */ 923bf215546Sopenharmony_ci if (size < gap.size && gap.size < best_gap.size) { 924bf215546Sopenharmony_ci best_gap = gap; 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci /* Move past the processed chunk */ 928bf215546Sopenharmony_ci reg_it = next_nonfree_it; 929bf215546Sopenharmony_ci } 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_ci if (best_gap.size == UINT_MAX) 932bf215546Sopenharmony_ci return {{}, false}; 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci /* find best position within gap by leaving a good stride for other variables*/ 935bf215546Sopenharmony_ci unsigned buffer = best_gap.size - size; 936bf215546Sopenharmony_ci if (buffer > 1) { 937bf215546Sopenharmony_ci if (((best_gap.lo() + size) % 8 != 0 && (best_gap.lo() + buffer) % 8 == 0) || 938bf215546Sopenharmony_ci ((best_gap.lo() + size) % 4 != 0 && (best_gap.lo() + buffer) % 4 == 0) || 939bf215546Sopenharmony_ci ((best_gap.lo() + size) % 2 != 0 && (best_gap.lo() + buffer) % 2 == 0)) 940bf215546Sopenharmony_ci best_gap = {PhysReg{best_gap.lo() + buffer}, best_gap.size - buffer}; 941bf215546Sopenharmony_ci } 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci adjust_max_used_regs(ctx, rc, best_gap.lo()); 944bf215546Sopenharmony_ci return {best_gap.lo(), true}; 945bf215546Sopenharmony_ci } 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci for (PhysRegInterval reg_win = {bounds.lo(), size}; reg_win.hi() <= bounds.hi(); 948bf215546Sopenharmony_ci reg_win += stride) { 949bf215546Sopenharmony_ci if (reg_file[reg_win.lo()] != 0) { 950bf215546Sopenharmony_ci continue; 951bf215546Sopenharmony_ci } 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci bool is_valid = std::all_of(std::next(reg_win.begin()), reg_win.end(), is_free); 954bf215546Sopenharmony_ci if (is_valid) { 955bf215546Sopenharmony_ci adjust_max_used_regs(ctx, rc, reg_win.lo()); 956bf215546Sopenharmony_ci return {reg_win.lo(), true}; 957bf215546Sopenharmony_ci } 958bf215546Sopenharmony_ci } 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci /* do this late because using the upper bytes of a register can require 961bf215546Sopenharmony_ci * larger instruction encodings or copies 962bf215546Sopenharmony_ci * TODO: don't do this in situations where it doesn't benefit */ 963bf215546Sopenharmony_ci if (rc.is_subdword()) { 964bf215546Sopenharmony_ci for (std::pair<const uint32_t, std::array<uint32_t, 4>>& entry : reg_file.subdword_regs) { 965bf215546Sopenharmony_ci assert(reg_file[PhysReg{entry.first}] == 0xF0000000); 966bf215546Sopenharmony_ci if (!bounds.contains({PhysReg{entry.first}, rc.size()})) 967bf215546Sopenharmony_ci continue; 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i += info.stride) { 970bf215546Sopenharmony_ci /* check if there's a block of free bytes large enough to hold the register */ 971bf215546Sopenharmony_ci bool reg_found = 972bf215546Sopenharmony_ci std::all_of(&entry.second[i], &entry.second[std::min(4u, i + rc.bytes())], 973bf215546Sopenharmony_ci [](unsigned v) { return v == 0; }); 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci /* check if also the neighboring reg is free if needed */ 976bf215546Sopenharmony_ci if (reg_found && i + rc.bytes() > 4) 977bf215546Sopenharmony_ci reg_found = (reg_file[PhysReg{entry.first + 1}] == 0); 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci if (reg_found) { 980bf215546Sopenharmony_ci PhysReg res{entry.first}; 981bf215546Sopenharmony_ci res.reg_b += i; 982bf215546Sopenharmony_ci adjust_max_used_regs(ctx, rc, entry.first); 983bf215546Sopenharmony_ci return {res, true}; 984bf215546Sopenharmony_ci } 985bf215546Sopenharmony_ci } 986bf215546Sopenharmony_ci } 987bf215546Sopenharmony_ci } 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci return {{}, false}; 990bf215546Sopenharmony_ci} 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci/* collect variables from a register area */ 993bf215546Sopenharmony_cistd::vector<unsigned> 994bf215546Sopenharmony_cifind_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interval) 995bf215546Sopenharmony_ci{ 996bf215546Sopenharmony_ci std::vector<unsigned> vars; 997bf215546Sopenharmony_ci for (PhysReg j : reg_interval) { 998bf215546Sopenharmony_ci if (reg_file.is_blocked(j)) 999bf215546Sopenharmony_ci continue; 1000bf215546Sopenharmony_ci if (reg_file[j] == 0xF0000000) { 1001bf215546Sopenharmony_ci for (unsigned k = 0; k < 4; k++) { 1002bf215546Sopenharmony_ci unsigned id = reg_file.subdword_regs[j][k]; 1003bf215546Sopenharmony_ci if (id && (vars.empty() || id != vars.back())) 1004bf215546Sopenharmony_ci vars.emplace_back(id); 1005bf215546Sopenharmony_ci } 1006bf215546Sopenharmony_ci } else { 1007bf215546Sopenharmony_ci unsigned id = reg_file[j]; 1008bf215546Sopenharmony_ci if (id && (vars.empty() || id != vars.back())) 1009bf215546Sopenharmony_ci vars.emplace_back(id); 1010bf215546Sopenharmony_ci } 1011bf215546Sopenharmony_ci } 1012bf215546Sopenharmony_ci return vars; 1013bf215546Sopenharmony_ci} 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci/* collect variables from a register area and clear reg_file 1016bf215546Sopenharmony_ci * variables are sorted in decreasing size and 1017bf215546Sopenharmony_ci * increasing assigned register 1018bf215546Sopenharmony_ci */ 1019bf215546Sopenharmony_cistd::vector<unsigned> 1020bf215546Sopenharmony_cicollect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interval) 1021bf215546Sopenharmony_ci{ 1022bf215546Sopenharmony_ci std::vector<unsigned> ids = find_vars(ctx, reg_file, reg_interval); 1023bf215546Sopenharmony_ci std::sort(ids.begin(), ids.end(), 1024bf215546Sopenharmony_ci [&](unsigned a, unsigned b) 1025bf215546Sopenharmony_ci { 1026bf215546Sopenharmony_ci assignment& var_a = ctx.assignments[a]; 1027bf215546Sopenharmony_ci assignment& var_b = ctx.assignments[b]; 1028bf215546Sopenharmony_ci return var_a.rc.bytes() > var_b.rc.bytes() || 1029bf215546Sopenharmony_ci (var_a.rc.bytes() == var_b.rc.bytes() && var_a.reg < var_b.reg); 1030bf215546Sopenharmony_ci }); 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci for (unsigned id : ids) { 1033bf215546Sopenharmony_ci assignment& var = ctx.assignments[id]; 1034bf215546Sopenharmony_ci reg_file.clear(var.reg, var.rc); 1035bf215546Sopenharmony_ci } 1036bf215546Sopenharmony_ci return ids; 1037bf215546Sopenharmony_ci} 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_cistd::pair<PhysReg, bool> 1040bf215546Sopenharmony_ciget_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file, 1041bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopies, 1042bf215546Sopenharmony_ci aco_ptr<Instruction>& instr, const PhysRegInterval def_reg, 1043bf215546Sopenharmony_ci DefInfo info, unsigned id) 1044bf215546Sopenharmony_ci{ 1045bf215546Sopenharmony_ci PhysReg reg = def_reg.lo(); 1046bf215546Sopenharmony_ci /* dead operand: return position in vector */ 1047bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 1048bf215546Sopenharmony_ci if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id && 1049bf215546Sopenharmony_ci instr->operands[i].isKillBeforeDef()) { 1050bf215546Sopenharmony_ci assert(!reg_file.test(reg, instr->operands[i].bytes())); 1051bf215546Sopenharmony_ci return {reg, info.rc.is_subdword() || reg.byte() == 0}; 1052bf215546Sopenharmony_ci } 1053bf215546Sopenharmony_ci reg.reg_b += instr->operands[i].bytes(); 1054bf215546Sopenharmony_ci } 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci if (ctx.program->gfx_level <= GFX8) 1057bf215546Sopenharmony_ci return {PhysReg(), false}; 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci /* check if the previous position was in vector */ 1060bf215546Sopenharmony_ci assignment& var = ctx.assignments[id]; 1061bf215546Sopenharmony_ci if (def_reg.contains(PhysRegInterval{var.reg, info.size})) { 1062bf215546Sopenharmony_ci reg = def_reg.lo(); 1063bf215546Sopenharmony_ci /* try to use the previous register of the operand */ 1064bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 1065bf215546Sopenharmony_ci if (reg != var.reg) { 1066bf215546Sopenharmony_ci reg.reg_b += instr->operands[i].bytes(); 1067bf215546Sopenharmony_ci continue; 1068bf215546Sopenharmony_ci } 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci /* check if we can swap positions */ 1071bf215546Sopenharmony_ci if (instr->operands[i].isTemp() && instr->operands[i].isFirstKill() && 1072bf215546Sopenharmony_ci instr->operands[i].regClass() == info.rc) { 1073bf215546Sopenharmony_ci assignment& op = ctx.assignments[instr->operands[i].tempId()]; 1074bf215546Sopenharmony_ci /* if everything matches, create parallelcopy for the killed operand */ 1075bf215546Sopenharmony_ci if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) && 1076bf215546Sopenharmony_ci reg_file.get_id(op.reg) == instr->operands[i].tempId()) { 1077bf215546Sopenharmony_ci Definition pc_def = Definition(reg, info.rc); 1078bf215546Sopenharmony_ci parallelcopies.emplace_back(instr->operands[i], pc_def); 1079bf215546Sopenharmony_ci return {op.reg, true}; 1080bf215546Sopenharmony_ci } 1081bf215546Sopenharmony_ci } 1082bf215546Sopenharmony_ci return {PhysReg(), false}; 1083bf215546Sopenharmony_ci } 1084bf215546Sopenharmony_ci } 1085bf215546Sopenharmony_ci return {PhysReg(), false}; 1086bf215546Sopenharmony_ci} 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_cibool 1089bf215546Sopenharmony_ciget_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file, 1090bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopies, 1091bf215546Sopenharmony_ci const std::vector<unsigned>& vars, const PhysRegInterval bounds, 1092bf215546Sopenharmony_ci aco_ptr<Instruction>& instr, const PhysRegInterval def_reg) 1093bf215546Sopenharmony_ci{ 1094bf215546Sopenharmony_ci /* Variables are sorted from large to small and with increasing assigned register */ 1095bf215546Sopenharmony_ci for (unsigned id : vars) { 1096bf215546Sopenharmony_ci assignment& var = ctx.assignments[id]; 1097bf215546Sopenharmony_ci DefInfo info = DefInfo(ctx, ctx.pseudo_dummy, var.rc, -1); 1098bf215546Sopenharmony_ci uint32_t size = info.size; 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_ci /* check if this is a dead operand, then we can re-use the space from the definition 1101bf215546Sopenharmony_ci * also use the correct stride for sub-dword operands */ 1102bf215546Sopenharmony_ci bool is_dead_operand = false; 1103bf215546Sopenharmony_ci std::pair<PhysReg, bool> res{PhysReg(), false}; 1104bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_create_vector) { 1105bf215546Sopenharmony_ci res = 1106bf215546Sopenharmony_ci get_reg_for_create_vector_copy(ctx, reg_file, parallelcopies, instr, def_reg, info, id); 1107bf215546Sopenharmony_ci } else { 1108bf215546Sopenharmony_ci for (unsigned i = 0; !is_phi(instr) && i < instr->operands.size(); i++) { 1109bf215546Sopenharmony_ci if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id) { 1110bf215546Sopenharmony_ci info = DefInfo(ctx, instr, var.rc, i); 1111bf215546Sopenharmony_ci if (instr->operands[i].isKillBeforeDef()) { 1112bf215546Sopenharmony_ci info.bounds = def_reg; 1113bf215546Sopenharmony_ci res = get_reg_simple(ctx, reg_file, info); 1114bf215546Sopenharmony_ci is_dead_operand = true; 1115bf215546Sopenharmony_ci } 1116bf215546Sopenharmony_ci break; 1117bf215546Sopenharmony_ci } 1118bf215546Sopenharmony_ci } 1119bf215546Sopenharmony_ci } 1120bf215546Sopenharmony_ci if (!res.second) { 1121bf215546Sopenharmony_ci /* Try to find space within the bounds but outside of the definition */ 1122bf215546Sopenharmony_ci info.bounds = PhysRegInterval::from_until(bounds.lo(), MIN2(def_reg.lo(), bounds.hi())); 1123bf215546Sopenharmony_ci res = get_reg_simple(ctx, reg_file, info); 1124bf215546Sopenharmony_ci if (!res.second && def_reg.hi() <= bounds.hi()) { 1125bf215546Sopenharmony_ci unsigned lo = (def_reg.hi() + info.stride - 1) & ~(info.stride - 1); 1126bf215546Sopenharmony_ci info.bounds = PhysRegInterval::from_until(PhysReg{lo}, bounds.hi()); 1127bf215546Sopenharmony_ci res = get_reg_simple(ctx, reg_file, info); 1128bf215546Sopenharmony_ci } 1129bf215546Sopenharmony_ci } 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci if (res.second) { 1132bf215546Sopenharmony_ci /* mark the area as blocked */ 1133bf215546Sopenharmony_ci reg_file.block(res.first, var.rc); 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci /* create parallelcopy pair (without definition id) */ 1136bf215546Sopenharmony_ci Temp tmp = Temp(id, var.rc); 1137bf215546Sopenharmony_ci Operand pc_op = Operand(tmp); 1138bf215546Sopenharmony_ci pc_op.setFixed(var.reg); 1139bf215546Sopenharmony_ci Definition pc_def = Definition(res.first, pc_op.regClass()); 1140bf215546Sopenharmony_ci parallelcopies.emplace_back(pc_op, pc_def); 1141bf215546Sopenharmony_ci continue; 1142bf215546Sopenharmony_ci } 1143bf215546Sopenharmony_ci 1144bf215546Sopenharmony_ci PhysReg best_pos = bounds.lo(); 1145bf215546Sopenharmony_ci unsigned num_moves = 0xFF; 1146bf215546Sopenharmony_ci unsigned num_vars = 0; 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci /* we use a sliding window to find potential positions */ 1149bf215546Sopenharmony_ci unsigned stride = var.rc.is_subdword() ? 1 : info.stride; 1150bf215546Sopenharmony_ci for (PhysRegInterval reg_win{bounds.lo(), size}; reg_win.hi() <= bounds.hi(); 1151bf215546Sopenharmony_ci reg_win += stride) { 1152bf215546Sopenharmony_ci if (!is_dead_operand && intersects(reg_win, def_reg)) 1153bf215546Sopenharmony_ci continue; 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci /* second, check that we have at most k=num_moves elements in the window 1156bf215546Sopenharmony_ci * and no element is larger than the currently processed one */ 1157bf215546Sopenharmony_ci unsigned k = 0; 1158bf215546Sopenharmony_ci unsigned n = 0; 1159bf215546Sopenharmony_ci unsigned last_var = 0; 1160bf215546Sopenharmony_ci bool found = true; 1161bf215546Sopenharmony_ci for (PhysReg j : reg_win) { 1162bf215546Sopenharmony_ci if (reg_file[j] == 0 || reg_file[j] == last_var) 1163bf215546Sopenharmony_ci continue; 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_ci if (reg_file.is_blocked(j) || k > num_moves) { 1166bf215546Sopenharmony_ci found = false; 1167bf215546Sopenharmony_ci break; 1168bf215546Sopenharmony_ci } 1169bf215546Sopenharmony_ci if (reg_file[j] == 0xF0000000) { 1170bf215546Sopenharmony_ci k += 1; 1171bf215546Sopenharmony_ci n++; 1172bf215546Sopenharmony_ci continue; 1173bf215546Sopenharmony_ci } 1174bf215546Sopenharmony_ci /* we cannot split live ranges of linear vgprs inside control flow */ 1175bf215546Sopenharmony_ci if (!(ctx.block->kind & block_kind_top_level) && 1176bf215546Sopenharmony_ci ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) { 1177bf215546Sopenharmony_ci found = false; 1178bf215546Sopenharmony_ci break; 1179bf215546Sopenharmony_ci } 1180bf215546Sopenharmony_ci bool is_kill = false; 1181bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 1182bf215546Sopenharmony_ci if (op.isTemp() && op.isKillBeforeDef() && op.tempId() == reg_file[j]) { 1183bf215546Sopenharmony_ci is_kill = true; 1184bf215546Sopenharmony_ci break; 1185bf215546Sopenharmony_ci } 1186bf215546Sopenharmony_ci } 1187bf215546Sopenharmony_ci if (!is_kill && ctx.assignments[reg_file[j]].rc.size() >= size) { 1188bf215546Sopenharmony_ci found = false; 1189bf215546Sopenharmony_ci break; 1190bf215546Sopenharmony_ci } 1191bf215546Sopenharmony_ci 1192bf215546Sopenharmony_ci k += ctx.assignments[reg_file[j]].rc.size(); 1193bf215546Sopenharmony_ci last_var = reg_file[j]; 1194bf215546Sopenharmony_ci n++; 1195bf215546Sopenharmony_ci if (k > num_moves || (k == num_moves && n <= num_vars)) { 1196bf215546Sopenharmony_ci found = false; 1197bf215546Sopenharmony_ci break; 1198bf215546Sopenharmony_ci } 1199bf215546Sopenharmony_ci } 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci if (found) { 1202bf215546Sopenharmony_ci best_pos = reg_win.lo(); 1203bf215546Sopenharmony_ci num_moves = k; 1204bf215546Sopenharmony_ci num_vars = n; 1205bf215546Sopenharmony_ci } 1206bf215546Sopenharmony_ci } 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_ci /* FIXME: we messed up and couldn't find space for the variables to be copied */ 1209bf215546Sopenharmony_ci if (num_moves == 0xFF) 1210bf215546Sopenharmony_ci return false; 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci PhysRegInterval reg_win{best_pos, size}; 1213bf215546Sopenharmony_ci 1214bf215546Sopenharmony_ci /* collect variables and block reg file */ 1215bf215546Sopenharmony_ci std::vector<unsigned> new_vars = collect_vars(ctx, reg_file, reg_win); 1216bf215546Sopenharmony_ci 1217bf215546Sopenharmony_ci /* mark the area as blocked */ 1218bf215546Sopenharmony_ci reg_file.block(reg_win.lo(), var.rc); 1219bf215546Sopenharmony_ci adjust_max_used_regs(ctx, var.rc, reg_win.lo()); 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, bounds, instr, def_reg)) 1222bf215546Sopenharmony_ci return false; 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci /* create parallelcopy pair (without definition id) */ 1225bf215546Sopenharmony_ci Temp tmp = Temp(id, var.rc); 1226bf215546Sopenharmony_ci Operand pc_op = Operand(tmp); 1227bf215546Sopenharmony_ci pc_op.setFixed(var.reg); 1228bf215546Sopenharmony_ci Definition pc_def = Definition(reg_win.lo(), pc_op.regClass()); 1229bf215546Sopenharmony_ci parallelcopies.emplace_back(pc_op, pc_def); 1230bf215546Sopenharmony_ci } 1231bf215546Sopenharmony_ci 1232bf215546Sopenharmony_ci return true; 1233bf215546Sopenharmony_ci} 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_cistd::pair<PhysReg, bool> 1236bf215546Sopenharmony_ciget_reg_impl(ra_ctx& ctx, RegisterFile& reg_file, 1237bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopies, const DefInfo& info, 1238bf215546Sopenharmony_ci aco_ptr<Instruction>& instr) 1239bf215546Sopenharmony_ci{ 1240bf215546Sopenharmony_ci const PhysRegInterval& bounds = info.bounds; 1241bf215546Sopenharmony_ci uint32_t size = info.size; 1242bf215546Sopenharmony_ci uint32_t stride = info.stride; 1243bf215546Sopenharmony_ci RegClass rc = info.rc; 1244bf215546Sopenharmony_ci 1245bf215546Sopenharmony_ci /* check how many free regs we have */ 1246bf215546Sopenharmony_ci unsigned regs_free = reg_file.count_zero(bounds); 1247bf215546Sopenharmony_ci 1248bf215546Sopenharmony_ci /* mark and count killed operands */ 1249bf215546Sopenharmony_ci unsigned killed_ops = 0; 1250bf215546Sopenharmony_ci std::bitset<256> is_killed_operand; /* per-register */ 1251bf215546Sopenharmony_ci for (unsigned j = 0; !is_phi(instr) && j < instr->operands.size(); j++) { 1252bf215546Sopenharmony_ci Operand& op = instr->operands[j]; 1253bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKillBeforeDef() && bounds.contains(op.physReg()) && 1254bf215546Sopenharmony_ci !reg_file.test(PhysReg{op.physReg().reg()}, align(op.bytes() + op.physReg().byte(), 4))) { 1255bf215546Sopenharmony_ci assert(op.isFixed()); 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_ci for (unsigned i = 0; i < op.size(); ++i) { 1258bf215546Sopenharmony_ci is_killed_operand[(op.physReg() & 0xff) + i] = true; 1259bf215546Sopenharmony_ci } 1260bf215546Sopenharmony_ci 1261bf215546Sopenharmony_ci killed_ops += op.getTemp().size(); 1262bf215546Sopenharmony_ci } 1263bf215546Sopenharmony_ci } 1264bf215546Sopenharmony_ci 1265bf215546Sopenharmony_ci assert(regs_free >= size); 1266bf215546Sopenharmony_ci /* we might have to move dead operands to dst in order to make space */ 1267bf215546Sopenharmony_ci unsigned op_moves = 0; 1268bf215546Sopenharmony_ci 1269bf215546Sopenharmony_ci if (size > (regs_free - killed_ops)) 1270bf215546Sopenharmony_ci op_moves = size - (regs_free - killed_ops); 1271bf215546Sopenharmony_ci 1272bf215546Sopenharmony_ci /* find the best position to place the definition */ 1273bf215546Sopenharmony_ci PhysRegInterval best_win = {bounds.lo(), size}; 1274bf215546Sopenharmony_ci unsigned num_moves = 0xFF; 1275bf215546Sopenharmony_ci unsigned num_vars = 0; 1276bf215546Sopenharmony_ci 1277bf215546Sopenharmony_ci /* we use a sliding window to check potential positions */ 1278bf215546Sopenharmony_ci for (PhysRegInterval reg_win = {bounds.lo(), size}; reg_win.hi() <= bounds.hi(); 1279bf215546Sopenharmony_ci reg_win += stride) { 1280bf215546Sopenharmony_ci /* first check if the register window starts in the middle of an 1281bf215546Sopenharmony_ci * allocated variable: this is what we have to fix to allow for 1282bf215546Sopenharmony_ci * num_moves > size */ 1283bf215546Sopenharmony_ci if (reg_win.lo() > bounds.lo() && !reg_file.is_empty_or_blocked(reg_win.lo()) && 1284bf215546Sopenharmony_ci reg_file.get_id(reg_win.lo()) == reg_file.get_id(reg_win.lo().advance(-1))) 1285bf215546Sopenharmony_ci continue; 1286bf215546Sopenharmony_ci if (reg_win.hi() < bounds.hi() && !reg_file.is_empty_or_blocked(reg_win.hi().advance(-1)) && 1287bf215546Sopenharmony_ci reg_file.get_id(reg_win.hi().advance(-1)) == reg_file.get_id(reg_win.hi())) 1288bf215546Sopenharmony_ci continue; 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci /* second, check that we have at most k=num_moves elements in the window 1291bf215546Sopenharmony_ci * and no element is larger than the currently processed one */ 1292bf215546Sopenharmony_ci unsigned k = op_moves; 1293bf215546Sopenharmony_ci unsigned n = 0; 1294bf215546Sopenharmony_ci unsigned remaining_op_moves = op_moves; 1295bf215546Sopenharmony_ci unsigned last_var = 0; 1296bf215546Sopenharmony_ci bool found = true; 1297bf215546Sopenharmony_ci bool aligned = rc == RegClass::v4 && reg_win.lo() % 4 == 0; 1298bf215546Sopenharmony_ci for (const PhysReg j : reg_win) { 1299bf215546Sopenharmony_ci /* dead operands effectively reduce the number of estimated moves */ 1300bf215546Sopenharmony_ci if (is_killed_operand[j & 0xFF]) { 1301bf215546Sopenharmony_ci if (remaining_op_moves) { 1302bf215546Sopenharmony_ci k--; 1303bf215546Sopenharmony_ci remaining_op_moves--; 1304bf215546Sopenharmony_ci } 1305bf215546Sopenharmony_ci continue; 1306bf215546Sopenharmony_ci } 1307bf215546Sopenharmony_ci 1308bf215546Sopenharmony_ci if (reg_file[j] == 0 || reg_file[j] == last_var) 1309bf215546Sopenharmony_ci continue; 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_ci if (reg_file[j] == 0xF0000000) { 1312bf215546Sopenharmony_ci k += 1; 1313bf215546Sopenharmony_ci n++; 1314bf215546Sopenharmony_ci continue; 1315bf215546Sopenharmony_ci } 1316bf215546Sopenharmony_ci 1317bf215546Sopenharmony_ci if (ctx.assignments[reg_file[j]].rc.size() >= size) { 1318bf215546Sopenharmony_ci found = false; 1319bf215546Sopenharmony_ci break; 1320bf215546Sopenharmony_ci } 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_ci /* we cannot split live ranges of linear vgprs inside control flow */ 1323bf215546Sopenharmony_ci // TODO: ensure that live range splits inside control flow are never necessary 1324bf215546Sopenharmony_ci if (!(ctx.block->kind & block_kind_top_level) && 1325bf215546Sopenharmony_ci ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) { 1326bf215546Sopenharmony_ci found = false; 1327bf215546Sopenharmony_ci break; 1328bf215546Sopenharmony_ci } 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_ci k += ctx.assignments[reg_file[j]].rc.size(); 1331bf215546Sopenharmony_ci n++; 1332bf215546Sopenharmony_ci last_var = reg_file[j]; 1333bf215546Sopenharmony_ci } 1334bf215546Sopenharmony_ci 1335bf215546Sopenharmony_ci if (!found || k > num_moves) 1336bf215546Sopenharmony_ci continue; 1337bf215546Sopenharmony_ci if (k == num_moves && n < num_vars) 1338bf215546Sopenharmony_ci continue; 1339bf215546Sopenharmony_ci if (!aligned && k == num_moves && n == num_vars) 1340bf215546Sopenharmony_ci continue; 1341bf215546Sopenharmony_ci 1342bf215546Sopenharmony_ci if (found) { 1343bf215546Sopenharmony_ci best_win = reg_win; 1344bf215546Sopenharmony_ci num_moves = k; 1345bf215546Sopenharmony_ci num_vars = n; 1346bf215546Sopenharmony_ci } 1347bf215546Sopenharmony_ci } 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci if (num_moves == 0xFF) 1350bf215546Sopenharmony_ci return {{}, false}; 1351bf215546Sopenharmony_ci 1352bf215546Sopenharmony_ci /* now, we figured the placement for our definition */ 1353bf215546Sopenharmony_ci RegisterFile tmp_file(reg_file); 1354bf215546Sopenharmony_ci 1355bf215546Sopenharmony_ci /* p_create_vector: also re-place killed operands in the definition space */ 1356bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_create_vector) { 1357bf215546Sopenharmony_ci for (Operand& op : instr->operands) { 1358bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKillBeforeDef()) 1359bf215546Sopenharmony_ci tmp_file.fill(op); 1360bf215546Sopenharmony_ci } 1361bf215546Sopenharmony_ci } 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_ci std::vector<unsigned> vars = collect_vars(ctx, tmp_file, best_win); 1364bf215546Sopenharmony_ci 1365bf215546Sopenharmony_ci /* re-enable killed operands */ 1366bf215546Sopenharmony_ci if (!is_phi(instr) && instr->opcode != aco_opcode::p_create_vector) { 1367bf215546Sopenharmony_ci for (Operand& op : instr->operands) { 1368bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKillBeforeDef()) 1369bf215546Sopenharmony_ci tmp_file.fill(op); 1370bf215546Sopenharmony_ci } 1371bf215546Sopenharmony_ci } 1372bf215546Sopenharmony_ci 1373bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>> pc; 1374bf215546Sopenharmony_ci if (!get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, best_win)) 1375bf215546Sopenharmony_ci return {{}, false}; 1376bf215546Sopenharmony_ci 1377bf215546Sopenharmony_ci parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end()); 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci adjust_max_used_regs(ctx, rc, best_win.lo()); 1380bf215546Sopenharmony_ci return {best_win.lo(), true}; 1381bf215546Sopenharmony_ci} 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_cibool 1384bf215546Sopenharmony_ciget_reg_specified(ra_ctx& ctx, RegisterFile& reg_file, RegClass rc, aco_ptr<Instruction>& instr, 1385bf215546Sopenharmony_ci PhysReg reg) 1386bf215546Sopenharmony_ci{ 1387bf215546Sopenharmony_ci /* catch out-of-range registers */ 1388bf215546Sopenharmony_ci if (reg >= PhysReg{512}) 1389bf215546Sopenharmony_ci return false; 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci std::pair<unsigned, unsigned> sdw_def_info; 1392bf215546Sopenharmony_ci if (rc.is_subdword()) 1393bf215546Sopenharmony_ci sdw_def_info = get_subdword_definition_info(ctx.program, instr, rc); 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci if (rc.is_subdword() && reg.byte() % sdw_def_info.first) 1396bf215546Sopenharmony_ci return false; 1397bf215546Sopenharmony_ci if (!rc.is_subdword() && reg.byte()) 1398bf215546Sopenharmony_ci return false; 1399bf215546Sopenharmony_ci 1400bf215546Sopenharmony_ci if (rc.type() == RegType::sgpr && reg % get_stride(rc) != 0) 1401bf215546Sopenharmony_ci return false; 1402bf215546Sopenharmony_ci 1403bf215546Sopenharmony_ci PhysRegInterval reg_win = {reg, rc.size()}; 1404bf215546Sopenharmony_ci PhysRegInterval bounds = get_reg_bounds(ctx.program, rc.type()); 1405bf215546Sopenharmony_ci PhysRegInterval vcc_win = {vcc, 2}; 1406bf215546Sopenharmony_ci /* VCC is outside the bounds */ 1407bf215546Sopenharmony_ci bool is_vcc = rc.type() == RegType::sgpr && vcc_win.contains(reg_win) && ctx.program->needs_vcc; 1408bf215546Sopenharmony_ci bool is_m0 = rc == s1 && reg == m0; 1409bf215546Sopenharmony_ci if (!bounds.contains(reg_win) && !is_vcc && !is_m0) 1410bf215546Sopenharmony_ci return false; 1411bf215546Sopenharmony_ci 1412bf215546Sopenharmony_ci if (rc.is_subdword()) { 1413bf215546Sopenharmony_ci PhysReg test_reg; 1414bf215546Sopenharmony_ci test_reg.reg_b = reg.reg_b & ~(sdw_def_info.second - 1); 1415bf215546Sopenharmony_ci if (reg_file.test(test_reg, sdw_def_info.second)) 1416bf215546Sopenharmony_ci return false; 1417bf215546Sopenharmony_ci } else { 1418bf215546Sopenharmony_ci if (reg_file.test(reg, rc.bytes())) 1419bf215546Sopenharmony_ci return false; 1420bf215546Sopenharmony_ci } 1421bf215546Sopenharmony_ci 1422bf215546Sopenharmony_ci adjust_max_used_regs(ctx, rc, reg_win.lo()); 1423bf215546Sopenharmony_ci return true; 1424bf215546Sopenharmony_ci} 1425bf215546Sopenharmony_ci 1426bf215546Sopenharmony_cibool 1427bf215546Sopenharmony_ciincrease_register_file(ra_ctx& ctx, RegType type) 1428bf215546Sopenharmony_ci{ 1429bf215546Sopenharmony_ci if (type == RegType::vgpr && ctx.program->max_reg_demand.vgpr < ctx.vgpr_limit) { 1430bf215546Sopenharmony_ci update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr + 1, 1431bf215546Sopenharmony_ci ctx.program->max_reg_demand.sgpr)); 1432bf215546Sopenharmony_ci } else if (type == RegType::sgpr && ctx.program->max_reg_demand.sgpr < ctx.sgpr_limit) { 1433bf215546Sopenharmony_ci update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr, 1434bf215546Sopenharmony_ci ctx.program->max_reg_demand.sgpr + 1)); 1435bf215546Sopenharmony_ci } else { 1436bf215546Sopenharmony_ci return false; 1437bf215546Sopenharmony_ci } 1438bf215546Sopenharmony_ci return true; 1439bf215546Sopenharmony_ci} 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_cistruct IDAndRegClass { 1442bf215546Sopenharmony_ci IDAndRegClass(unsigned id_, RegClass rc_) : id(id_), rc(rc_) {} 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_ci unsigned id; 1445bf215546Sopenharmony_ci RegClass rc; 1446bf215546Sopenharmony_ci}; 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_cistruct IDAndInfo { 1449bf215546Sopenharmony_ci IDAndInfo(unsigned id_, DefInfo info_) : id(id_), info(info_) {} 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci unsigned id; 1452bf215546Sopenharmony_ci DefInfo info; 1453bf215546Sopenharmony_ci}; 1454bf215546Sopenharmony_ci 1455bf215546Sopenharmony_ci/* Reallocates vars by sorting them and placing each variable after the previous 1456bf215546Sopenharmony_ci * one. If one of the variables has 0xffffffff as an ID, the register assigned 1457bf215546Sopenharmony_ci * for that variable will be returned. 1458bf215546Sopenharmony_ci */ 1459bf215546Sopenharmony_ciPhysReg 1460bf215546Sopenharmony_cicompact_relocate_vars(ra_ctx& ctx, const std::vector<IDAndRegClass>& vars, 1461bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopies, PhysReg start) 1462bf215546Sopenharmony_ci{ 1463bf215546Sopenharmony_ci /* This function assumes RegisterDemand/live_var_analysis rounds up sub-dword 1464bf215546Sopenharmony_ci * temporary sizes to dwords. 1465bf215546Sopenharmony_ci */ 1466bf215546Sopenharmony_ci std::vector<IDAndInfo> sorted; 1467bf215546Sopenharmony_ci for (IDAndRegClass var : vars) { 1468bf215546Sopenharmony_ci DefInfo info(ctx, ctx.pseudo_dummy, var.rc, -1); 1469bf215546Sopenharmony_ci sorted.emplace_back(var.id, info); 1470bf215546Sopenharmony_ci } 1471bf215546Sopenharmony_ci 1472bf215546Sopenharmony_ci std::sort( 1473bf215546Sopenharmony_ci sorted.begin(), sorted.end(), 1474bf215546Sopenharmony_ci [&ctx](const IDAndInfo& a, const IDAndInfo& b) 1475bf215546Sopenharmony_ci { 1476bf215546Sopenharmony_ci unsigned a_stride = a.info.stride * (a.info.rc.is_subdword() ? 1 : 4); 1477bf215546Sopenharmony_ci unsigned b_stride = b.info.stride * (b.info.rc.is_subdword() ? 1 : 4); 1478bf215546Sopenharmony_ci if (a_stride > b_stride) 1479bf215546Sopenharmony_ci return true; 1480bf215546Sopenharmony_ci if (a_stride < b_stride) 1481bf215546Sopenharmony_ci return false; 1482bf215546Sopenharmony_ci if (a.id == 0xffffffff || b.id == 0xffffffff) 1483bf215546Sopenharmony_ci return a.id == 1484bf215546Sopenharmony_ci 0xffffffff; /* place 0xffffffff before others if possible, not for any reason */ 1485bf215546Sopenharmony_ci return ctx.assignments[a.id].reg < ctx.assignments[b.id].reg; 1486bf215546Sopenharmony_ci }); 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_ci PhysReg next_reg = start; 1489bf215546Sopenharmony_ci PhysReg space_reg; 1490bf215546Sopenharmony_ci for (IDAndInfo& var : sorted) { 1491bf215546Sopenharmony_ci unsigned stride = var.info.rc.is_subdword() ? var.info.stride : var.info.stride * 4; 1492bf215546Sopenharmony_ci next_reg.reg_b = align(next_reg.reg_b, MAX2(stride, 4)); 1493bf215546Sopenharmony_ci 1494bf215546Sopenharmony_ci /* 0xffffffff is a special variable ID used reserve a space for killed 1495bf215546Sopenharmony_ci * operands and definitions. 1496bf215546Sopenharmony_ci */ 1497bf215546Sopenharmony_ci if (var.id != 0xffffffff) { 1498bf215546Sopenharmony_ci if (next_reg != ctx.assignments[var.id].reg) { 1499bf215546Sopenharmony_ci RegClass rc = ctx.assignments[var.id].rc; 1500bf215546Sopenharmony_ci Temp tmp(var.id, rc); 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci Operand pc_op(tmp); 1503bf215546Sopenharmony_ci pc_op.setFixed(ctx.assignments[var.id].reg); 1504bf215546Sopenharmony_ci Definition pc_def(next_reg, rc); 1505bf215546Sopenharmony_ci parallelcopies.emplace_back(pc_op, pc_def); 1506bf215546Sopenharmony_ci } 1507bf215546Sopenharmony_ci } else { 1508bf215546Sopenharmony_ci space_reg = next_reg; 1509bf215546Sopenharmony_ci } 1510bf215546Sopenharmony_ci 1511bf215546Sopenharmony_ci adjust_max_used_regs(ctx, var.info.rc, next_reg); 1512bf215546Sopenharmony_ci 1513bf215546Sopenharmony_ci next_reg = next_reg.advance(var.info.rc.size() * 4); 1514bf215546Sopenharmony_ci } 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci return space_reg; 1517bf215546Sopenharmony_ci} 1518bf215546Sopenharmony_ci 1519bf215546Sopenharmony_cibool 1520bf215546Sopenharmony_ciis_mimg_vaddr_intact(ra_ctx& ctx, RegisterFile& reg_file, Instruction* instr) 1521bf215546Sopenharmony_ci{ 1522bf215546Sopenharmony_ci PhysReg first{512}; 1523bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size() - 3u; i++) { 1524bf215546Sopenharmony_ci Operand op = instr->operands[i + 3]; 1525bf215546Sopenharmony_ci 1526bf215546Sopenharmony_ci if (ctx.assignments[op.tempId()].assigned) { 1527bf215546Sopenharmony_ci PhysReg reg = ctx.assignments[op.tempId()].reg; 1528bf215546Sopenharmony_ci 1529bf215546Sopenharmony_ci if (first.reg() == 512) { 1530bf215546Sopenharmony_ci PhysRegInterval bounds = get_reg_bounds(ctx.program, RegType::vgpr); 1531bf215546Sopenharmony_ci first = reg.advance(i * -4); 1532bf215546Sopenharmony_ci PhysRegInterval vec = PhysRegInterval{first, instr->operands.size() - 3u}; 1533bf215546Sopenharmony_ci if (!bounds.contains(vec)) /* not enough space for other operands */ 1534bf215546Sopenharmony_ci return false; 1535bf215546Sopenharmony_ci } else { 1536bf215546Sopenharmony_ci if (reg != first.advance(i * 4)) /* not at the best position */ 1537bf215546Sopenharmony_ci return false; 1538bf215546Sopenharmony_ci } 1539bf215546Sopenharmony_ci } else { 1540bf215546Sopenharmony_ci /* If there's an unexpected temporary, this operand is unlikely to be 1541bf215546Sopenharmony_ci * placed in the best position. 1542bf215546Sopenharmony_ci */ 1543bf215546Sopenharmony_ci if (first.reg() != 512 && reg_file.test(first.advance(i * 4), 4)) 1544bf215546Sopenharmony_ci return false; 1545bf215546Sopenharmony_ci } 1546bf215546Sopenharmony_ci } 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci return true; 1549bf215546Sopenharmony_ci} 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_cistd::pair<PhysReg, bool> 1552bf215546Sopenharmony_ciget_reg_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp, aco_ptr<Instruction>& instr) 1553bf215546Sopenharmony_ci{ 1554bf215546Sopenharmony_ci Instruction* vec = ctx.vectors[temp.id()]; 1555bf215546Sopenharmony_ci unsigned first_operand = vec->format == Format::MIMG ? 3 : 0; 1556bf215546Sopenharmony_ci unsigned our_offset = 0; 1557bf215546Sopenharmony_ci for (unsigned i = first_operand; i < vec->operands.size(); i++) { 1558bf215546Sopenharmony_ci Operand& op = vec->operands[i]; 1559bf215546Sopenharmony_ci if (op.isTemp() && op.tempId() == temp.id()) 1560bf215546Sopenharmony_ci break; 1561bf215546Sopenharmony_ci else 1562bf215546Sopenharmony_ci our_offset += op.bytes(); 1563bf215546Sopenharmony_ci } 1564bf215546Sopenharmony_ci 1565bf215546Sopenharmony_ci if (vec->format != Format::MIMG || is_mimg_vaddr_intact(ctx, reg_file, vec)) { 1566bf215546Sopenharmony_ci unsigned their_offset = 0; 1567bf215546Sopenharmony_ci /* check for every operand of the vector 1568bf215546Sopenharmony_ci * - whether the operand is assigned and 1569bf215546Sopenharmony_ci * - we can use the register relative to that operand 1570bf215546Sopenharmony_ci */ 1571bf215546Sopenharmony_ci for (unsigned i = first_operand; i < vec->operands.size(); i++) { 1572bf215546Sopenharmony_ci Operand& op = vec->operands[i]; 1573bf215546Sopenharmony_ci if (op.isTemp() && op.tempId() != temp.id() && op.getTemp().type() == temp.type() && 1574bf215546Sopenharmony_ci ctx.assignments[op.tempId()].assigned) { 1575bf215546Sopenharmony_ci PhysReg reg = ctx.assignments[op.tempId()].reg; 1576bf215546Sopenharmony_ci reg.reg_b += (our_offset - their_offset); 1577bf215546Sopenharmony_ci if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg)) 1578bf215546Sopenharmony_ci return {reg, true}; 1579bf215546Sopenharmony_ci 1580bf215546Sopenharmony_ci /* return if MIMG vaddr components don't remain vector-aligned */ 1581bf215546Sopenharmony_ci if (vec->format == Format::MIMG) 1582bf215546Sopenharmony_ci return {{}, false}; 1583bf215546Sopenharmony_ci } 1584bf215546Sopenharmony_ci their_offset += op.bytes(); 1585bf215546Sopenharmony_ci } 1586bf215546Sopenharmony_ci 1587bf215546Sopenharmony_ci /* We didn't find a register relative to other vector operands. 1588bf215546Sopenharmony_ci * Try to find new space which fits the whole vector. 1589bf215546Sopenharmony_ci */ 1590bf215546Sopenharmony_ci RegClass vec_rc = RegClass::get(temp.type(), their_offset); 1591bf215546Sopenharmony_ci DefInfo info(ctx, ctx.pseudo_dummy, vec_rc, -1); 1592bf215546Sopenharmony_ci std::pair<PhysReg, bool> res = get_reg_simple(ctx, reg_file, info); 1593bf215546Sopenharmony_ci PhysReg reg = res.first; 1594bf215546Sopenharmony_ci if (res.second) { 1595bf215546Sopenharmony_ci reg.reg_b += our_offset; 1596bf215546Sopenharmony_ci /* make sure to only use byte offset if the instruction supports it */ 1597bf215546Sopenharmony_ci if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg)) 1598bf215546Sopenharmony_ci return {reg, true}; 1599bf215546Sopenharmony_ci } 1600bf215546Sopenharmony_ci } 1601bf215546Sopenharmony_ci return {{}, false}; 1602bf215546Sopenharmony_ci} 1603bf215546Sopenharmony_ci 1604bf215546Sopenharmony_ciPhysReg 1605bf215546Sopenharmony_ciget_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp, 1606bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopies, aco_ptr<Instruction>& instr, 1607bf215546Sopenharmony_ci int operand_index = -1) 1608bf215546Sopenharmony_ci{ 1609bf215546Sopenharmony_ci auto split_vec = ctx.split_vectors.find(temp.id()); 1610bf215546Sopenharmony_ci if (split_vec != ctx.split_vectors.end()) { 1611bf215546Sopenharmony_ci unsigned offset = 0; 1612bf215546Sopenharmony_ci for (Definition def : split_vec->second->definitions) { 1613bf215546Sopenharmony_ci if (ctx.assignments[def.tempId()].affinity) { 1614bf215546Sopenharmony_ci assignment& affinity = ctx.assignments[ctx.assignments[def.tempId()].affinity]; 1615bf215546Sopenharmony_ci if (affinity.assigned) { 1616bf215546Sopenharmony_ci PhysReg reg = affinity.reg; 1617bf215546Sopenharmony_ci reg.reg_b -= offset; 1618bf215546Sopenharmony_ci if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg)) 1619bf215546Sopenharmony_ci return reg; 1620bf215546Sopenharmony_ci } 1621bf215546Sopenharmony_ci } 1622bf215546Sopenharmony_ci offset += def.bytes(); 1623bf215546Sopenharmony_ci } 1624bf215546Sopenharmony_ci } 1625bf215546Sopenharmony_ci 1626bf215546Sopenharmony_ci if (ctx.assignments[temp.id()].affinity) { 1627bf215546Sopenharmony_ci assignment& affinity = ctx.assignments[ctx.assignments[temp.id()].affinity]; 1628bf215546Sopenharmony_ci if (affinity.assigned) { 1629bf215546Sopenharmony_ci if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, affinity.reg)) 1630bf215546Sopenharmony_ci return affinity.reg; 1631bf215546Sopenharmony_ci } 1632bf215546Sopenharmony_ci } 1633bf215546Sopenharmony_ci if (ctx.assignments[temp.id()].vcc) { 1634bf215546Sopenharmony_ci if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc)) 1635bf215546Sopenharmony_ci return vcc; 1636bf215546Sopenharmony_ci } 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci std::pair<PhysReg, bool> res; 1639bf215546Sopenharmony_ci 1640bf215546Sopenharmony_ci if (ctx.vectors.find(temp.id()) != ctx.vectors.end()) { 1641bf215546Sopenharmony_ci res = get_reg_vector(ctx, reg_file, temp, instr); 1642bf215546Sopenharmony_ci if (res.second) 1643bf215546Sopenharmony_ci return res.first; 1644bf215546Sopenharmony_ci } 1645bf215546Sopenharmony_ci 1646bf215546Sopenharmony_ci DefInfo info(ctx, instr, temp.regClass(), operand_index); 1647bf215546Sopenharmony_ci 1648bf215546Sopenharmony_ci if (!ctx.policy.skip_optimistic_path) { 1649bf215546Sopenharmony_ci /* try to find space without live-range splits */ 1650bf215546Sopenharmony_ci res = get_reg_simple(ctx, reg_file, info); 1651bf215546Sopenharmony_ci 1652bf215546Sopenharmony_ci if (res.second) 1653bf215546Sopenharmony_ci return res.first; 1654bf215546Sopenharmony_ci } 1655bf215546Sopenharmony_ci 1656bf215546Sopenharmony_ci /* try to find space with live-range splits */ 1657bf215546Sopenharmony_ci res = get_reg_impl(ctx, reg_file, parallelcopies, info, instr); 1658bf215546Sopenharmony_ci 1659bf215546Sopenharmony_ci if (res.second) 1660bf215546Sopenharmony_ci return res.first; 1661bf215546Sopenharmony_ci 1662bf215546Sopenharmony_ci /* try using more registers */ 1663bf215546Sopenharmony_ci 1664bf215546Sopenharmony_ci /* We should only fail here because keeping under the limit would require 1665bf215546Sopenharmony_ci * too many moves. */ 1666bf215546Sopenharmony_ci assert(reg_file.count_zero(info.bounds) >= info.size); 1667bf215546Sopenharmony_ci 1668bf215546Sopenharmony_ci if (!increase_register_file(ctx, info.rc.type())) { 1669bf215546Sopenharmony_ci /* fallback algorithm: reallocate all variables at once */ 1670bf215546Sopenharmony_ci unsigned def_size = info.rc.size(); 1671bf215546Sopenharmony_ci for (Definition def : instr->definitions) { 1672bf215546Sopenharmony_ci if (ctx.assignments[def.tempId()].assigned && def.regClass().type() == info.rc.type()) 1673bf215546Sopenharmony_ci def_size += def.regClass().size(); 1674bf215546Sopenharmony_ci } 1675bf215546Sopenharmony_ci 1676bf215546Sopenharmony_ci unsigned killed_op_size = 0; 1677bf215546Sopenharmony_ci for (Operand op : instr->operands) { 1678bf215546Sopenharmony_ci if (op.isTemp() && op.isKillBeforeDef() && op.regClass().type() == info.rc.type()) 1679bf215546Sopenharmony_ci killed_op_size += op.regClass().size(); 1680bf215546Sopenharmony_ci } 1681bf215546Sopenharmony_ci 1682bf215546Sopenharmony_ci const PhysRegInterval regs = get_reg_bounds(ctx.program, info.rc.type()); 1683bf215546Sopenharmony_ci 1684bf215546Sopenharmony_ci /* reallocate passthrough variables and non-killed operands */ 1685bf215546Sopenharmony_ci std::vector<IDAndRegClass> vars; 1686bf215546Sopenharmony_ci for (unsigned id : find_vars(ctx, reg_file, regs)) 1687bf215546Sopenharmony_ci vars.emplace_back(id, ctx.assignments[id].rc); 1688bf215546Sopenharmony_ci vars.emplace_back(0xffffffff, RegClass(info.rc.type(), MAX2(def_size, killed_op_size))); 1689bf215546Sopenharmony_ci 1690bf215546Sopenharmony_ci PhysReg space = compact_relocate_vars(ctx, vars, parallelcopies, regs.lo()); 1691bf215546Sopenharmony_ci 1692bf215546Sopenharmony_ci /* reallocate killed operands */ 1693bf215546Sopenharmony_ci std::vector<IDAndRegClass> killed_op_vars; 1694bf215546Sopenharmony_ci for (Operand op : instr->operands) { 1695bf215546Sopenharmony_ci if (op.isKillBeforeDef() && op.regClass().type() == info.rc.type()) 1696bf215546Sopenharmony_ci killed_op_vars.emplace_back(op.tempId(), op.regClass()); 1697bf215546Sopenharmony_ci } 1698bf215546Sopenharmony_ci compact_relocate_vars(ctx, killed_op_vars, parallelcopies, space); 1699bf215546Sopenharmony_ci 1700bf215546Sopenharmony_ci /* reallocate definitions */ 1701bf215546Sopenharmony_ci std::vector<IDAndRegClass> def_vars; 1702bf215546Sopenharmony_ci for (Definition def : instr->definitions) { 1703bf215546Sopenharmony_ci if (ctx.assignments[def.tempId()].assigned && def.regClass().type() == info.rc.type()) 1704bf215546Sopenharmony_ci def_vars.emplace_back(def.tempId(), def.regClass()); 1705bf215546Sopenharmony_ci } 1706bf215546Sopenharmony_ci def_vars.emplace_back(0xffffffff, info.rc); 1707bf215546Sopenharmony_ci return compact_relocate_vars(ctx, def_vars, parallelcopies, space); 1708bf215546Sopenharmony_ci } 1709bf215546Sopenharmony_ci 1710bf215546Sopenharmony_ci return get_reg(ctx, reg_file, temp, parallelcopies, instr, operand_index); 1711bf215546Sopenharmony_ci} 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ciPhysReg 1714bf215546Sopenharmony_ciget_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp, 1715bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopies, 1716bf215546Sopenharmony_ci aco_ptr<Instruction>& instr) 1717bf215546Sopenharmony_ci{ 1718bf215546Sopenharmony_ci RegClass rc = temp.regClass(); 1719bf215546Sopenharmony_ci /* create_vector instructions have different costs w.r.t. register coalescing */ 1720bf215546Sopenharmony_ci uint32_t size = rc.size(); 1721bf215546Sopenharmony_ci uint32_t bytes = rc.bytes(); 1722bf215546Sopenharmony_ci uint32_t stride = get_stride(rc); 1723bf215546Sopenharmony_ci PhysRegInterval bounds = get_reg_bounds(ctx.program, rc.type()); 1724bf215546Sopenharmony_ci 1725bf215546Sopenharmony_ci // TODO: improve p_create_vector for sub-dword vectors 1726bf215546Sopenharmony_ci 1727bf215546Sopenharmony_ci PhysReg best_pos{0xFFF}; 1728bf215546Sopenharmony_ci unsigned num_moves = 0xFF; 1729bf215546Sopenharmony_ci bool best_avoid = true; 1730bf215546Sopenharmony_ci uint32_t correct_pos_mask = 0; 1731bf215546Sopenharmony_ci 1732bf215546Sopenharmony_ci /* test for each operand which definition placement causes the least shuffle instructions */ 1733bf215546Sopenharmony_ci for (unsigned i = 0, offset = 0; i < instr->operands.size(); 1734bf215546Sopenharmony_ci offset += instr->operands[i].bytes(), i++) { 1735bf215546Sopenharmony_ci // TODO: think about, if we can alias live operands on the same register 1736bf215546Sopenharmony_ci if (!instr->operands[i].isTemp() || !instr->operands[i].isKillBeforeDef() || 1737bf215546Sopenharmony_ci instr->operands[i].getTemp().type() != rc.type()) 1738bf215546Sopenharmony_ci continue; 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_ci if (offset > instr->operands[i].physReg().reg_b) 1741bf215546Sopenharmony_ci continue; 1742bf215546Sopenharmony_ci 1743bf215546Sopenharmony_ci unsigned reg_lower = instr->operands[i].physReg().reg_b - offset; 1744bf215546Sopenharmony_ci if (reg_lower % 4) 1745bf215546Sopenharmony_ci continue; 1746bf215546Sopenharmony_ci PhysRegInterval reg_win = {PhysReg{reg_lower / 4}, size}; 1747bf215546Sopenharmony_ci unsigned k = 0; 1748bf215546Sopenharmony_ci 1749bf215546Sopenharmony_ci /* no need to check multiple times */ 1750bf215546Sopenharmony_ci if (reg_win.lo() == best_pos) 1751bf215546Sopenharmony_ci continue; 1752bf215546Sopenharmony_ci 1753bf215546Sopenharmony_ci /* check borders */ 1754bf215546Sopenharmony_ci // TODO: this can be improved */ 1755bf215546Sopenharmony_ci if (!bounds.contains(reg_win) || reg_win.lo() % stride != 0) 1756bf215546Sopenharmony_ci continue; 1757bf215546Sopenharmony_ci if (reg_win.lo() > bounds.lo() && reg_file[reg_win.lo()] != 0 && 1758bf215546Sopenharmony_ci reg_file.get_id(reg_win.lo()) == reg_file.get_id(reg_win.lo().advance(-1))) 1759bf215546Sopenharmony_ci continue; 1760bf215546Sopenharmony_ci if (reg_win.hi() < bounds.hi() && reg_file[reg_win.hi().advance(-4)] != 0 && 1761bf215546Sopenharmony_ci reg_file.get_id(reg_win.hi().advance(-1)) == reg_file.get_id(reg_win.hi())) 1762bf215546Sopenharmony_ci continue; 1763bf215546Sopenharmony_ci 1764bf215546Sopenharmony_ci /* count variables to be moved and check "avoid" */ 1765bf215546Sopenharmony_ci bool avoid = false; 1766bf215546Sopenharmony_ci bool linear_vgpr = false; 1767bf215546Sopenharmony_ci for (PhysReg j : reg_win) { 1768bf215546Sopenharmony_ci if (reg_file[j] != 0) { 1769bf215546Sopenharmony_ci if (reg_file[j] == 0xF0000000) { 1770bf215546Sopenharmony_ci PhysReg reg; 1771bf215546Sopenharmony_ci reg.reg_b = j * 4; 1772bf215546Sopenharmony_ci unsigned bytes_left = bytes - ((unsigned)j - reg_win.lo()) * 4; 1773bf215546Sopenharmony_ci for (unsigned byte_idx = 0; byte_idx < MIN2(bytes_left, 4); byte_idx++, reg.reg_b++) 1774bf215546Sopenharmony_ci k += reg_file.test(reg, 1); 1775bf215546Sopenharmony_ci } else { 1776bf215546Sopenharmony_ci k += 4; 1777bf215546Sopenharmony_ci linear_vgpr |= ctx.assignments[reg_file[j]].rc.is_linear_vgpr(); 1778bf215546Sopenharmony_ci } 1779bf215546Sopenharmony_ci } 1780bf215546Sopenharmony_ci avoid |= ctx.war_hint[j]; 1781bf215546Sopenharmony_ci } 1782bf215546Sopenharmony_ci 1783bf215546Sopenharmony_ci if (linear_vgpr) { 1784bf215546Sopenharmony_ci /* we cannot split live ranges of linear vgprs inside control flow */ 1785bf215546Sopenharmony_ci if (ctx.block->kind & block_kind_top_level) 1786bf215546Sopenharmony_ci avoid = true; 1787bf215546Sopenharmony_ci else 1788bf215546Sopenharmony_ci continue; 1789bf215546Sopenharmony_ci } 1790bf215546Sopenharmony_ci 1791bf215546Sopenharmony_ci if (avoid && !best_avoid) 1792bf215546Sopenharmony_ci continue; 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_ci /* count operands in wrong positions */ 1795bf215546Sopenharmony_ci uint32_t correct_pos_mask_new = 0; 1796bf215546Sopenharmony_ci for (unsigned j = 0, offset2 = 0; j < instr->operands.size(); 1797bf215546Sopenharmony_ci offset2 += instr->operands[j].bytes(), j++) { 1798bf215546Sopenharmony_ci Operand& op = instr->operands[j]; 1799bf215546Sopenharmony_ci if (op.isTemp() && op.physReg().reg_b == reg_win.lo() * 4 + offset2) 1800bf215546Sopenharmony_ci correct_pos_mask_new |= 1 << j; 1801bf215546Sopenharmony_ci else 1802bf215546Sopenharmony_ci k += op.bytes(); 1803bf215546Sopenharmony_ci } 1804bf215546Sopenharmony_ci bool aligned = rc == RegClass::v4 && reg_win.lo() % 4 == 0; 1805bf215546Sopenharmony_ci if (k > num_moves || (!aligned && k == num_moves)) 1806bf215546Sopenharmony_ci continue; 1807bf215546Sopenharmony_ci 1808bf215546Sopenharmony_ci best_pos = reg_win.lo(); 1809bf215546Sopenharmony_ci num_moves = k; 1810bf215546Sopenharmony_ci best_avoid = avoid; 1811bf215546Sopenharmony_ci correct_pos_mask = correct_pos_mask_new; 1812bf215546Sopenharmony_ci } 1813bf215546Sopenharmony_ci 1814bf215546Sopenharmony_ci /* too many moves: try the generic get_reg() function */ 1815bf215546Sopenharmony_ci if (num_moves >= 2 * bytes) { 1816bf215546Sopenharmony_ci return get_reg(ctx, reg_file, temp, parallelcopies, instr); 1817bf215546Sopenharmony_ci } else if (num_moves > bytes) { 1818bf215546Sopenharmony_ci DefInfo info(ctx, instr, rc, -1); 1819bf215546Sopenharmony_ci std::pair<PhysReg, bool> res = get_reg_simple(ctx, reg_file, info); 1820bf215546Sopenharmony_ci if (res.second) 1821bf215546Sopenharmony_ci return res.first; 1822bf215546Sopenharmony_ci } 1823bf215546Sopenharmony_ci 1824bf215546Sopenharmony_ci /* re-enable killed operands which are in the wrong position */ 1825bf215546Sopenharmony_ci RegisterFile tmp_file(reg_file); 1826bf215546Sopenharmony_ci for (Operand& op : instr->operands) { 1827bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKillBeforeDef()) 1828bf215546Sopenharmony_ci tmp_file.fill(op); 1829bf215546Sopenharmony_ci } 1830bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 1831bf215546Sopenharmony_ci if ((correct_pos_mask >> i) & 1u && instr->operands[i].isKill()) 1832bf215546Sopenharmony_ci tmp_file.clear(instr->operands[i]); 1833bf215546Sopenharmony_ci } 1834bf215546Sopenharmony_ci 1835bf215546Sopenharmony_ci /* collect variables to be moved */ 1836bf215546Sopenharmony_ci std::vector<unsigned> vars = collect_vars(ctx, tmp_file, PhysRegInterval{best_pos, size}); 1837bf215546Sopenharmony_ci 1838bf215546Sopenharmony_ci bool success = false; 1839bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>> pc; 1840bf215546Sopenharmony_ci success = 1841bf215546Sopenharmony_ci get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, PhysRegInterval{best_pos, size}); 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_ci if (!success) { 1844bf215546Sopenharmony_ci if (!increase_register_file(ctx, temp.type())) { 1845bf215546Sopenharmony_ci /* use the fallback algorithm in get_reg() */ 1846bf215546Sopenharmony_ci return get_reg(ctx, reg_file, temp, parallelcopies, instr); 1847bf215546Sopenharmony_ci } 1848bf215546Sopenharmony_ci return get_reg_create_vector(ctx, reg_file, temp, parallelcopies, instr); 1849bf215546Sopenharmony_ci } 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end()); 1852bf215546Sopenharmony_ci adjust_max_used_regs(ctx, rc, best_pos); 1853bf215546Sopenharmony_ci 1854bf215546Sopenharmony_ci return best_pos; 1855bf215546Sopenharmony_ci} 1856bf215546Sopenharmony_ci 1857bf215546Sopenharmony_civoid 1858bf215546Sopenharmony_cihandle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr) 1859bf215546Sopenharmony_ci{ 1860bf215546Sopenharmony_ci if (instr->format != Format::PSEUDO) 1861bf215546Sopenharmony_ci return; 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_ci /* all instructions which use handle_operands() need this information */ 1864bf215546Sopenharmony_ci switch (instr->opcode) { 1865bf215546Sopenharmony_ci case aco_opcode::p_extract_vector: 1866bf215546Sopenharmony_ci case aco_opcode::p_create_vector: 1867bf215546Sopenharmony_ci case aco_opcode::p_split_vector: 1868bf215546Sopenharmony_ci case aco_opcode::p_parallelcopy: 1869bf215546Sopenharmony_ci case aco_opcode::p_wqm: break; 1870bf215546Sopenharmony_ci default: return; 1871bf215546Sopenharmony_ci } 1872bf215546Sopenharmony_ci 1873bf215546Sopenharmony_ci bool writes_linear = false; 1874bf215546Sopenharmony_ci /* if all definitions are logical vgpr, no need to care for SCC */ 1875bf215546Sopenharmony_ci for (Definition& def : instr->definitions) { 1876bf215546Sopenharmony_ci if (def.getTemp().regClass().is_linear()) 1877bf215546Sopenharmony_ci writes_linear = true; 1878bf215546Sopenharmony_ci } 1879bf215546Sopenharmony_ci /* if all operands are constant, no need to care either */ 1880bf215546Sopenharmony_ci bool reads_linear = false; 1881bf215546Sopenharmony_ci bool reads_subdword = false; 1882bf215546Sopenharmony_ci for (Operand& op : instr->operands) { 1883bf215546Sopenharmony_ci if (op.isTemp() && op.getTemp().regClass().is_linear()) 1884bf215546Sopenharmony_ci reads_linear = true; 1885bf215546Sopenharmony_ci if (op.isTemp() && op.regClass().is_subdword()) 1886bf215546Sopenharmony_ci reads_subdword = true; 1887bf215546Sopenharmony_ci } 1888bf215546Sopenharmony_ci bool needs_scratch_reg = (writes_linear && reads_linear && reg_file[scc]) || 1889bf215546Sopenharmony_ci (ctx.program->gfx_level <= GFX7 && reads_subdword); 1890bf215546Sopenharmony_ci if (!needs_scratch_reg) 1891bf215546Sopenharmony_ci return; 1892bf215546Sopenharmony_ci 1893bf215546Sopenharmony_ci instr->pseudo().tmp_in_scc = reg_file[scc]; 1894bf215546Sopenharmony_ci 1895bf215546Sopenharmony_ci int reg = ctx.max_used_sgpr; 1896bf215546Sopenharmony_ci for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--) 1897bf215546Sopenharmony_ci ; 1898bf215546Sopenharmony_ci if (reg < 0) { 1899bf215546Sopenharmony_ci reg = ctx.max_used_sgpr + 1; 1900bf215546Sopenharmony_ci for (; reg < ctx.program->max_reg_demand.sgpr && reg_file[PhysReg{(unsigned)reg}]; reg++) 1901bf215546Sopenharmony_ci ; 1902bf215546Sopenharmony_ci if (reg == ctx.program->max_reg_demand.sgpr) { 1903bf215546Sopenharmony_ci assert(reads_subdword && reg_file[m0] == 0); 1904bf215546Sopenharmony_ci reg = m0; 1905bf215546Sopenharmony_ci } 1906bf215546Sopenharmony_ci } 1907bf215546Sopenharmony_ci 1908bf215546Sopenharmony_ci adjust_max_used_regs(ctx, s1, reg); 1909bf215546Sopenharmony_ci instr->pseudo().scratch_sgpr = PhysReg{(unsigned)reg}; 1910bf215546Sopenharmony_ci} 1911bf215546Sopenharmony_ci 1912bf215546Sopenharmony_cibool 1913bf215546Sopenharmony_cioperand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg, 1914bf215546Sopenharmony_ci RegClass rc) 1915bf215546Sopenharmony_ci{ 1916bf215546Sopenharmony_ci if (instr->operands[idx].isFixed()) 1917bf215546Sopenharmony_ci return instr->operands[idx].physReg() == reg; 1918bf215546Sopenharmony_ci 1919bf215546Sopenharmony_ci bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 || 1920bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_writelane_b32_e64; 1921bf215546Sopenharmony_ci if (gfx_level <= GFX9 && is_writelane && idx <= 1) { 1922bf215546Sopenharmony_ci /* v_writelane_b32 can take two sgprs but only if one is m0. */ 1923bf215546Sopenharmony_ci bool is_other_sgpr = 1924bf215546Sopenharmony_ci instr->operands[!idx].isTemp() && 1925bf215546Sopenharmony_ci (!instr->operands[!idx].isFixed() || instr->operands[!idx].physReg() != m0); 1926bf215546Sopenharmony_ci if (is_other_sgpr && instr->operands[!idx].tempId() != instr->operands[idx].tempId()) { 1927bf215546Sopenharmony_ci instr->operands[idx].setFixed(m0); 1928bf215546Sopenharmony_ci return reg == m0; 1929bf215546Sopenharmony_ci } 1930bf215546Sopenharmony_ci } 1931bf215546Sopenharmony_ci 1932bf215546Sopenharmony_ci if (reg.byte()) { 1933bf215546Sopenharmony_ci unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc); 1934bf215546Sopenharmony_ci if (reg.byte() % stride) 1935bf215546Sopenharmony_ci return false; 1936bf215546Sopenharmony_ci } 1937bf215546Sopenharmony_ci 1938bf215546Sopenharmony_ci switch (instr->format) { 1939bf215546Sopenharmony_ci case Format::SMEM: 1940bf215546Sopenharmony_ci return reg != scc && reg != exec && 1941bf215546Sopenharmony_ci (reg != m0 || idx == 1 || idx == 3) && /* offset can be m0 */ 1942bf215546Sopenharmony_ci (reg != vcc || (instr->definitions.empty() && idx == 2) || 1943bf215546Sopenharmony_ci gfx_level >= GFX10); /* sdata can be vcc */ 1944bf215546Sopenharmony_ci default: 1945bf215546Sopenharmony_ci // TODO: there are more instructions with restrictions on registers 1946bf215546Sopenharmony_ci return true; 1947bf215546Sopenharmony_ci } 1948bf215546Sopenharmony_ci} 1949bf215546Sopenharmony_ci 1950bf215546Sopenharmony_civoid 1951bf215546Sopenharmony_ciget_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file, 1952bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>>& parallelcopy, 1953bf215546Sopenharmony_ci aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index) 1954bf215546Sopenharmony_ci{ 1955bf215546Sopenharmony_ci /* check if the operand is fixed */ 1956bf215546Sopenharmony_ci PhysReg src = ctx.assignments[operand.tempId()].reg; 1957bf215546Sopenharmony_ci PhysReg dst; 1958bf215546Sopenharmony_ci if (operand.isFixed()) { 1959bf215546Sopenharmony_ci assert(operand.physReg() != src); 1960bf215546Sopenharmony_ci 1961bf215546Sopenharmony_ci /* check if target reg is blocked, and move away the blocking var */ 1962bf215546Sopenharmony_ci if (register_file.test(operand.physReg(), operand.bytes())) { 1963bf215546Sopenharmony_ci PhysRegInterval target{operand.physReg(), operand.size()}; 1964bf215546Sopenharmony_ci 1965bf215546Sopenharmony_ci RegisterFile tmp_file(register_file); 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_ci std::vector<unsigned> blocking_vars = collect_vars(ctx, tmp_file, target); 1968bf215546Sopenharmony_ci 1969bf215546Sopenharmony_ci tmp_file.clear(src, operand.regClass()); // TODO: try to avoid moving block vars to src 1970bf215546Sopenharmony_ci tmp_file.block(operand.physReg(), operand.regClass()); 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci DefInfo info(ctx, instr, operand.regClass(), -1); 1973bf215546Sopenharmony_ci get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, info.bounds, instr, 1974bf215546Sopenharmony_ci PhysRegInterval()); 1975bf215546Sopenharmony_ci } 1976bf215546Sopenharmony_ci dst = operand.physReg(); 1977bf215546Sopenharmony_ci 1978bf215546Sopenharmony_ci } else { 1979bf215546Sopenharmony_ci /* clear the operand in case it's only a stride mismatch */ 1980bf215546Sopenharmony_ci register_file.clear(src, operand.regClass()); 1981bf215546Sopenharmony_ci dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index); 1982bf215546Sopenharmony_ci } 1983bf215546Sopenharmony_ci 1984bf215546Sopenharmony_ci Operand pc_op = operand; 1985bf215546Sopenharmony_ci pc_op.setFixed(src); 1986bf215546Sopenharmony_ci Definition pc_def = Definition(dst, pc_op.regClass()); 1987bf215546Sopenharmony_ci parallelcopy.emplace_back(pc_op, pc_def); 1988bf215546Sopenharmony_ci update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops | fill_killed_ops); 1989bf215546Sopenharmony_ci} 1990bf215546Sopenharmony_ci 1991bf215546Sopenharmony_ciPhysReg 1992bf215546Sopenharmony_ciget_reg_phi(ra_ctx& ctx, IDSet& live_in, RegisterFile& register_file, 1993bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>& instructions, Block& block, 1994bf215546Sopenharmony_ci aco_ptr<Instruction>& phi, Temp tmp) 1995bf215546Sopenharmony_ci{ 1996bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>> parallelcopy; 1997bf215546Sopenharmony_ci PhysReg reg = get_reg(ctx, register_file, tmp, parallelcopy, phi); 1998bf215546Sopenharmony_ci update_renames(ctx, register_file, parallelcopy, phi, rename_not_killed_ops); 1999bf215546Sopenharmony_ci 2000bf215546Sopenharmony_ci /* process parallelcopy */ 2001bf215546Sopenharmony_ci for (std::pair<Operand, Definition> pc : parallelcopy) { 2002bf215546Sopenharmony_ci /* see if it's a copy from a different phi */ 2003bf215546Sopenharmony_ci // TODO: prefer moving some previous phis over live-ins 2004bf215546Sopenharmony_ci // TODO: somehow prevent phis fixed before the RA from being updated (shouldn't be a 2005bf215546Sopenharmony_ci // problem in practice since they can only be fixed to exec) 2006bf215546Sopenharmony_ci Instruction* prev_phi = NULL; 2007bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>::iterator phi_it; 2008bf215546Sopenharmony_ci for (phi_it = instructions.begin(); phi_it != instructions.end(); ++phi_it) { 2009bf215546Sopenharmony_ci if ((*phi_it)->definitions[0].tempId() == pc.first.tempId()) 2010bf215546Sopenharmony_ci prev_phi = phi_it->get(); 2011bf215546Sopenharmony_ci } 2012bf215546Sopenharmony_ci if (prev_phi) { 2013bf215546Sopenharmony_ci /* if so, just update that phi's register */ 2014bf215546Sopenharmony_ci prev_phi->definitions[0].setFixed(pc.second.physReg()); 2015bf215546Sopenharmony_ci register_file.fill(prev_phi->definitions[0]); 2016bf215546Sopenharmony_ci ctx.assignments[prev_phi->definitions[0].tempId()] = {pc.second.physReg(), 2017bf215546Sopenharmony_ci pc.second.regClass()}; 2018bf215546Sopenharmony_ci continue; 2019bf215546Sopenharmony_ci } 2020bf215546Sopenharmony_ci 2021bf215546Sopenharmony_ci /* rename */ 2022bf215546Sopenharmony_ci std::unordered_map<unsigned, Temp>::iterator orig_it = ctx.orig_names.find(pc.first.tempId()); 2023bf215546Sopenharmony_ci Temp orig = pc.first.getTemp(); 2024bf215546Sopenharmony_ci if (orig_it != ctx.orig_names.end()) 2025bf215546Sopenharmony_ci orig = orig_it->second; 2026bf215546Sopenharmony_ci else 2027bf215546Sopenharmony_ci ctx.orig_names[pc.second.tempId()] = orig; 2028bf215546Sopenharmony_ci ctx.renames[block.index][orig.id()] = pc.second.getTemp(); 2029bf215546Sopenharmony_ci 2030bf215546Sopenharmony_ci /* otherwise, this is a live-in and we need to create a new phi 2031bf215546Sopenharmony_ci * to move it in this block's predecessors */ 2032bf215546Sopenharmony_ci aco_opcode opcode = 2033bf215546Sopenharmony_ci pc.first.getTemp().is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi; 2034bf215546Sopenharmony_ci std::vector<unsigned>& preds = 2035bf215546Sopenharmony_ci pc.first.getTemp().is_linear() ? block.linear_preds : block.logical_preds; 2036bf215546Sopenharmony_ci aco_ptr<Instruction> new_phi{ 2037bf215546Sopenharmony_ci create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)}; 2038bf215546Sopenharmony_ci new_phi->definitions[0] = pc.second; 2039bf215546Sopenharmony_ci for (unsigned i = 0; i < preds.size(); i++) 2040bf215546Sopenharmony_ci new_phi->operands[i] = Operand(pc.first); 2041bf215546Sopenharmony_ci instructions.emplace_back(std::move(new_phi)); 2042bf215546Sopenharmony_ci 2043bf215546Sopenharmony_ci /* Remove from live_in, because handle_loop_phis() would re-create this phi later if this is 2044bf215546Sopenharmony_ci * a loop header. 2045bf215546Sopenharmony_ci */ 2046bf215546Sopenharmony_ci live_in.erase(orig.id()); 2047bf215546Sopenharmony_ci } 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci return reg; 2050bf215546Sopenharmony_ci} 2051bf215546Sopenharmony_ci 2052bf215546Sopenharmony_civoid 2053bf215546Sopenharmony_ciget_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file, 2054bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>& instructions, IDSet& live_in) 2055bf215546Sopenharmony_ci{ 2056bf215546Sopenharmony_ci /* move all phis to instructions */ 2057bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : block.instructions) { 2058bf215546Sopenharmony_ci if (!is_phi(phi)) 2059bf215546Sopenharmony_ci break; 2060bf215546Sopenharmony_ci if (!phi->definitions[0].isKill()) 2061bf215546Sopenharmony_ci instructions.emplace_back(std::move(phi)); 2062bf215546Sopenharmony_ci } 2063bf215546Sopenharmony_ci 2064bf215546Sopenharmony_ci /* assign phis with all-matching registers to that register */ 2065bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : instructions) { 2066bf215546Sopenharmony_ci Definition& definition = phi->definitions[0]; 2067bf215546Sopenharmony_ci if (definition.isFixed()) 2068bf215546Sopenharmony_ci continue; 2069bf215546Sopenharmony_ci 2070bf215546Sopenharmony_ci if (!phi->operands[0].isTemp()) 2071bf215546Sopenharmony_ci continue; 2072bf215546Sopenharmony_ci 2073bf215546Sopenharmony_ci PhysReg reg = phi->operands[0].physReg(); 2074bf215546Sopenharmony_ci auto OpsSame = [=](const Operand& op) -> bool 2075bf215546Sopenharmony_ci { return op.isTemp() && (!op.isFixed() || op.physReg() == reg); }; 2076bf215546Sopenharmony_ci bool all_same = std::all_of(phi->operands.cbegin() + 1, phi->operands.cend(), OpsSame); 2077bf215546Sopenharmony_ci if (!all_same) 2078bf215546Sopenharmony_ci continue; 2079bf215546Sopenharmony_ci 2080bf215546Sopenharmony_ci if (!get_reg_specified(ctx, register_file, definition.regClass(), phi, reg)) 2081bf215546Sopenharmony_ci continue; 2082bf215546Sopenharmony_ci 2083bf215546Sopenharmony_ci definition.setFixed(reg); 2084bf215546Sopenharmony_ci register_file.fill(definition); 2085bf215546Sopenharmony_ci ctx.assignments[definition.tempId()].set(definition); 2086bf215546Sopenharmony_ci } 2087bf215546Sopenharmony_ci 2088bf215546Sopenharmony_ci /* try to find a register that is used by at least one operand */ 2089bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : instructions) { 2090bf215546Sopenharmony_ci Definition& definition = phi->definitions[0]; 2091bf215546Sopenharmony_ci if (definition.isFixed()) 2092bf215546Sopenharmony_ci continue; 2093bf215546Sopenharmony_ci 2094bf215546Sopenharmony_ci /* use affinity if available */ 2095bf215546Sopenharmony_ci if (ctx.assignments[definition.tempId()].affinity && 2096bf215546Sopenharmony_ci ctx.assignments[ctx.assignments[definition.tempId()].affinity].assigned) { 2097bf215546Sopenharmony_ci assignment& affinity = ctx.assignments[ctx.assignments[definition.tempId()].affinity]; 2098bf215546Sopenharmony_ci assert(affinity.rc == definition.regClass()); 2099bf215546Sopenharmony_ci if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity.reg)) { 2100bf215546Sopenharmony_ci definition.setFixed(affinity.reg); 2101bf215546Sopenharmony_ci register_file.fill(definition); 2102bf215546Sopenharmony_ci ctx.assignments[definition.tempId()].set(definition); 2103bf215546Sopenharmony_ci continue; 2104bf215546Sopenharmony_ci } 2105bf215546Sopenharmony_ci } 2106bf215546Sopenharmony_ci 2107bf215546Sopenharmony_ci /* by going backwards, we aim to avoid copies in else-blocks */ 2108bf215546Sopenharmony_ci for (int i = phi->operands.size() - 1; i >= 0; i--) { 2109bf215546Sopenharmony_ci const Operand& op = phi->operands[i]; 2110bf215546Sopenharmony_ci if (!op.isTemp() || !op.isFixed()) 2111bf215546Sopenharmony_ci continue; 2112bf215546Sopenharmony_ci 2113bf215546Sopenharmony_ci PhysReg reg = op.physReg(); 2114bf215546Sopenharmony_ci if (get_reg_specified(ctx, register_file, definition.regClass(), phi, reg)) { 2115bf215546Sopenharmony_ci definition.setFixed(reg); 2116bf215546Sopenharmony_ci register_file.fill(definition); 2117bf215546Sopenharmony_ci ctx.assignments[definition.tempId()].set(definition); 2118bf215546Sopenharmony_ci break; 2119bf215546Sopenharmony_ci } 2120bf215546Sopenharmony_ci } 2121bf215546Sopenharmony_ci } 2122bf215546Sopenharmony_ci 2123bf215546Sopenharmony_ci /* find registers for phis where the register was blocked or no operand was assigned */ 2124bf215546Sopenharmony_ci 2125bf215546Sopenharmony_ci /* Don't use iterators because get_reg_phi() can add phis to the end of the vector. */ 2126bf215546Sopenharmony_ci for (unsigned i = 0; i < instructions.size(); i++) { 2127bf215546Sopenharmony_ci aco_ptr<Instruction>& phi = instructions[i]; 2128bf215546Sopenharmony_ci Definition& definition = phi->definitions[0]; 2129bf215546Sopenharmony_ci if (definition.isFixed()) 2130bf215546Sopenharmony_ci continue; 2131bf215546Sopenharmony_ci 2132bf215546Sopenharmony_ci definition.setFixed( 2133bf215546Sopenharmony_ci get_reg_phi(ctx, live_in, register_file, instructions, block, phi, definition.getTemp())); 2134bf215546Sopenharmony_ci 2135bf215546Sopenharmony_ci register_file.fill(definition); 2136bf215546Sopenharmony_ci ctx.assignments[definition.tempId()].set(definition); 2137bf215546Sopenharmony_ci } 2138bf215546Sopenharmony_ci} 2139bf215546Sopenharmony_ci 2140bf215546Sopenharmony_ciTemp 2141bf215546Sopenharmony_ciread_variable(ra_ctx& ctx, Temp val, unsigned block_idx) 2142bf215546Sopenharmony_ci{ 2143bf215546Sopenharmony_ci std::unordered_map<unsigned, Temp>::iterator it = ctx.renames[block_idx].find(val.id()); 2144bf215546Sopenharmony_ci if (it == ctx.renames[block_idx].end()) 2145bf215546Sopenharmony_ci return val; 2146bf215546Sopenharmony_ci else 2147bf215546Sopenharmony_ci return it->second; 2148bf215546Sopenharmony_ci} 2149bf215546Sopenharmony_ci 2150bf215546Sopenharmony_ciTemp 2151bf215546Sopenharmony_cihandle_live_in(ra_ctx& ctx, Temp val, Block* block) 2152bf215546Sopenharmony_ci{ 2153bf215546Sopenharmony_ci std::vector<unsigned>& preds = val.is_linear() ? block->linear_preds : block->logical_preds; 2154bf215546Sopenharmony_ci if (preds.size() == 0) 2155bf215546Sopenharmony_ci return val; 2156bf215546Sopenharmony_ci 2157bf215546Sopenharmony_ci if (preds.size() == 1) { 2158bf215546Sopenharmony_ci /* if the block has only one predecessor, just look there for the name */ 2159bf215546Sopenharmony_ci return read_variable(ctx, val, preds[0]); 2160bf215546Sopenharmony_ci } 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ci /* there are multiple predecessors and the block is sealed */ 2163bf215546Sopenharmony_ci Temp* const ops = (Temp*)alloca(preds.size() * sizeof(Temp)); 2164bf215546Sopenharmony_ci 2165bf215546Sopenharmony_ci /* get the rename from each predecessor and check if they are the same */ 2166bf215546Sopenharmony_ci Temp new_val; 2167bf215546Sopenharmony_ci bool needs_phi = false; 2168bf215546Sopenharmony_ci for (unsigned i = 0; i < preds.size(); i++) { 2169bf215546Sopenharmony_ci ops[i] = read_variable(ctx, val, preds[i]); 2170bf215546Sopenharmony_ci if (i == 0) 2171bf215546Sopenharmony_ci new_val = ops[i]; 2172bf215546Sopenharmony_ci else 2173bf215546Sopenharmony_ci needs_phi |= !(new_val == ops[i]); 2174bf215546Sopenharmony_ci } 2175bf215546Sopenharmony_ci 2176bf215546Sopenharmony_ci if (needs_phi) { 2177bf215546Sopenharmony_ci assert(!val.regClass().is_linear_vgpr()); 2178bf215546Sopenharmony_ci 2179bf215546Sopenharmony_ci /* the variable has been renamed differently in the predecessors: we need to insert a phi */ 2180bf215546Sopenharmony_ci aco_opcode opcode = val.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi; 2181bf215546Sopenharmony_ci aco_ptr<Instruction> phi{ 2182bf215546Sopenharmony_ci create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)}; 2183bf215546Sopenharmony_ci new_val = ctx.program->allocateTmp(val.regClass()); 2184bf215546Sopenharmony_ci phi->definitions[0] = Definition(new_val); 2185bf215546Sopenharmony_ci ctx.assignments.emplace_back(); 2186bf215546Sopenharmony_ci assert(ctx.assignments.size() == ctx.program->peekAllocationId()); 2187bf215546Sopenharmony_ci for (unsigned i = 0; i < preds.size(); i++) { 2188bf215546Sopenharmony_ci /* update the operands so that it uses the new affinity */ 2189bf215546Sopenharmony_ci phi->operands[i] = Operand(ops[i]); 2190bf215546Sopenharmony_ci assert(ctx.assignments[ops[i].id()].assigned); 2191bf215546Sopenharmony_ci assert(ops[i].regClass() == new_val.regClass()); 2192bf215546Sopenharmony_ci phi->operands[i].setFixed(ctx.assignments[ops[i].id()].reg); 2193bf215546Sopenharmony_ci } 2194bf215546Sopenharmony_ci block->instructions.insert(block->instructions.begin(), std::move(phi)); 2195bf215546Sopenharmony_ci } 2196bf215546Sopenharmony_ci 2197bf215546Sopenharmony_ci return new_val; 2198bf215546Sopenharmony_ci} 2199bf215546Sopenharmony_ci 2200bf215546Sopenharmony_civoid 2201bf215546Sopenharmony_cihandle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx, 2202bf215546Sopenharmony_ci uint32_t loop_exit_idx) 2203bf215546Sopenharmony_ci{ 2204bf215546Sopenharmony_ci Block& loop_header = ctx.program->blocks[loop_header_idx]; 2205bf215546Sopenharmony_ci std::unordered_map<unsigned, Temp> renames; 2206bf215546Sopenharmony_ci 2207bf215546Sopenharmony_ci /* create phis for variables renamed during the loop */ 2208bf215546Sopenharmony_ci for (unsigned t : live_in) { 2209bf215546Sopenharmony_ci Temp val = Temp(t, ctx.program->temp_rc[t]); 2210bf215546Sopenharmony_ci Temp prev = read_variable(ctx, val, loop_header_idx - 1); 2211bf215546Sopenharmony_ci Temp renamed = handle_live_in(ctx, val, &loop_header); 2212bf215546Sopenharmony_ci if (renamed == prev) 2213bf215546Sopenharmony_ci continue; 2214bf215546Sopenharmony_ci 2215bf215546Sopenharmony_ci /* insert additional renames at block end, but don't overwrite */ 2216bf215546Sopenharmony_ci renames[prev.id()] = renamed; 2217bf215546Sopenharmony_ci ctx.orig_names[renamed.id()] = val; 2218bf215546Sopenharmony_ci for (unsigned idx = loop_header_idx; idx < loop_exit_idx; idx++) { 2219bf215546Sopenharmony_ci auto it = ctx.renames[idx].emplace(val.id(), renamed); 2220bf215546Sopenharmony_ci /* if insertion is unsuccessful, update if necessary */ 2221bf215546Sopenharmony_ci if (!it.second && it.first->second == prev) 2222bf215546Sopenharmony_ci it.first->second = renamed; 2223bf215546Sopenharmony_ci } 2224bf215546Sopenharmony_ci 2225bf215546Sopenharmony_ci /* update loop-carried values of the phi created by handle_live_in() */ 2226bf215546Sopenharmony_ci for (unsigned i = 1; i < loop_header.instructions[0]->operands.size(); i++) { 2227bf215546Sopenharmony_ci Operand& op = loop_header.instructions[0]->operands[i]; 2228bf215546Sopenharmony_ci if (op.getTemp() == prev) 2229bf215546Sopenharmony_ci op.setTemp(renamed); 2230bf215546Sopenharmony_ci } 2231bf215546Sopenharmony_ci 2232bf215546Sopenharmony_ci /* use the assignment from the loop preheader and fix def reg */ 2233bf215546Sopenharmony_ci assignment& var = ctx.assignments[prev.id()]; 2234bf215546Sopenharmony_ci ctx.assignments[renamed.id()] = var; 2235bf215546Sopenharmony_ci loop_header.instructions[0]->definitions[0].setFixed(var.reg); 2236bf215546Sopenharmony_ci } 2237bf215546Sopenharmony_ci 2238bf215546Sopenharmony_ci /* rename loop carried phi operands */ 2239bf215546Sopenharmony_ci for (unsigned i = renames.size(); i < loop_header.instructions.size(); i++) { 2240bf215546Sopenharmony_ci aco_ptr<Instruction>& phi = loop_header.instructions[i]; 2241bf215546Sopenharmony_ci if (!is_phi(phi)) 2242bf215546Sopenharmony_ci break; 2243bf215546Sopenharmony_ci const std::vector<unsigned>& preds = 2244bf215546Sopenharmony_ci phi->opcode == aco_opcode::p_phi ? loop_header.logical_preds : loop_header.linear_preds; 2245bf215546Sopenharmony_ci for (unsigned j = 1; j < phi->operands.size(); j++) { 2246bf215546Sopenharmony_ci Operand& op = phi->operands[j]; 2247bf215546Sopenharmony_ci if (!op.isTemp()) 2248bf215546Sopenharmony_ci continue; 2249bf215546Sopenharmony_ci 2250bf215546Sopenharmony_ci /* Find the original name, since this operand might not use the original name if the phi 2251bf215546Sopenharmony_ci * was created after init_reg_file(). 2252bf215546Sopenharmony_ci */ 2253bf215546Sopenharmony_ci std::unordered_map<unsigned, Temp>::iterator it = ctx.orig_names.find(op.tempId()); 2254bf215546Sopenharmony_ci Temp orig = it != ctx.orig_names.end() ? it->second : op.getTemp(); 2255bf215546Sopenharmony_ci 2256bf215546Sopenharmony_ci op.setTemp(read_variable(ctx, orig, preds[j])); 2257bf215546Sopenharmony_ci op.setFixed(ctx.assignments[op.tempId()].reg); 2258bf215546Sopenharmony_ci } 2259bf215546Sopenharmony_ci } 2260bf215546Sopenharmony_ci 2261bf215546Sopenharmony_ci /* return early if no new phi was created */ 2262bf215546Sopenharmony_ci if (renames.empty()) 2263bf215546Sopenharmony_ci return; 2264bf215546Sopenharmony_ci 2265bf215546Sopenharmony_ci /* propagate new renames through loop */ 2266bf215546Sopenharmony_ci for (unsigned idx = loop_header_idx; idx < loop_exit_idx; idx++) { 2267bf215546Sopenharmony_ci Block& current = ctx.program->blocks[idx]; 2268bf215546Sopenharmony_ci /* rename all uses in this block */ 2269bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : current.instructions) { 2270bf215546Sopenharmony_ci /* phis are renamed after RA */ 2271bf215546Sopenharmony_ci if (idx == loop_header_idx && is_phi(instr)) 2272bf215546Sopenharmony_ci continue; 2273bf215546Sopenharmony_ci 2274bf215546Sopenharmony_ci for (Operand& op : instr->operands) { 2275bf215546Sopenharmony_ci if (!op.isTemp()) 2276bf215546Sopenharmony_ci continue; 2277bf215546Sopenharmony_ci 2278bf215546Sopenharmony_ci auto rename = renames.find(op.tempId()); 2279bf215546Sopenharmony_ci if (rename != renames.end()) { 2280bf215546Sopenharmony_ci assert(rename->second.id()); 2281bf215546Sopenharmony_ci op.setTemp(rename->second); 2282bf215546Sopenharmony_ci } 2283bf215546Sopenharmony_ci } 2284bf215546Sopenharmony_ci } 2285bf215546Sopenharmony_ci } 2286bf215546Sopenharmony_ci} 2287bf215546Sopenharmony_ci 2288bf215546Sopenharmony_ci/** 2289bf215546Sopenharmony_ci * This function serves the purpose to correctly initialize the register file 2290bf215546Sopenharmony_ci * at the beginning of a block (before any existing phis). 2291bf215546Sopenharmony_ci * In order to do so, all live-in variables are entered into the RegisterFile. 2292bf215546Sopenharmony_ci * Reg-to-reg moves (renames) from previous blocks are taken into account and 2293bf215546Sopenharmony_ci * the SSA is repaired by inserting corresponding phi-nodes. 2294bf215546Sopenharmony_ci */ 2295bf215546Sopenharmony_ciRegisterFile 2296bf215546Sopenharmony_ciinit_reg_file(ra_ctx& ctx, const std::vector<IDSet>& live_out_per_block, Block& block) 2297bf215546Sopenharmony_ci{ 2298bf215546Sopenharmony_ci if (block.kind & block_kind_loop_exit) { 2299bf215546Sopenharmony_ci uint32_t header = ctx.loop_header.back(); 2300bf215546Sopenharmony_ci ctx.loop_header.pop_back(); 2301bf215546Sopenharmony_ci handle_loop_phis(ctx, live_out_per_block[header], header, block.index); 2302bf215546Sopenharmony_ci } 2303bf215546Sopenharmony_ci 2304bf215546Sopenharmony_ci RegisterFile register_file; 2305bf215546Sopenharmony_ci const IDSet& live_in = live_out_per_block[block.index]; 2306bf215546Sopenharmony_ci assert(block.index != 0 || live_in.empty()); 2307bf215546Sopenharmony_ci 2308bf215546Sopenharmony_ci if (block.kind & block_kind_loop_header) { 2309bf215546Sopenharmony_ci ctx.loop_header.emplace_back(block.index); 2310bf215546Sopenharmony_ci /* already rename phis incoming value */ 2311bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block.instructions) { 2312bf215546Sopenharmony_ci if (!is_phi(instr)) 2313bf215546Sopenharmony_ci break; 2314bf215546Sopenharmony_ci Operand& operand = instr->operands[0]; 2315bf215546Sopenharmony_ci if (operand.isTemp()) { 2316bf215546Sopenharmony_ci operand.setTemp(read_variable(ctx, operand.getTemp(), block.index - 1)); 2317bf215546Sopenharmony_ci operand.setFixed(ctx.assignments[operand.tempId()].reg); 2318bf215546Sopenharmony_ci } 2319bf215546Sopenharmony_ci } 2320bf215546Sopenharmony_ci for (unsigned t : live_in) { 2321bf215546Sopenharmony_ci Temp val = Temp(t, ctx.program->temp_rc[t]); 2322bf215546Sopenharmony_ci Temp renamed = read_variable(ctx, val, block.index - 1); 2323bf215546Sopenharmony_ci if (renamed != val) 2324bf215546Sopenharmony_ci ctx.renames[block.index][val.id()] = renamed; 2325bf215546Sopenharmony_ci assignment& var = ctx.assignments[renamed.id()]; 2326bf215546Sopenharmony_ci assert(var.assigned); 2327bf215546Sopenharmony_ci register_file.fill(Definition(renamed.id(), var.reg, var.rc)); 2328bf215546Sopenharmony_ci } 2329bf215546Sopenharmony_ci } else { 2330bf215546Sopenharmony_ci /* rename phi operands */ 2331bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block.instructions) { 2332bf215546Sopenharmony_ci if (!is_phi(instr)) 2333bf215546Sopenharmony_ci break; 2334bf215546Sopenharmony_ci const std::vector<unsigned>& preds = 2335bf215546Sopenharmony_ci instr->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds; 2336bf215546Sopenharmony_ci 2337bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 2338bf215546Sopenharmony_ci Operand& operand = instr->operands[i]; 2339bf215546Sopenharmony_ci if (!operand.isTemp()) 2340bf215546Sopenharmony_ci continue; 2341bf215546Sopenharmony_ci operand.setTemp(read_variable(ctx, operand.getTemp(), preds[i])); 2342bf215546Sopenharmony_ci operand.setFixed(ctx.assignments[operand.tempId()].reg); 2343bf215546Sopenharmony_ci } 2344bf215546Sopenharmony_ci } 2345bf215546Sopenharmony_ci for (unsigned t : live_in) { 2346bf215546Sopenharmony_ci Temp val = Temp(t, ctx.program->temp_rc[t]); 2347bf215546Sopenharmony_ci Temp renamed = handle_live_in(ctx, val, &block); 2348bf215546Sopenharmony_ci assignment& var = ctx.assignments[renamed.id()]; 2349bf215546Sopenharmony_ci /* due to live-range splits, the live-in might be a phi, now */ 2350bf215546Sopenharmony_ci if (var.assigned) { 2351bf215546Sopenharmony_ci register_file.fill(Definition(renamed.id(), var.reg, var.rc)); 2352bf215546Sopenharmony_ci } 2353bf215546Sopenharmony_ci if (renamed != val) { 2354bf215546Sopenharmony_ci ctx.renames[block.index].emplace(t, renamed); 2355bf215546Sopenharmony_ci ctx.orig_names[renamed.id()] = val; 2356bf215546Sopenharmony_ci } 2357bf215546Sopenharmony_ci } 2358bf215546Sopenharmony_ci } 2359bf215546Sopenharmony_ci 2360bf215546Sopenharmony_ci return register_file; 2361bf215546Sopenharmony_ci} 2362bf215546Sopenharmony_ci 2363bf215546Sopenharmony_civoid 2364bf215546Sopenharmony_ciget_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block) 2365bf215546Sopenharmony_ci{ 2366bf215546Sopenharmony_ci std::vector<std::vector<Temp>> phi_ressources; 2367bf215546Sopenharmony_ci std::unordered_map<unsigned, unsigned> temp_to_phi_ressources; 2368bf215546Sopenharmony_ci 2369bf215546Sopenharmony_ci for (auto block_rit = ctx.program->blocks.rbegin(); block_rit != ctx.program->blocks.rend(); 2370bf215546Sopenharmony_ci block_rit++) { 2371bf215546Sopenharmony_ci Block& block = *block_rit; 2372bf215546Sopenharmony_ci 2373bf215546Sopenharmony_ci /* first, compute the death points of all live vars within the block */ 2374bf215546Sopenharmony_ci IDSet& live = live_out_per_block[block.index]; 2375bf215546Sopenharmony_ci 2376bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>::reverse_iterator rit; 2377bf215546Sopenharmony_ci for (rit = block.instructions.rbegin(); rit != block.instructions.rend(); ++rit) { 2378bf215546Sopenharmony_ci aco_ptr<Instruction>& instr = *rit; 2379bf215546Sopenharmony_ci if (is_phi(instr)) 2380bf215546Sopenharmony_ci break; 2381bf215546Sopenharmony_ci 2382bf215546Sopenharmony_ci /* add vector affinities */ 2383bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_create_vector) { 2384bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 2385bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKill() && 2386bf215546Sopenharmony_ci op.getTemp().type() == instr->definitions[0].getTemp().type()) 2387bf215546Sopenharmony_ci ctx.vectors[op.tempId()] = instr.get(); 2388bf215546Sopenharmony_ci } 2389bf215546Sopenharmony_ci } else if (instr->format == Format::MIMG && instr->operands.size() > 4) { 2390bf215546Sopenharmony_ci for (unsigned i = 3; i < instr->operands.size(); i++) 2391bf215546Sopenharmony_ci ctx.vectors[instr->operands[i].tempId()] = instr.get(); 2392bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_split_vector && 2393bf215546Sopenharmony_ci instr->operands[0].isFirstKillBeforeDef()) { 2394bf215546Sopenharmony_ci ctx.split_vectors[instr->operands[0].tempId()] = instr.get(); 2395bf215546Sopenharmony_ci } else if (instr->isVOPC() && !instr->isVOP3()) { 2396bf215546Sopenharmony_ci if (!instr->isSDWA() || ctx.program->gfx_level == GFX8) 2397bf215546Sopenharmony_ci ctx.assignments[instr->definitions[0].tempId()].vcc = true; 2398bf215546Sopenharmony_ci } else if (instr->isVOP2() && !instr->isVOP3()) { 2399bf215546Sopenharmony_ci if (instr->operands.size() == 3 && instr->operands[2].isTemp() && 2400bf215546Sopenharmony_ci instr->operands[2].regClass().type() == RegType::sgpr) 2401bf215546Sopenharmony_ci ctx.assignments[instr->operands[2].tempId()].vcc = true; 2402bf215546Sopenharmony_ci if (instr->definitions.size() == 2) 2403bf215546Sopenharmony_ci ctx.assignments[instr->definitions[1].tempId()].vcc = true; 2404bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::s_and_b32 || 2405bf215546Sopenharmony_ci instr->opcode == aco_opcode::s_and_b64) { 2406bf215546Sopenharmony_ci /* If SCC is used by a branch, we might be able to use 2407bf215546Sopenharmony_ci * s_cbranch_vccz/s_cbranch_vccnz if the operand is VCC. 2408bf215546Sopenharmony_ci */ 2409bf215546Sopenharmony_ci if (!instr->definitions[1].isKill() && instr->operands[0].isTemp() && 2410bf215546Sopenharmony_ci instr->operands[1].isFixed() && instr->operands[1].physReg() == exec) 2411bf215546Sopenharmony_ci ctx.assignments[instr->operands[0].tempId()].vcc = true; 2412bf215546Sopenharmony_ci } 2413bf215546Sopenharmony_ci 2414bf215546Sopenharmony_ci /* add operands to live variables */ 2415bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 2416bf215546Sopenharmony_ci if (op.isTemp()) 2417bf215546Sopenharmony_ci live.insert(op.tempId()); 2418bf215546Sopenharmony_ci } 2419bf215546Sopenharmony_ci 2420bf215546Sopenharmony_ci /* erase definitions from live */ 2421bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->definitions.size(); i++) { 2422bf215546Sopenharmony_ci const Definition& def = instr->definitions[i]; 2423bf215546Sopenharmony_ci if (!def.isTemp()) 2424bf215546Sopenharmony_ci continue; 2425bf215546Sopenharmony_ci live.erase(def.tempId()); 2426bf215546Sopenharmony_ci /* mark last-seen phi operand */ 2427bf215546Sopenharmony_ci std::unordered_map<unsigned, unsigned>::iterator it = 2428bf215546Sopenharmony_ci temp_to_phi_ressources.find(def.tempId()); 2429bf215546Sopenharmony_ci if (it != temp_to_phi_ressources.end() && 2430bf215546Sopenharmony_ci def.regClass() == phi_ressources[it->second][0].regClass()) { 2431bf215546Sopenharmony_ci phi_ressources[it->second][0] = def.getTemp(); 2432bf215546Sopenharmony_ci /* try to coalesce phi affinities with parallelcopies */ 2433bf215546Sopenharmony_ci Operand op = Operand(); 2434bf215546Sopenharmony_ci switch (instr->opcode) { 2435bf215546Sopenharmony_ci case aco_opcode::p_parallelcopy: op = instr->operands[i]; break; 2436bf215546Sopenharmony_ci 2437bf215546Sopenharmony_ci case aco_opcode::v_interp_p2_f32: 2438bf215546Sopenharmony_ci case aco_opcode::v_writelane_b32: 2439bf215546Sopenharmony_ci case aco_opcode::v_writelane_b32_e64: op = instr->operands[2]; break; 2440bf215546Sopenharmony_ci 2441bf215546Sopenharmony_ci case aco_opcode::v_fma_f32: 2442bf215546Sopenharmony_ci case aco_opcode::v_fma_f16: 2443bf215546Sopenharmony_ci case aco_opcode::v_pk_fma_f16: 2444bf215546Sopenharmony_ci if (ctx.program->gfx_level < GFX10) 2445bf215546Sopenharmony_ci continue; 2446bf215546Sopenharmony_ci FALLTHROUGH; 2447bf215546Sopenharmony_ci case aco_opcode::v_mad_f32: 2448bf215546Sopenharmony_ci case aco_opcode::v_mad_f16: 2449bf215546Sopenharmony_ci if (instr->usesModifiers()) 2450bf215546Sopenharmony_ci continue; 2451bf215546Sopenharmony_ci op = instr->operands[2]; 2452bf215546Sopenharmony_ci break; 2453bf215546Sopenharmony_ci 2454bf215546Sopenharmony_ci case aco_opcode::v_mad_legacy_f32: 2455bf215546Sopenharmony_ci case aco_opcode::v_fma_legacy_f32: 2456bf215546Sopenharmony_ci if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32) 2457bf215546Sopenharmony_ci continue; 2458bf215546Sopenharmony_ci op = instr->operands[2]; 2459bf215546Sopenharmony_ci break; 2460bf215546Sopenharmony_ci 2461bf215546Sopenharmony_ci default: continue; 2462bf215546Sopenharmony_ci } 2463bf215546Sopenharmony_ci 2464bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKillBeforeDef() && def.regClass() == op.regClass()) { 2465bf215546Sopenharmony_ci phi_ressources[it->second].emplace_back(op.getTemp()); 2466bf215546Sopenharmony_ci temp_to_phi_ressources[op.tempId()] = it->second; 2467bf215546Sopenharmony_ci } 2468bf215546Sopenharmony_ci } 2469bf215546Sopenharmony_ci } 2470bf215546Sopenharmony_ci } 2471bf215546Sopenharmony_ci 2472bf215546Sopenharmony_ci /* collect phi affinities */ 2473bf215546Sopenharmony_ci for (; rit != block.instructions.rend(); ++rit) { 2474bf215546Sopenharmony_ci aco_ptr<Instruction>& instr = *rit; 2475bf215546Sopenharmony_ci assert(is_phi(instr)); 2476bf215546Sopenharmony_ci 2477bf215546Sopenharmony_ci live.erase(instr->definitions[0].tempId()); 2478bf215546Sopenharmony_ci if (instr->definitions[0].isKill() || instr->definitions[0].isFixed()) 2479bf215546Sopenharmony_ci continue; 2480bf215546Sopenharmony_ci 2481bf215546Sopenharmony_ci assert(instr->definitions[0].isTemp()); 2482bf215546Sopenharmony_ci std::unordered_map<unsigned, unsigned>::iterator it = 2483bf215546Sopenharmony_ci temp_to_phi_ressources.find(instr->definitions[0].tempId()); 2484bf215546Sopenharmony_ci unsigned index = phi_ressources.size(); 2485bf215546Sopenharmony_ci std::vector<Temp>* affinity_related; 2486bf215546Sopenharmony_ci if (it != temp_to_phi_ressources.end()) { 2487bf215546Sopenharmony_ci index = it->second; 2488bf215546Sopenharmony_ci phi_ressources[index][0] = instr->definitions[0].getTemp(); 2489bf215546Sopenharmony_ci affinity_related = &phi_ressources[index]; 2490bf215546Sopenharmony_ci } else { 2491bf215546Sopenharmony_ci phi_ressources.emplace_back(std::vector<Temp>{instr->definitions[0].getTemp()}); 2492bf215546Sopenharmony_ci affinity_related = &phi_ressources.back(); 2493bf215546Sopenharmony_ci } 2494bf215546Sopenharmony_ci 2495bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 2496bf215546Sopenharmony_ci if (op.isTemp() && op.isKill() && op.regClass() == instr->definitions[0].regClass()) { 2497bf215546Sopenharmony_ci affinity_related->emplace_back(op.getTemp()); 2498bf215546Sopenharmony_ci if (block.kind & block_kind_loop_header) 2499bf215546Sopenharmony_ci continue; 2500bf215546Sopenharmony_ci temp_to_phi_ressources[op.tempId()] = index; 2501bf215546Sopenharmony_ci } 2502bf215546Sopenharmony_ci } 2503bf215546Sopenharmony_ci } 2504bf215546Sopenharmony_ci 2505bf215546Sopenharmony_ci /* visit the loop header phis first in order to create nested affinities */ 2506bf215546Sopenharmony_ci if (block.kind & block_kind_loop_exit) { 2507bf215546Sopenharmony_ci /* find loop header */ 2508bf215546Sopenharmony_ci auto header_rit = block_rit; 2509bf215546Sopenharmony_ci while ((header_rit + 1)->loop_nest_depth > block.loop_nest_depth) 2510bf215546Sopenharmony_ci header_rit++; 2511bf215546Sopenharmony_ci 2512bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : header_rit->instructions) { 2513bf215546Sopenharmony_ci if (!is_phi(phi)) 2514bf215546Sopenharmony_ci break; 2515bf215546Sopenharmony_ci if (phi->definitions[0].isKill() || phi->definitions[0].isFixed()) 2516bf215546Sopenharmony_ci continue; 2517bf215546Sopenharmony_ci 2518bf215546Sopenharmony_ci /* create an (empty) merge-set for the phi-related variables */ 2519bf215546Sopenharmony_ci auto it = temp_to_phi_ressources.find(phi->definitions[0].tempId()); 2520bf215546Sopenharmony_ci unsigned index = phi_ressources.size(); 2521bf215546Sopenharmony_ci if (it == temp_to_phi_ressources.end()) { 2522bf215546Sopenharmony_ci temp_to_phi_ressources[phi->definitions[0].tempId()] = index; 2523bf215546Sopenharmony_ci phi_ressources.emplace_back(std::vector<Temp>{phi->definitions[0].getTemp()}); 2524bf215546Sopenharmony_ci } else { 2525bf215546Sopenharmony_ci index = it->second; 2526bf215546Sopenharmony_ci } 2527bf215546Sopenharmony_ci for (unsigned i = 1; i < phi->operands.size(); i++) { 2528bf215546Sopenharmony_ci const Operand& op = phi->operands[i]; 2529bf215546Sopenharmony_ci if (op.isTemp() && op.isKill() && op.regClass() == phi->definitions[0].regClass()) { 2530bf215546Sopenharmony_ci temp_to_phi_ressources[op.tempId()] = index; 2531bf215546Sopenharmony_ci } 2532bf215546Sopenharmony_ci } 2533bf215546Sopenharmony_ci } 2534bf215546Sopenharmony_ci } 2535bf215546Sopenharmony_ci } 2536bf215546Sopenharmony_ci /* create affinities */ 2537bf215546Sopenharmony_ci for (std::vector<Temp>& vec : phi_ressources) { 2538bf215546Sopenharmony_ci for (unsigned i = 1; i < vec.size(); i++) 2539bf215546Sopenharmony_ci if (vec[i].id() != vec[0].id()) 2540bf215546Sopenharmony_ci ctx.assignments[vec[i].id()].affinity = vec[0].id(); 2541bf215546Sopenharmony_ci } 2542bf215546Sopenharmony_ci} 2543bf215546Sopenharmony_ci 2544bf215546Sopenharmony_civoid 2545bf215546Sopenharmony_cioptimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file, 2546bf215546Sopenharmony_ci aco_ptr<Instruction>& instr) 2547bf215546Sopenharmony_ci{ 2548bf215546Sopenharmony_ci /* try to optimize v_mad_f32 -> v_mac_f32 */ 2549bf215546Sopenharmony_ci if ((instr->opcode != aco_opcode::v_mad_f32 && 2550bf215546Sopenharmony_ci (instr->opcode != aco_opcode::v_fma_f32 || program->gfx_level < GFX10) && 2551bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_mad_f16 && instr->opcode != aco_opcode::v_mad_legacy_f16 && 2552bf215546Sopenharmony_ci (instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) && 2553bf215546Sopenharmony_ci (instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) && 2554bf215546Sopenharmony_ci (instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) && 2555bf215546Sopenharmony_ci (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) && 2556bf215546Sopenharmony_ci (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) || 2557bf215546Sopenharmony_ci !instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() || 2558bf215546Sopenharmony_ci instr->operands[2].getTemp().type() != RegType::vgpr || 2559bf215546Sopenharmony_ci ((!instr->operands[0].isTemp() || instr->operands[0].getTemp().type() != RegType::vgpr) && 2560bf215546Sopenharmony_ci (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)) || 2561bf215546Sopenharmony_ci instr->usesModifiers() || instr->operands[0].physReg().byte() != 0 || 2562bf215546Sopenharmony_ci instr->operands[1].physReg().byte() != 0 || instr->operands[2].physReg().byte() != 0) 2563bf215546Sopenharmony_ci return; 2564bf215546Sopenharmony_ci 2565bf215546Sopenharmony_ci if (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr) 2566bf215546Sopenharmony_ci std::swap(instr->operands[0], instr->operands[1]); 2567bf215546Sopenharmony_ci 2568bf215546Sopenharmony_ci unsigned def_id = instr->definitions[0].tempId(); 2569bf215546Sopenharmony_ci if (ctx.assignments[def_id].affinity) { 2570bf215546Sopenharmony_ci assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity]; 2571bf215546Sopenharmony_ci if (affinity.assigned && affinity.reg != instr->operands[2].physReg() && 2572bf215546Sopenharmony_ci !register_file.test(affinity.reg, instr->operands[2].bytes())) 2573bf215546Sopenharmony_ci return; 2574bf215546Sopenharmony_ci } 2575bf215546Sopenharmony_ci 2576bf215546Sopenharmony_ci static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction), 2577bf215546Sopenharmony_ci "Invalid direct instruction cast."); 2578bf215546Sopenharmony_ci static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction), 2579bf215546Sopenharmony_ci "Invalid direct instruction cast."); 2580bf215546Sopenharmony_ci instr->format = Format::VOP2; 2581bf215546Sopenharmony_ci switch (instr->opcode) { 2582bf215546Sopenharmony_ci case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break; 2583bf215546Sopenharmony_ci case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break; 2584bf215546Sopenharmony_ci case aco_opcode::v_mad_f16: 2585bf215546Sopenharmony_ci case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break; 2586bf215546Sopenharmony_ci case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break; 2587bf215546Sopenharmony_ci case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break; 2588bf215546Sopenharmony_ci case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break; 2589bf215546Sopenharmony_ci case aco_opcode::v_mad_legacy_f32: instr->opcode = aco_opcode::v_mac_legacy_f32; break; 2590bf215546Sopenharmony_ci case aco_opcode::v_fma_legacy_f32: instr->opcode = aco_opcode::v_fmac_legacy_f32; break; 2591bf215546Sopenharmony_ci default: break; 2592bf215546Sopenharmony_ci } 2593bf215546Sopenharmony_ci} 2594bf215546Sopenharmony_ci 2595bf215546Sopenharmony_civoid 2596bf215546Sopenharmony_cioptimize_encoding_sopk(Program* program, ra_ctx& ctx, RegisterFile& register_file, 2597bf215546Sopenharmony_ci aco_ptr<Instruction>& instr) 2598bf215546Sopenharmony_ci{ 2599bf215546Sopenharmony_ci /* try to optimize sop2 with literal source to sopk */ 2600bf215546Sopenharmony_ci if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_mul_i32 && 2601bf215546Sopenharmony_ci instr->opcode != aco_opcode::s_cselect_b32) 2602bf215546Sopenharmony_ci return; 2603bf215546Sopenharmony_ci 2604bf215546Sopenharmony_ci uint32_t literal_idx = 0; 2605bf215546Sopenharmony_ci 2606bf215546Sopenharmony_ci if (instr->opcode != aco_opcode::s_cselect_b32 && instr->operands[1].isLiteral()) 2607bf215546Sopenharmony_ci literal_idx = 1; 2608bf215546Sopenharmony_ci 2609bf215546Sopenharmony_ci if (!instr->operands[!literal_idx].isTemp() || 2610bf215546Sopenharmony_ci !instr->operands[!literal_idx].isKillBeforeDef() || 2611bf215546Sopenharmony_ci instr->operands[!literal_idx].getTemp().type() != RegType::sgpr || 2612bf215546Sopenharmony_ci instr->operands[!literal_idx].physReg() >= 128) 2613bf215546Sopenharmony_ci return; 2614bf215546Sopenharmony_ci 2615bf215546Sopenharmony_ci if (!instr->operands[literal_idx].isLiteral()) 2616bf215546Sopenharmony_ci return; 2617bf215546Sopenharmony_ci 2618bf215546Sopenharmony_ci const uint32_t i16_mask = 0xffff8000u; 2619bf215546Sopenharmony_ci uint32_t value = instr->operands[literal_idx].constantValue(); 2620bf215546Sopenharmony_ci if ((value & i16_mask) && (value & i16_mask) != i16_mask) 2621bf215546Sopenharmony_ci return; 2622bf215546Sopenharmony_ci 2623bf215546Sopenharmony_ci unsigned def_id = instr->definitions[0].tempId(); 2624bf215546Sopenharmony_ci if (ctx.assignments[def_id].affinity) { 2625bf215546Sopenharmony_ci assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity]; 2626bf215546Sopenharmony_ci if (affinity.assigned && affinity.reg != instr->operands[!literal_idx].physReg() && 2627bf215546Sopenharmony_ci !register_file.test(affinity.reg, instr->operands[!literal_idx].bytes())) 2628bf215546Sopenharmony_ci return; 2629bf215546Sopenharmony_ci } 2630bf215546Sopenharmony_ci 2631bf215546Sopenharmony_ci static_assert(sizeof(SOPK_instruction) <= sizeof(SOP2_instruction), 2632bf215546Sopenharmony_ci "Invalid direct instruction cast."); 2633bf215546Sopenharmony_ci instr->format = Format::SOPK; 2634bf215546Sopenharmony_ci SOPK_instruction* instr_sopk = &instr->sopk(); 2635bf215546Sopenharmony_ci 2636bf215546Sopenharmony_ci instr_sopk->imm = instr_sopk->operands[literal_idx].constantValue() & 0xffff; 2637bf215546Sopenharmony_ci if (literal_idx == 0) 2638bf215546Sopenharmony_ci std::swap(instr_sopk->operands[0], instr_sopk->operands[1]); 2639bf215546Sopenharmony_ci if (instr_sopk->operands.size() > 2) 2640bf215546Sopenharmony_ci std::swap(instr_sopk->operands[1], instr_sopk->operands[2]); 2641bf215546Sopenharmony_ci instr_sopk->operands.pop_back(); 2642bf215546Sopenharmony_ci 2643bf215546Sopenharmony_ci switch (instr_sopk->opcode) { 2644bf215546Sopenharmony_ci case aco_opcode::s_add_i32: instr_sopk->opcode = aco_opcode::s_addk_i32; break; 2645bf215546Sopenharmony_ci case aco_opcode::s_mul_i32: instr_sopk->opcode = aco_opcode::s_mulk_i32; break; 2646bf215546Sopenharmony_ci case aco_opcode::s_cselect_b32: instr_sopk->opcode = aco_opcode::s_cmovk_i32; break; 2647bf215546Sopenharmony_ci default: unreachable("illegal instruction"); 2648bf215546Sopenharmony_ci } 2649bf215546Sopenharmony_ci} 2650bf215546Sopenharmony_ci 2651bf215546Sopenharmony_civoid 2652bf215546Sopenharmony_cioptimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file, 2653bf215546Sopenharmony_ci aco_ptr<Instruction>& instr) 2654bf215546Sopenharmony_ci{ 2655bf215546Sopenharmony_ci if (instr->isVALU()) 2656bf215546Sopenharmony_ci optimize_encoding_vop2(program, ctx, register_file, instr); 2657bf215546Sopenharmony_ci if (instr->isSALU()) 2658bf215546Sopenharmony_ci optimize_encoding_sopk(program, ctx, register_file, instr); 2659bf215546Sopenharmony_ci} 2660bf215546Sopenharmony_ci 2661bf215546Sopenharmony_ci} /* end namespace */ 2662bf215546Sopenharmony_ci 2663bf215546Sopenharmony_civoid 2664bf215546Sopenharmony_ciregister_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra_test_policy policy) 2665bf215546Sopenharmony_ci{ 2666bf215546Sopenharmony_ci ra_ctx ctx(program, policy); 2667bf215546Sopenharmony_ci get_affinities(ctx, live_out_per_block); 2668bf215546Sopenharmony_ci 2669bf215546Sopenharmony_ci for (Block& block : program->blocks) { 2670bf215546Sopenharmony_ci ctx.block = █ 2671bf215546Sopenharmony_ci 2672bf215546Sopenharmony_ci /* initialize register file */ 2673bf215546Sopenharmony_ci RegisterFile register_file = init_reg_file(ctx, live_out_per_block, block); 2674bf215546Sopenharmony_ci ctx.war_hint.reset(); 2675bf215546Sopenharmony_ci 2676bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>> instructions; 2677bf215546Sopenharmony_ci 2678bf215546Sopenharmony_ci /* this is a slight adjustment from the paper as we already have phi nodes: 2679bf215546Sopenharmony_ci * We consider them incomplete phis and only handle the definition. */ 2680bf215546Sopenharmony_ci get_regs_for_phis(ctx, block, register_file, instructions, live_out_per_block[block.index]); 2681bf215546Sopenharmony_ci 2682bf215546Sopenharmony_ci /* If this is a merge block, the state of the register file at the branch instruction of the 2683bf215546Sopenharmony_ci * predecessors corresponds to the state after phis at the merge block. So, we allocate a 2684bf215546Sopenharmony_ci * register for the predecessor's branch definitions as if there was a phi. 2685bf215546Sopenharmony_ci */ 2686bf215546Sopenharmony_ci if (!block.linear_preds.empty() && 2687bf215546Sopenharmony_ci (block.linear_preds.size() != 1 || 2688bf215546Sopenharmony_ci program->blocks[block.linear_preds[0]].linear_succs.size() == 1)) { 2689bf215546Sopenharmony_ci PhysReg br_reg = get_reg_phi(ctx, live_out_per_block[block.index], register_file, 2690bf215546Sopenharmony_ci instructions, block, ctx.phi_dummy, Temp(0, s2)); 2691bf215546Sopenharmony_ci for (unsigned pred : block.linear_preds) { 2692bf215546Sopenharmony_ci program->blocks[pred].scc_live_out = register_file[scc]; 2693bf215546Sopenharmony_ci aco_ptr<Instruction>& br = program->blocks[pred].instructions.back(); 2694bf215546Sopenharmony_ci 2695bf215546Sopenharmony_ci assert(br->definitions.size() == 1 && br->definitions[0].regClass() == s2 && 2696bf215546Sopenharmony_ci br->definitions[0].isKill()); 2697bf215546Sopenharmony_ci 2698bf215546Sopenharmony_ci br->definitions[0].setFixed(br_reg); 2699bf215546Sopenharmony_ci } 2700bf215546Sopenharmony_ci } 2701bf215546Sopenharmony_ci 2702bf215546Sopenharmony_ci /* Handle all other instructions of the block */ 2703bf215546Sopenharmony_ci auto NonPhi = [](aco_ptr<Instruction>& instr) -> bool { return instr && !is_phi(instr); }; 2704bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>::iterator instr_it = 2705bf215546Sopenharmony_ci std::find_if(block.instructions.begin(), block.instructions.end(), NonPhi); 2706bf215546Sopenharmony_ci for (; instr_it != block.instructions.end(); ++instr_it) { 2707bf215546Sopenharmony_ci aco_ptr<Instruction>& instr = *instr_it; 2708bf215546Sopenharmony_ci 2709bf215546Sopenharmony_ci /* parallelcopies from p_phi are inserted here which means 2710bf215546Sopenharmony_ci * live ranges of killed operands end here as well */ 2711bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_logical_end) { 2712bf215546Sopenharmony_ci /* no need to process this instruction any further */ 2713bf215546Sopenharmony_ci if (block.logical_succs.size() != 1) { 2714bf215546Sopenharmony_ci instructions.emplace_back(std::move(instr)); 2715bf215546Sopenharmony_ci continue; 2716bf215546Sopenharmony_ci } 2717bf215546Sopenharmony_ci 2718bf215546Sopenharmony_ci Block& succ = program->blocks[block.logical_succs[0]]; 2719bf215546Sopenharmony_ci unsigned idx = 0; 2720bf215546Sopenharmony_ci for (; idx < succ.logical_preds.size(); idx++) { 2721bf215546Sopenharmony_ci if (succ.logical_preds[idx] == block.index) 2722bf215546Sopenharmony_ci break; 2723bf215546Sopenharmony_ci } 2724bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : succ.instructions) { 2725bf215546Sopenharmony_ci if (phi->opcode == aco_opcode::p_phi) { 2726bf215546Sopenharmony_ci if (phi->operands[idx].isTemp() && 2727bf215546Sopenharmony_ci phi->operands[idx].getTemp().type() == RegType::sgpr && 2728bf215546Sopenharmony_ci phi->operands[idx].isFirstKillBeforeDef()) { 2729bf215546Sopenharmony_ci Definition phi_op( 2730bf215546Sopenharmony_ci read_variable(ctx, phi->operands[idx].getTemp(), block.index)); 2731bf215546Sopenharmony_ci phi_op.setFixed(ctx.assignments[phi_op.tempId()].reg); 2732bf215546Sopenharmony_ci register_file.clear(phi_op); 2733bf215546Sopenharmony_ci } 2734bf215546Sopenharmony_ci } else if (phi->opcode != aco_opcode::p_linear_phi) { 2735bf215546Sopenharmony_ci break; 2736bf215546Sopenharmony_ci } 2737bf215546Sopenharmony_ci } 2738bf215546Sopenharmony_ci instructions.emplace_back(std::move(instr)); 2739bf215546Sopenharmony_ci continue; 2740bf215546Sopenharmony_ci } 2741bf215546Sopenharmony_ci 2742bf215546Sopenharmony_ci /* unconditional branches are handled after phis of the target */ 2743bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_branch) { 2744bf215546Sopenharmony_ci /* last instruction of the block */ 2745bf215546Sopenharmony_ci instructions.emplace_back(std::move(instr)); 2746bf215546Sopenharmony_ci break; 2747bf215546Sopenharmony_ci } 2748bf215546Sopenharmony_ci 2749bf215546Sopenharmony_ci std::vector<std::pair<Operand, Definition>> parallelcopy; 2750bf215546Sopenharmony_ci 2751bf215546Sopenharmony_ci assert(!is_phi(instr)); 2752bf215546Sopenharmony_ci 2753bf215546Sopenharmony_ci bool temp_in_scc = register_file[scc]; 2754bf215546Sopenharmony_ci 2755bf215546Sopenharmony_ci /* handle operands */ 2756bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); ++i) { 2757bf215546Sopenharmony_ci auto& operand = instr->operands[i]; 2758bf215546Sopenharmony_ci if (!operand.isTemp()) 2759bf215546Sopenharmony_ci continue; 2760bf215546Sopenharmony_ci 2761bf215546Sopenharmony_ci /* rename operands */ 2762bf215546Sopenharmony_ci operand.setTemp(read_variable(ctx, operand.getTemp(), block.index)); 2763bf215546Sopenharmony_ci assert(ctx.assignments[operand.tempId()].assigned); 2764bf215546Sopenharmony_ci 2765bf215546Sopenharmony_ci PhysReg reg = ctx.assignments[operand.tempId()].reg; 2766bf215546Sopenharmony_ci if (operand_can_use_reg(program->gfx_level, instr, i, reg, operand.regClass())) 2767bf215546Sopenharmony_ci operand.setFixed(reg); 2768bf215546Sopenharmony_ci else 2769bf215546Sopenharmony_ci get_reg_for_operand(ctx, register_file, parallelcopy, instr, operand, i); 2770bf215546Sopenharmony_ci 2771bf215546Sopenharmony_ci if (instr->isEXP() || (instr->isVMEM() && i == 3 && ctx.program->gfx_level == GFX6) || 2772bf215546Sopenharmony_ci (instr->isDS() && instr->ds().gds)) { 2773bf215546Sopenharmony_ci for (unsigned j = 0; j < operand.size(); j++) 2774bf215546Sopenharmony_ci ctx.war_hint.set(operand.physReg().reg() + j); 2775bf215546Sopenharmony_ci } 2776bf215546Sopenharmony_ci } 2777bf215546Sopenharmony_ci 2778bf215546Sopenharmony_ci /* remove dead vars from register file */ 2779bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 2780bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKillBeforeDef()) 2781bf215546Sopenharmony_ci register_file.clear(op); 2782bf215546Sopenharmony_ci } 2783bf215546Sopenharmony_ci 2784bf215546Sopenharmony_ci optimize_encoding(program, ctx, register_file, instr); 2785bf215546Sopenharmony_ci 2786bf215546Sopenharmony_ci /* Handle definitions which must have the same register as an operand. 2787bf215546Sopenharmony_ci * We expect that the definition has the same size as the operand, otherwise the new 2788bf215546Sopenharmony_ci * location for the operand (if it's not killed) might intersect with the old one. 2789bf215546Sopenharmony_ci * We can't read from the old location because it's corrupted, and we can't write the new 2790bf215546Sopenharmony_ci * location because that's used by a live-through operand. 2791bf215546Sopenharmony_ci */ 2792bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_interp_p2_f32 || 2793bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 || 2794bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 || 2795bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_mac_legacy_f32 || 2796bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_fmac_legacy_f32 || 2797bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_pk_fmac_f16 || 2798bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_writelane_b32 || 2799bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_writelane_b32_e64 || 2800bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_dot4c_i32_i8) { 2801bf215546Sopenharmony_ci assert(instr->definitions[0].bytes() == instr->operands[2].bytes() || 2802bf215546Sopenharmony_ci instr->operands[2].regClass() == v1); 2803bf215546Sopenharmony_ci instr->definitions[0].setFixed(instr->operands[2].physReg()); 2804bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::s_addk_i32 || 2805bf215546Sopenharmony_ci instr->opcode == aco_opcode::s_mulk_i32 || 2806bf215546Sopenharmony_ci instr->opcode == aco_opcode::s_cmovk_i32) { 2807bf215546Sopenharmony_ci assert(instr->definitions[0].bytes() == instr->operands[0].bytes()); 2808bf215546Sopenharmony_ci instr->definitions[0].setFixed(instr->operands[0].physReg()); 2809bf215546Sopenharmony_ci } else if (instr->isMUBUF() && instr->definitions.size() == 1 && 2810bf215546Sopenharmony_ci instr->operands.size() == 4) { 2811bf215546Sopenharmony_ci assert(instr->definitions[0].bytes() == instr->operands[3].bytes()); 2812bf215546Sopenharmony_ci instr->definitions[0].setFixed(instr->operands[3].physReg()); 2813bf215546Sopenharmony_ci } else if (instr->isMIMG() && instr->definitions.size() == 1 && 2814bf215546Sopenharmony_ci !instr->operands[2].isUndefined()) { 2815bf215546Sopenharmony_ci assert(instr->definitions[0].bytes() == instr->operands[2].bytes()); 2816bf215546Sopenharmony_ci instr->definitions[0].setFixed(instr->operands[2].physReg()); 2817bf215546Sopenharmony_ci } 2818bf215546Sopenharmony_ci 2819bf215546Sopenharmony_ci ctx.defs_done.reset(); 2820bf215546Sopenharmony_ci 2821bf215546Sopenharmony_ci /* handle fixed definitions first */ 2822bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->definitions.size(); ++i) { 2823bf215546Sopenharmony_ci auto& definition = instr->definitions[i]; 2824bf215546Sopenharmony_ci if (!definition.isFixed()) 2825bf215546Sopenharmony_ci continue; 2826bf215546Sopenharmony_ci 2827bf215546Sopenharmony_ci adjust_max_used_regs(ctx, definition.regClass(), definition.physReg()); 2828bf215546Sopenharmony_ci /* check if the target register is blocked */ 2829bf215546Sopenharmony_ci if (register_file.test(definition.physReg(), definition.bytes())) { 2830bf215546Sopenharmony_ci const PhysRegInterval def_regs{definition.physReg(), definition.size()}; 2831bf215546Sopenharmony_ci 2832bf215546Sopenharmony_ci /* create parallelcopy pair to move blocking vars */ 2833bf215546Sopenharmony_ci std::vector<unsigned> vars = collect_vars(ctx, register_file, def_regs); 2834bf215546Sopenharmony_ci 2835bf215546Sopenharmony_ci RegisterFile tmp_file(register_file); 2836bf215546Sopenharmony_ci /* re-enable the killed operands, so that we don't move the blocking vars there */ 2837bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 2838bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKillBeforeDef()) 2839bf215546Sopenharmony_ci tmp_file.fill(op); 2840bf215546Sopenharmony_ci } 2841bf215546Sopenharmony_ci 2842bf215546Sopenharmony_ci ASSERTED bool success = false; 2843bf215546Sopenharmony_ci DefInfo info(ctx, instr, definition.regClass(), -1); 2844bf215546Sopenharmony_ci success = get_regs_for_copies(ctx, tmp_file, parallelcopy, vars, info.bounds, instr, 2845bf215546Sopenharmony_ci def_regs); 2846bf215546Sopenharmony_ci assert(success); 2847bf215546Sopenharmony_ci 2848bf215546Sopenharmony_ci update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0); 2849bf215546Sopenharmony_ci } 2850bf215546Sopenharmony_ci ctx.defs_done.set(i); 2851bf215546Sopenharmony_ci 2852bf215546Sopenharmony_ci if (!definition.isTemp()) 2853bf215546Sopenharmony_ci continue; 2854bf215546Sopenharmony_ci 2855bf215546Sopenharmony_ci ctx.assignments[definition.tempId()].set(definition); 2856bf215546Sopenharmony_ci register_file.fill(definition); 2857bf215546Sopenharmony_ci } 2858bf215546Sopenharmony_ci 2859bf215546Sopenharmony_ci /* handle all other definitions */ 2860bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->definitions.size(); ++i) { 2861bf215546Sopenharmony_ci Definition* definition = &instr->definitions[i]; 2862bf215546Sopenharmony_ci 2863bf215546Sopenharmony_ci if (definition->isFixed() || !definition->isTemp()) 2864bf215546Sopenharmony_ci continue; 2865bf215546Sopenharmony_ci 2866bf215546Sopenharmony_ci /* find free reg */ 2867bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_split_vector) { 2868bf215546Sopenharmony_ci PhysReg reg = instr->operands[0].physReg(); 2869bf215546Sopenharmony_ci RegClass rc = definition->regClass(); 2870bf215546Sopenharmony_ci for (unsigned j = 0; j < i; j++) 2871bf215546Sopenharmony_ci reg.reg_b += instr->definitions[j].bytes(); 2872bf215546Sopenharmony_ci if (get_reg_specified(ctx, register_file, rc, instr, reg)) { 2873bf215546Sopenharmony_ci definition->setFixed(reg); 2874bf215546Sopenharmony_ci } else if (i == 0) { 2875bf215546Sopenharmony_ci RegClass vec_rc = RegClass::get(rc.type(), instr->operands[0].bytes()); 2876bf215546Sopenharmony_ci DefInfo info(ctx, ctx.pseudo_dummy, vec_rc, -1); 2877bf215546Sopenharmony_ci std::pair<PhysReg, bool> res = get_reg_simple(ctx, register_file, info); 2878bf215546Sopenharmony_ci reg = res.first; 2879bf215546Sopenharmony_ci if (res.second && get_reg_specified(ctx, register_file, rc, instr, reg)) 2880bf215546Sopenharmony_ci definition->setFixed(reg); 2881bf215546Sopenharmony_ci } else if (instr->definitions[i - 1].isFixed()) { 2882bf215546Sopenharmony_ci reg = instr->definitions[i - 1].physReg(); 2883bf215546Sopenharmony_ci reg.reg_b += instr->definitions[i - 1].bytes(); 2884bf215546Sopenharmony_ci if (get_reg_specified(ctx, register_file, rc, instr, reg)) 2885bf215546Sopenharmony_ci definition->setFixed(reg); 2886bf215546Sopenharmony_ci } 2887bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_wqm || 2888bf215546Sopenharmony_ci instr->opcode == aco_opcode::p_parallelcopy) { 2889bf215546Sopenharmony_ci PhysReg reg = instr->operands[i].physReg(); 2890bf215546Sopenharmony_ci if (instr->operands[i].isTemp() && 2891bf215546Sopenharmony_ci instr->operands[i].getTemp().type() == definition->getTemp().type() && 2892bf215546Sopenharmony_ci !register_file.test(reg, definition->bytes())) 2893bf215546Sopenharmony_ci definition->setFixed(reg); 2894bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_extract_vector) { 2895bf215546Sopenharmony_ci PhysReg reg = instr->operands[0].physReg(); 2896bf215546Sopenharmony_ci reg.reg_b += definition->bytes() * instr->operands[1].constantValue(); 2897bf215546Sopenharmony_ci if (get_reg_specified(ctx, register_file, definition->regClass(), instr, reg)) 2898bf215546Sopenharmony_ci definition->setFixed(reg); 2899bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_create_vector) { 2900bf215546Sopenharmony_ci PhysReg reg = get_reg_create_vector(ctx, register_file, definition->getTemp(), 2901bf215546Sopenharmony_ci parallelcopy, instr); 2902bf215546Sopenharmony_ci update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0); 2903bf215546Sopenharmony_ci definition->setFixed(reg); 2904bf215546Sopenharmony_ci } 2905bf215546Sopenharmony_ci 2906bf215546Sopenharmony_ci if (!definition->isFixed()) { 2907bf215546Sopenharmony_ci Temp tmp = definition->getTemp(); 2908bf215546Sopenharmony_ci if (definition->regClass().is_subdword() && definition->bytes() < 4) { 2909bf215546Sopenharmony_ci PhysReg reg = get_reg(ctx, register_file, tmp, parallelcopy, instr); 2910bf215546Sopenharmony_ci definition->setFixed(reg); 2911bf215546Sopenharmony_ci if (reg.byte() || register_file.test(reg, 4)) { 2912bf215546Sopenharmony_ci add_subdword_definition(program, instr, reg); 2913bf215546Sopenharmony_ci definition = &instr->definitions[i]; /* add_subdword_definition can invalidate 2914bf215546Sopenharmony_ci the reference */ 2915bf215546Sopenharmony_ci } 2916bf215546Sopenharmony_ci } else { 2917bf215546Sopenharmony_ci definition->setFixed(get_reg(ctx, register_file, tmp, parallelcopy, instr)); 2918bf215546Sopenharmony_ci } 2919bf215546Sopenharmony_ci update_renames(ctx, register_file, parallelcopy, instr, 2920bf215546Sopenharmony_ci instr->opcode != aco_opcode::p_create_vector ? rename_not_killed_ops 2921bf215546Sopenharmony_ci : (UpdateRenames)0); 2922bf215546Sopenharmony_ci } 2923bf215546Sopenharmony_ci 2924bf215546Sopenharmony_ci assert( 2925bf215546Sopenharmony_ci definition->isFixed() && 2926bf215546Sopenharmony_ci ((definition->getTemp().type() == RegType::vgpr && definition->physReg() >= 256) || 2927bf215546Sopenharmony_ci (definition->getTemp().type() != RegType::vgpr && definition->physReg() < 256))); 2928bf215546Sopenharmony_ci ctx.defs_done.set(i); 2929bf215546Sopenharmony_ci ctx.assignments[definition->tempId()].set(*definition); 2930bf215546Sopenharmony_ci register_file.fill(*definition); 2931bf215546Sopenharmony_ci } 2932bf215546Sopenharmony_ci 2933bf215546Sopenharmony_ci handle_pseudo(ctx, register_file, instr.get()); 2934bf215546Sopenharmony_ci 2935bf215546Sopenharmony_ci /* kill definitions and late-kill operands and ensure that sub-dword operands can actually 2936bf215546Sopenharmony_ci * be read */ 2937bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) { 2938bf215546Sopenharmony_ci if (def.isTemp() && def.isKill()) 2939bf215546Sopenharmony_ci register_file.clear(def); 2940bf215546Sopenharmony_ci } 2941bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 2942bf215546Sopenharmony_ci const Operand& op = instr->operands[i]; 2943bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKill() && op.isLateKill()) 2944bf215546Sopenharmony_ci register_file.clear(op); 2945bf215546Sopenharmony_ci if (op.isTemp() && op.physReg().byte() != 0) 2946bf215546Sopenharmony_ci add_subdword_operand(ctx, instr, i, op.physReg().byte(), op.regClass()); 2947bf215546Sopenharmony_ci } 2948bf215546Sopenharmony_ci 2949bf215546Sopenharmony_ci /* emit parallelcopy */ 2950bf215546Sopenharmony_ci if (!parallelcopy.empty()) { 2951bf215546Sopenharmony_ci aco_ptr<Pseudo_instruction> pc; 2952bf215546Sopenharmony_ci pc.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, 2953bf215546Sopenharmony_ci Format::PSEUDO, parallelcopy.size(), 2954bf215546Sopenharmony_ci parallelcopy.size())); 2955bf215546Sopenharmony_ci bool linear_vgpr = false; 2956bf215546Sopenharmony_ci bool sgpr_operands_alias_defs = false; 2957bf215546Sopenharmony_ci uint64_t sgpr_operands[4] = {0, 0, 0, 0}; 2958bf215546Sopenharmony_ci for (unsigned i = 0; i < parallelcopy.size(); i++) { 2959bf215546Sopenharmony_ci linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr(); 2960bf215546Sopenharmony_ci 2961bf215546Sopenharmony_ci if (temp_in_scc && parallelcopy[i].first.isTemp() && 2962bf215546Sopenharmony_ci parallelcopy[i].first.getTemp().type() == RegType::sgpr) { 2963bf215546Sopenharmony_ci if (!sgpr_operands_alias_defs) { 2964bf215546Sopenharmony_ci unsigned reg = parallelcopy[i].first.physReg().reg(); 2965bf215546Sopenharmony_ci unsigned size = parallelcopy[i].first.getTemp().size(); 2966bf215546Sopenharmony_ci sgpr_operands[reg / 64u] |= u_bit_consecutive64(reg % 64u, size); 2967bf215546Sopenharmony_ci 2968bf215546Sopenharmony_ci reg = parallelcopy[i].second.physReg().reg(); 2969bf215546Sopenharmony_ci size = parallelcopy[i].second.getTemp().size(); 2970bf215546Sopenharmony_ci if (sgpr_operands[reg / 64u] & u_bit_consecutive64(reg % 64u, size)) 2971bf215546Sopenharmony_ci sgpr_operands_alias_defs = true; 2972bf215546Sopenharmony_ci } 2973bf215546Sopenharmony_ci } 2974bf215546Sopenharmony_ci 2975bf215546Sopenharmony_ci pc->operands[i] = parallelcopy[i].first; 2976bf215546Sopenharmony_ci pc->definitions[i] = parallelcopy[i].second; 2977bf215546Sopenharmony_ci assert(pc->operands[i].size() == pc->definitions[i].size()); 2978bf215546Sopenharmony_ci 2979bf215546Sopenharmony_ci /* it might happen that the operand is already renamed. we have to restore the 2980bf215546Sopenharmony_ci * original name. */ 2981bf215546Sopenharmony_ci std::unordered_map<unsigned, Temp>::iterator it = 2982bf215546Sopenharmony_ci ctx.orig_names.find(pc->operands[i].tempId()); 2983bf215546Sopenharmony_ci Temp orig = it != ctx.orig_names.end() ? it->second : pc->operands[i].getTemp(); 2984bf215546Sopenharmony_ci ctx.orig_names[pc->definitions[i].tempId()] = orig; 2985bf215546Sopenharmony_ci ctx.renames[block.index][orig.id()] = pc->definitions[i].getTemp(); 2986bf215546Sopenharmony_ci } 2987bf215546Sopenharmony_ci 2988bf215546Sopenharmony_ci if (temp_in_scc && (sgpr_operands_alias_defs || linear_vgpr)) { 2989bf215546Sopenharmony_ci /* disable definitions and re-enable operands */ 2990bf215546Sopenharmony_ci RegisterFile tmp_file(register_file); 2991bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) { 2992bf215546Sopenharmony_ci if (def.isTemp() && !def.isKill()) 2993bf215546Sopenharmony_ci tmp_file.clear(def); 2994bf215546Sopenharmony_ci } 2995bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 2996bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKill()) 2997bf215546Sopenharmony_ci tmp_file.block(op.physReg(), op.regClass()); 2998bf215546Sopenharmony_ci } 2999bf215546Sopenharmony_ci 3000bf215546Sopenharmony_ci handle_pseudo(ctx, tmp_file, pc.get()); 3001bf215546Sopenharmony_ci } else { 3002bf215546Sopenharmony_ci pc->tmp_in_scc = false; 3003bf215546Sopenharmony_ci } 3004bf215546Sopenharmony_ci 3005bf215546Sopenharmony_ci instructions.emplace_back(std::move(pc)); 3006bf215546Sopenharmony_ci } 3007bf215546Sopenharmony_ci 3008bf215546Sopenharmony_ci /* some instructions need VOP3 encoding if operand/definition is not assigned to VCC */ 3009bf215546Sopenharmony_ci bool instr_needs_vop3 = 3010bf215546Sopenharmony_ci !instr->isVOP3() && 3011bf215546Sopenharmony_ci ((instr->format == Format::VOPC && !(instr->definitions[0].physReg() == vcc)) || 3012bf215546Sopenharmony_ci (instr->opcode == aco_opcode::v_cndmask_b32 && 3013bf215546Sopenharmony_ci !(instr->operands[2].physReg() == vcc)) || 3014bf215546Sopenharmony_ci ((instr->opcode == aco_opcode::v_add_co_u32 || 3015bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_addc_co_u32 || 3016bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_sub_co_u32 || 3017bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_subb_co_u32 || 3018bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_subrev_co_u32 || 3019bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_subbrev_co_u32) && 3020bf215546Sopenharmony_ci !(instr->definitions[1].physReg() == vcc)) || 3021bf215546Sopenharmony_ci ((instr->opcode == aco_opcode::v_addc_co_u32 || 3022bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_subb_co_u32 || 3023bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_subbrev_co_u32) && 3024bf215546Sopenharmony_ci !(instr->operands[2].physReg() == vcc))); 3025bf215546Sopenharmony_ci if (instr_needs_vop3) { 3026bf215546Sopenharmony_ci 3027bf215546Sopenharmony_ci /* if the first operand is a literal, we have to move it to a reg */ 3028bf215546Sopenharmony_ci if (instr->operands.size() && instr->operands[0].isLiteral() && 3029bf215546Sopenharmony_ci program->gfx_level < GFX10) { 3030bf215546Sopenharmony_ci bool can_sgpr = true; 3031bf215546Sopenharmony_ci /* check, if we have to move to vgpr */ 3032bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 3033bf215546Sopenharmony_ci if (op.isTemp() && op.getTemp().type() == RegType::sgpr) { 3034bf215546Sopenharmony_ci can_sgpr = false; 3035bf215546Sopenharmony_ci break; 3036bf215546Sopenharmony_ci } 3037bf215546Sopenharmony_ci } 3038bf215546Sopenharmony_ci /* disable definitions and re-enable operands */ 3039bf215546Sopenharmony_ci RegisterFile tmp_file(register_file); 3040bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) 3041bf215546Sopenharmony_ci tmp_file.clear(def); 3042bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 3043bf215546Sopenharmony_ci if (op.isTemp() && op.isFirstKill()) 3044bf215546Sopenharmony_ci tmp_file.block(op.physReg(), op.regClass()); 3045bf215546Sopenharmony_ci } 3046bf215546Sopenharmony_ci Temp tmp = program->allocateTmp(can_sgpr ? s1 : v1); 3047bf215546Sopenharmony_ci ctx.assignments.emplace_back(); 3048bf215546Sopenharmony_ci PhysReg reg = get_reg(ctx, tmp_file, tmp, parallelcopy, instr); 3049bf215546Sopenharmony_ci update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops); 3050bf215546Sopenharmony_ci 3051bf215546Sopenharmony_ci aco_ptr<Instruction> mov; 3052bf215546Sopenharmony_ci if (can_sgpr) 3053bf215546Sopenharmony_ci mov.reset(create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32, 3054bf215546Sopenharmony_ci Format::SOP1, 1, 1)); 3055bf215546Sopenharmony_ci else 3056bf215546Sopenharmony_ci mov.reset(create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, 3057bf215546Sopenharmony_ci Format::VOP1, 1, 1)); 3058bf215546Sopenharmony_ci mov->operands[0] = instr->operands[0]; 3059bf215546Sopenharmony_ci mov->definitions[0] = Definition(tmp); 3060bf215546Sopenharmony_ci mov->definitions[0].setFixed(reg); 3061bf215546Sopenharmony_ci 3062bf215546Sopenharmony_ci instr->operands[0] = Operand(tmp); 3063bf215546Sopenharmony_ci instr->operands[0].setFixed(reg); 3064bf215546Sopenharmony_ci instr->operands[0].setFirstKill(true); 3065bf215546Sopenharmony_ci 3066bf215546Sopenharmony_ci instructions.emplace_back(std::move(mov)); 3067bf215546Sopenharmony_ci } 3068bf215546Sopenharmony_ci 3069bf215546Sopenharmony_ci /* change the instruction to VOP3 to enable an arbitrary register pair as dst */ 3070bf215546Sopenharmony_ci aco_ptr<Instruction> tmp = std::move(instr); 3071bf215546Sopenharmony_ci Format format = asVOP3(tmp->format); 3072bf215546Sopenharmony_ci instr.reset(create_instruction<VOP3_instruction>( 3073bf215546Sopenharmony_ci tmp->opcode, format, tmp->operands.size(), tmp->definitions.size())); 3074bf215546Sopenharmony_ci std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin()); 3075bf215546Sopenharmony_ci std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin()); 3076bf215546Sopenharmony_ci } 3077bf215546Sopenharmony_ci 3078bf215546Sopenharmony_ci instructions.emplace_back(std::move(*instr_it)); 3079bf215546Sopenharmony_ci 3080bf215546Sopenharmony_ci } /* end for Instr */ 3081bf215546Sopenharmony_ci 3082bf215546Sopenharmony_ci block.instructions = std::move(instructions); 3083bf215546Sopenharmony_ci } /* end for BB */ 3084bf215546Sopenharmony_ci 3085bf215546Sopenharmony_ci /* num_gpr = rnd_up(max_used_gpr + 1) */ 3086bf215546Sopenharmony_ci program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1); 3087bf215546Sopenharmony_ci program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1); 3088bf215546Sopenharmony_ci 3089bf215546Sopenharmony_ci program->progress = CompilationProgress::after_ra; 3090bf215546Sopenharmony_ci} 3091bf215546Sopenharmony_ci 3092bf215546Sopenharmony_ci} // namespace aco 3093