/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "aco_ir.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/memstream.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include <array> 30bf215546Sopenharmony_ci#include <map> 31bf215546Sopenharmony_ci#include <set> 32bf215546Sopenharmony_ci#include <vector> 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_cinamespace aco { 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_cistatic void 37bf215546Sopenharmony_ciaco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, 38bf215546Sopenharmony_ci const char* file, unsigned line, const char* fmt, va_list args) 39bf215546Sopenharmony_ci{ 40bf215546Sopenharmony_ci char* msg; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci if (program->debug.shorten_messages) { 43bf215546Sopenharmony_ci msg = ralloc_vasprintf(NULL, fmt, args); 44bf215546Sopenharmony_ci } else { 45bf215546Sopenharmony_ci msg = ralloc_strdup(NULL, prefix); 46bf215546Sopenharmony_ci ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line); 47bf215546Sopenharmony_ci ralloc_asprintf_append(&msg, " "); 48bf215546Sopenharmony_ci ralloc_vasprintf_append(&msg, fmt, args); 49bf215546Sopenharmony_ci } 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci if (program->debug.func) 52bf215546Sopenharmony_ci program->debug.func(program->debug.private_data, level, msg); 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci fprintf(program->debug.output, "%s\n", msg); 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci ralloc_free(msg); 57bf215546Sopenharmony_ci} 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_civoid 60bf215546Sopenharmony_ci_aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...) 
61bf215546Sopenharmony_ci{ 62bf215546Sopenharmony_ci va_list args; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci va_start(args, fmt); 65bf215546Sopenharmony_ci aco_log(program, ACO_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args); 66bf215546Sopenharmony_ci va_end(args); 67bf215546Sopenharmony_ci} 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_civoid 70bf215546Sopenharmony_ci_aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...) 71bf215546Sopenharmony_ci{ 72bf215546Sopenharmony_ci va_list args; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci va_start(args, fmt); 75bf215546Sopenharmony_ci aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args); 76bf215546Sopenharmony_ci va_end(args); 77bf215546Sopenharmony_ci} 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cibool 80bf215546Sopenharmony_civalidate_ir(Program* program) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci bool is_valid = true; 83bf215546Sopenharmony_ci auto check = [&program, &is_valid](bool success, const char* msg, 84bf215546Sopenharmony_ci aco::Instruction* instr) -> void 85bf215546Sopenharmony_ci { 86bf215546Sopenharmony_ci if (!success) { 87bf215546Sopenharmony_ci char* out; 88bf215546Sopenharmony_ci size_t outsize; 89bf215546Sopenharmony_ci struct u_memstream mem; 90bf215546Sopenharmony_ci u_memstream_open(&mem, &out, &outsize); 91bf215546Sopenharmony_ci FILE* const memf = u_memstream_get(&mem); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci fprintf(memf, "%s: ", msg); 94bf215546Sopenharmony_ci aco_print_instr(instr, memf); 95bf215546Sopenharmony_ci u_memstream_close(&mem); 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci aco_err(program, "%s", out); 98bf215546Sopenharmony_ci free(out); 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci is_valid = false; 101bf215546Sopenharmony_ci } 102bf215546Sopenharmony_ci }; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci auto check_block = 
[&program, &is_valid](bool success, const char* msg, 105bf215546Sopenharmony_ci aco::Block* block) -> void 106bf215546Sopenharmony_ci { 107bf215546Sopenharmony_ci if (!success) { 108bf215546Sopenharmony_ci aco_err(program, "%s: BB%u", msg, block->index); 109bf215546Sopenharmony_ci is_valid = false; 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci }; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci for (Block& block : program->blocks) { 114bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block.instructions) { 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci /* check base format */ 117bf215546Sopenharmony_ci Format base_format = instr->format; 118bf215546Sopenharmony_ci base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA); 119bf215546Sopenharmony_ci base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16); 120bf215546Sopenharmony_ci base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8); 121bf215546Sopenharmony_ci if ((uint32_t)base_format & (uint32_t)Format::VOP1) 122bf215546Sopenharmony_ci base_format = Format::VOP1; 123bf215546Sopenharmony_ci else if ((uint32_t)base_format & (uint32_t)Format::VOP2) 124bf215546Sopenharmony_ci base_format = Format::VOP2; 125bf215546Sopenharmony_ci else if ((uint32_t)base_format & (uint32_t)Format::VOPC) 126bf215546Sopenharmony_ci base_format = Format::VOPC; 127bf215546Sopenharmony_ci else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) { 128bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_interp_p1ll_f16 || 129bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p1lv_f16 || 130bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || 131bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p2_f16) { 132bf215546Sopenharmony_ci /* v_interp_*_fp16 are considered VINTRP by the compiler but 133bf215546Sopenharmony_ci * they are emitted as VOP3. 
134bf215546Sopenharmony_ci */ 135bf215546Sopenharmony_ci base_format = Format::VOP3; 136bf215546Sopenharmony_ci } else { 137bf215546Sopenharmony_ci base_format = Format::VINTRP; 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci } 140bf215546Sopenharmony_ci check(base_format == instr_info.format[(int)instr->opcode], 141bf215546Sopenharmony_ci "Wrong base format for instruction", instr.get()); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci /* check VOP3 modifiers */ 144bf215546Sopenharmony_ci if (instr->isVOP3() && instr->format != Format::VOP3) { 145bf215546Sopenharmony_ci check(base_format == Format::VOP2 || base_format == Format::VOP1 || 146bf215546Sopenharmony_ci base_format == Format::VOPC || base_format == Format::VINTRP, 147bf215546Sopenharmony_ci "Format cannot have VOP3/VOP3B applied", instr.get()); 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci /* check SDWA */ 151bf215546Sopenharmony_ci if (instr->isSDWA()) { 152bf215546Sopenharmony_ci check(base_format == Format::VOP2 || base_format == Format::VOP1 || 153bf215546Sopenharmony_ci base_format == Format::VOPC, 154bf215546Sopenharmony_ci "Format cannot have SDWA applied", instr.get()); 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get()); 157bf215546Sopenharmony_ci check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get()); 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci SDWA_instruction& sdwa = instr->sdwa(); 160bf215546Sopenharmony_ci check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+", 161bf215546Sopenharmony_ci instr.get()); 162bf215546Sopenharmony_ci if (base_format == Format::VOPC) { 163bf215546Sopenharmony_ci check(sdwa.clamp == false || program->gfx_level == GFX8, 164bf215546Sopenharmony_ci "SDWA VOPC clamp only supported on GFX8", instr.get()); 165bf215546Sopenharmony_ci check((instr->definitions[0].isFixed() && 
instr->definitions[0].physReg() == vcc) || 166bf215546Sopenharmony_ci program->gfx_level >= GFX9, 167bf215546Sopenharmony_ci "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get()); 168bf215546Sopenharmony_ci } else { 169bf215546Sopenharmony_ci const Definition& def = instr->definitions[0]; 170bf215546Sopenharmony_ci check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes", 171bf215546Sopenharmony_ci instr.get()); 172bf215546Sopenharmony_ci check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(), 173bf215546Sopenharmony_ci "SDWA definition selection size must be at most definition size", instr.get()); 174bf215546Sopenharmony_ci check( 175bf215546Sopenharmony_ci sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4, 176bf215546Sopenharmony_ci "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get()); 177bf215546Sopenharmony_ci check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset", 178bf215546Sopenharmony_ci instr.get()); 179bf215546Sopenharmony_ci check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(), 180bf215546Sopenharmony_ci "SDWA dst_sel size must be definition size for subdword definitions", 181bf215546Sopenharmony_ci instr.get()); 182bf215546Sopenharmony_ci check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0, 183bf215546Sopenharmony_ci "SDWA dst_sel offset must be 0 for subdword definitions", instr.get()); 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) { 187bf215546Sopenharmony_ci const Operand& op = instr->operands[i]; 188bf215546Sopenharmony_ci check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get()); 189bf215546Sopenharmony_ci check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(), 190bf215546Sopenharmony_ci "SDWA operand selection size must be at most operand size", instr.get()); 
191bf215546Sopenharmony_ci check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4, 192bf215546Sopenharmony_ci "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get()); 193bf215546Sopenharmony_ci check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset", 194bf215546Sopenharmony_ci instr.get()); 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci if (instr->operands.size() >= 3) { 197bf215546Sopenharmony_ci check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc, 198bf215546Sopenharmony_ci "3rd operand must be fixed to vcc with SDWA", instr.get()); 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci if (instr->definitions.size() >= 2) { 201bf215546Sopenharmony_ci check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc, 202bf215546Sopenharmony_ci "2nd definition must be fixed to vcc with SDWA", instr.get()); 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci const bool sdwa_opcodes = 206bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 && 207bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_fmamk_f32 && 208bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_fmaak_f32 && 209bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_fmamk_f16 && 210bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_fmaak_f16 && 211bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_madmk_f32 && 212bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_madak_f32 && 213bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_madmk_f16 && 214bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_madak_f16 && 215bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_readfirstlane_b32 && 216bf215546Sopenharmony_ci instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32; 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci const bool feature_mac = 
219bf215546Sopenharmony_ci program->gfx_level == GFX8 && 220bf215546Sopenharmony_ci (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16); 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get()); 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci /* check opsel */ 226bf215546Sopenharmony_ci if (instr->isVOP3()) { 227bf215546Sopenharmony_ci VOP3_instruction& vop3 = instr->vop3(); 228bf215546Sopenharmony_ci check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+", 229bf215546Sopenharmony_ci instr.get()); 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) { 232bf215546Sopenharmony_ci if (i >= instr->operands.size() || 233bf215546Sopenharmony_ci (instr->operands[i].hasRegClass() && 234bf215546Sopenharmony_ci instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())) 235bf215546Sopenharmony_ci check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get()); 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed()) 238bf215546Sopenharmony_ci check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", 239bf215546Sopenharmony_ci instr.get()); 240bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 || 241bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_fma_mixhi_f16 || 242bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_fma_mix_f32) { 243bf215546Sopenharmony_ci check(instr->definitions[0].regClass() == 244bf215546Sopenharmony_ci (instr->opcode == aco_opcode::v_fma_mix_f32 ? 
v1 : v2b), 245bf215546Sopenharmony_ci "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get()); 246bf215546Sopenharmony_ci } else if (instr->isVOP3P()) { 247bf215546Sopenharmony_ci VOP3P_instruction& vop3p = instr->vop3p(); 248bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 249bf215546Sopenharmony_ci if (instr->operands[i].hasRegClass() && 250bf215546Sopenharmony_ci instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()) 251bf215546Sopenharmony_ci check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0, 252bf215546Sopenharmony_ci "Unexpected opsel for subdword operand", instr.get()); 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition", 255bf215546Sopenharmony_ci instr.get()); 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci /* check for undefs */ 259bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 260bf215546Sopenharmony_ci if (instr->operands[i].isUndefined()) { 261bf215546Sopenharmony_ci bool flat = instr->isFlatLike(); 262bf215546Sopenharmony_ci bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() || 263bf215546Sopenharmony_ci instr->opcode == aco_opcode::p_create_vector || 264bf215546Sopenharmony_ci instr->opcode == aco_opcode::p_jump_to_epilog || 265bf215546Sopenharmony_ci (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) || 266bf215546Sopenharmony_ci ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) || 267bf215546Sopenharmony_ci (instr->isScratch() && i == 0); 268bf215546Sopenharmony_ci check(can_be_undef, "Undefs can only be used in certain operands", instr.get()); 269bf215546Sopenharmony_ci } else { 270bf215546Sopenharmony_ci check(instr->operands[i].isFixed() || instr->operands[i].isTemp() || 271bf215546Sopenharmony_ci instr->operands[i].isConstant(), 272bf215546Sopenharmony_ci "Uninitialized 
Operand", instr.get()); 273bf215546Sopenharmony_ci } 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci /* check subdword definitions */ 277bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->definitions.size(); i++) { 278bf215546Sopenharmony_ci if (instr->definitions[i].regClass().is_subdword()) 279bf215546Sopenharmony_ci check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(), 280bf215546Sopenharmony_ci "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes", 281bf215546Sopenharmony_ci instr.get()); 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci if (instr->isSALU() || instr->isVALU()) { 285bf215546Sopenharmony_ci /* check literals */ 286bf215546Sopenharmony_ci Operand literal(s1); 287bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 288bf215546Sopenharmony_ci Operand op = instr->operands[i]; 289bf215546Sopenharmony_ci if (!op.isLiteral()) 290bf215546Sopenharmony_ci continue; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci check(!instr->isDPP() && !instr->isSDWA() && 293bf215546Sopenharmony_ci (!instr->isVOP3() || program->gfx_level >= GFX10) && 294bf215546Sopenharmony_ci (!instr->isVOP3P() || program->gfx_level >= GFX10), 295bf215546Sopenharmony_ci "Literal applied on wrong instruction format", instr.get()); 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci check(literal.isUndefined() || (literal.size() == op.size() && 298bf215546Sopenharmony_ci literal.constantValue() == op.constantValue()), 299bf215546Sopenharmony_ci "Only 1 Literal allowed", instr.get()); 300bf215546Sopenharmony_ci literal = op; 301bf215546Sopenharmony_ci check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2, 302bf215546Sopenharmony_ci "Wrong source position for Literal argument", instr.get()); 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci /* check num sgprs for VALU */ 
306bf215546Sopenharmony_ci if (instr->isVALU()) { 307bf215546Sopenharmony_ci bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 || 308bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_lshrrev_b64 || 309bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_ashrrev_i64; 310bf215546Sopenharmony_ci unsigned const_bus_limit = 1; 311bf215546Sopenharmony_ci if (program->gfx_level >= GFX10 && !is_shift64) 312bf215546Sopenharmony_ci const_bus_limit = 2; 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5; 315bf215546Sopenharmony_ci if (instr->isSDWA()) 316bf215546Sopenharmony_ci scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4; 317bf215546Sopenharmony_ci else if (instr->isDPP()) 318bf215546Sopenharmony_ci scalar_mask = 0x4; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 || 321bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_readlane_b32 || 322bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_readlane_b32_e64) { 323bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::sgpr, 324bf215546Sopenharmony_ci "Wrong Definition type for VALU instruction", instr.get()); 325bf215546Sopenharmony_ci } else { 326bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::vgpr, 327bf215546Sopenharmony_ci "Wrong Definition type for VALU instruction", instr.get()); 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci unsigned num_sgprs = 0; 331bf215546Sopenharmony_ci unsigned sgpr[] = {0, 0}; 332bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 333bf215546Sopenharmony_ci Operand op = instr->operands[i]; 334bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_readfirstlane_b32 || 335bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_readlane_b32 || 336bf215546Sopenharmony_ci instr->opcode == 
aco_opcode::v_readlane_b32_e64) { 337bf215546Sopenharmony_ci check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) || 338bf215546Sopenharmony_ci op.isConstant(), 339bf215546Sopenharmony_ci "Must be a SGPR or a constant", instr.get()); 340bf215546Sopenharmony_ci check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr && 341bf215546Sopenharmony_ci op.bytes() <= 4), 342bf215546Sopenharmony_ci "Wrong Operand type for VALU instruction", instr.get()); 343bf215546Sopenharmony_ci continue; 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_permlane16_b32 || 346bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_permlanex16_b32) { 347bf215546Sopenharmony_ci check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr), 348bf215546Sopenharmony_ci "Operand 0 of v_permlane must be VGPR", instr.get()); 349bf215546Sopenharmony_ci check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) || 350bf215546Sopenharmony_ci op.isConstant(), 351bf215546Sopenharmony_ci "Lane select operands of v_permlane must be SGPR or constant", 352bf215546Sopenharmony_ci instr.get()); 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_writelane_b32 || 356bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_writelane_b32_e64) { 357bf215546Sopenharmony_ci check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr && 358bf215546Sopenharmony_ci op.bytes() <= 4), 359bf215546Sopenharmony_ci "Wrong Operand type for VALU instruction", instr.get()); 360bf215546Sopenharmony_ci check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) || 361bf215546Sopenharmony_ci op.isConstant(), 362bf215546Sopenharmony_ci "Must be a SGPR or a constant", instr.get()); 363bf215546Sopenharmony_ci continue; 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) { 
366bf215546Sopenharmony_ci check(scalar_mask & (1 << i), "Wrong source position for SGPR argument", 367bf215546Sopenharmony_ci instr.get()); 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) { 370bf215546Sopenharmony_ci if (num_sgprs < 2) 371bf215546Sopenharmony_ci sgpr[num_sgprs++] = op.tempId(); 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci if (op.isConstant() && !op.isLiteral()) 376bf215546Sopenharmony_ci check(scalar_mask & (1 << i), "Wrong source position for constant argument", 377bf215546Sopenharmony_ci instr.get()); 378bf215546Sopenharmony_ci } 379bf215546Sopenharmony_ci check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit, 380bf215546Sopenharmony_ci "Too many SGPRs/literals", instr.get()); 381bf215546Sopenharmony_ci } 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci if (instr->isSOP1() || instr->isSOP2()) { 384bf215546Sopenharmony_ci if (!instr->definitions.empty()) 385bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::sgpr, 386bf215546Sopenharmony_ci "Wrong Definition type for SALU instruction", instr.get()); 387bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 388bf215546Sopenharmony_ci check(op.isConstant() || op.regClass().type() <= RegType::sgpr, 389bf215546Sopenharmony_ci "Wrong Operand type for SALU instruction", instr.get()); 390bf215546Sopenharmony_ci } 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci } 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci switch (instr->format) { 395bf215546Sopenharmony_ci case Format::PSEUDO: { 396bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_create_vector) { 397bf215546Sopenharmony_ci unsigned size = 0; 398bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 399bf215546Sopenharmony_ci check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get()); 
400bf215546Sopenharmony_ci size += op.bytes(); 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci check(size == instr->definitions[0].bytes(), 403bf215546Sopenharmony_ci "Definition size does not match operand sizes", instr.get()); 404bf215546Sopenharmony_ci if (instr->definitions[0].getTemp().type() == RegType::sgpr) { 405bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 406bf215546Sopenharmony_ci check(op.isConstant() || op.regClass().type() == RegType::sgpr, 407bf215546Sopenharmony_ci "Wrong Operand type for scalar vector", instr.get()); 408bf215546Sopenharmony_ci } 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_extract_vector) { 411bf215546Sopenharmony_ci check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(), 412bf215546Sopenharmony_ci "Wrong Operand types", instr.get()); 413bf215546Sopenharmony_ci check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <= 414bf215546Sopenharmony_ci instr->operands[0].bytes(), 415bf215546Sopenharmony_ci "Index out of range", instr.get()); 416bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::vgpr || 417bf215546Sopenharmony_ci instr->operands[0].regClass().type() == RegType::sgpr, 418bf215546Sopenharmony_ci "Cannot extract SGPR value from VGPR vector", instr.get()); 419bf215546Sopenharmony_ci check(program->gfx_level >= GFX9 || 420bf215546Sopenharmony_ci !instr->definitions[0].regClass().is_subdword() || 421bf215546Sopenharmony_ci instr->operands[0].regClass().type() == RegType::vgpr, 422bf215546Sopenharmony_ci "Cannot extract subdword from SGPR before GFX9+", instr.get()); 423bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_split_vector) { 424bf215546Sopenharmony_ci check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get()); 425bf215546Sopenharmony_ci unsigned size = 0; 426bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) { 
427bf215546Sopenharmony_ci size += def.bytes(); 428bf215546Sopenharmony_ci } 429bf215546Sopenharmony_ci check(size == instr->operands[0].bytes(), 430bf215546Sopenharmony_ci "Operand size does not match definition sizes", instr.get()); 431bf215546Sopenharmony_ci if (instr->operands[0].getTemp().type() == RegType::vgpr) { 432bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) 433bf215546Sopenharmony_ci check(def.regClass().type() == RegType::vgpr, 434bf215546Sopenharmony_ci "Wrong Definition type for VGPR split_vector", instr.get()); 435bf215546Sopenharmony_ci } else { 436bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) 437bf215546Sopenharmony_ci check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(), 438bf215546Sopenharmony_ci "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get()); 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_parallelcopy) { 441bf215546Sopenharmony_ci check(instr->definitions.size() == instr->operands.size(), 442bf215546Sopenharmony_ci "Number of Operands does not match number of Definitions", instr.get()); 443bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 444bf215546Sopenharmony_ci check(instr->definitions[i].bytes() == instr->operands[i].bytes(), 445bf215546Sopenharmony_ci "Operand and Definition size must match", instr.get()); 446bf215546Sopenharmony_ci if (instr->operands[i].isTemp()) { 447bf215546Sopenharmony_ci check((instr->definitions[i].getTemp().type() == 448bf215546Sopenharmony_ci instr->operands[i].regClass().type()) || 449bf215546Sopenharmony_ci (instr->definitions[i].getTemp().type() == RegType::vgpr && 450bf215546Sopenharmony_ci instr->operands[i].regClass().type() == RegType::sgpr), 451bf215546Sopenharmony_ci "Operand and Definition types do not match", instr.get()); 452bf215546Sopenharmony_ci check(instr->definitions[i].regClass().is_linear_vgpr() == 453bf215546Sopenharmony_ci 
instr->operands[i].regClass().is_linear_vgpr(), 454bf215546Sopenharmony_ci "Operand and Definition types do not match", instr.get()); 455bf215546Sopenharmony_ci } else { 456bf215546Sopenharmony_ci check(!instr->definitions[i].regClass().is_linear_vgpr(), 457bf215546Sopenharmony_ci "Can only copy linear VGPRs into linear VGPRs, not constant/undef", 458bf215546Sopenharmony_ci instr.get()); 459bf215546Sopenharmony_ci } 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_phi) { 462bf215546Sopenharmony_ci check(instr->operands.size() == block.logical_preds.size(), 463bf215546Sopenharmony_ci "Number of Operands does not match number of predecessors", instr.get()); 464bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::vgpr, 465bf215546Sopenharmony_ci "Logical Phi Definition must be vgpr", instr.get()); 466bf215546Sopenharmony_ci for (const Operand& op : instr->operands) 467bf215546Sopenharmony_ci check(instr->definitions[0].size() == op.size(), 468bf215546Sopenharmony_ci "Operand sizes must match Definition size", instr.get()); 469bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_linear_phi) { 470bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 471bf215546Sopenharmony_ci check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type", 472bf215546Sopenharmony_ci instr.get()); 473bf215546Sopenharmony_ci check(instr->definitions[0].size() == op.size(), 474bf215546Sopenharmony_ci "Operand sizes must match Definition size", instr.get()); 475bf215546Sopenharmony_ci } 476bf215546Sopenharmony_ci check(instr->operands.size() == block.linear_preds.size(), 477bf215546Sopenharmony_ci "Number of Operands does not match number of predecessors", instr.get()); 478bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_extract || 479bf215546Sopenharmony_ci instr->opcode == aco_opcode::p_insert) { 480bf215546Sopenharmony_ci check(instr->operands[0].isTemp(), "Data 
operand must be temporary", instr.get()); 481bf215546Sopenharmony_ci check(instr->operands[1].isConstant(), "Index must be constant", instr.get()); 482bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_extract) 483bf215546Sopenharmony_ci check(instr->operands[3].isConstant(), "Sign-extend flag must be constant", 484bf215546Sopenharmony_ci instr.get()); 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() != RegType::sgpr || 487bf215546Sopenharmony_ci instr->operands[0].getTemp().type() == RegType::sgpr, 488bf215546Sopenharmony_ci "Can't extract/insert VGPR to SGPR", instr.get()); 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_insert) 491bf215546Sopenharmony_ci check(instr->operands[0].bytes() == instr->definitions[0].bytes(), 492bf215546Sopenharmony_ci "Sizes of p_insert data operand and definition must match", instr.get()); 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci if (instr->definitions[0].getTemp().type() == RegType::sgpr) 495bf215546Sopenharmony_ci check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() && 496bf215546Sopenharmony_ci instr->definitions[1].physReg() == scc, 497bf215546Sopenharmony_ci "SGPR extract/insert needs an SCC definition", instr.get()); 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u; 500bf215546Sopenharmony_ci unsigned op_bits = instr->operands[2].constantValue(); 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_insert) { 503bf215546Sopenharmony_ci check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get()); 504bf215546Sopenharmony_ci check(op_bits < data_bits, "Size must be smaller than source", instr.get()); 505bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_extract) { 506bf215546Sopenharmony_ci check(op_bits == 8 || op_bits == 16 || op_bits == 32, 507bf215546Sopenharmony_ci 
"Size must be 8 or 16 or 32", instr.get()); 508bf215546Sopenharmony_ci check(data_bits >= op_bits, "Can't extract more bits than what the data has.", 509bf215546Sopenharmony_ci instr.get()); 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci unsigned comp = data_bits / MAX2(op_bits, 1); 513bf215546Sopenharmony_ci check(instr->operands[1].constantValue() < comp, "Index must be in-bounds", 514bf215546Sopenharmony_ci instr.get()); 515bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_jump_to_epilog) { 516bf215546Sopenharmony_ci check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions", 517bf215546Sopenharmony_ci instr.get()); 518bf215546Sopenharmony_ci check(instr->operands.size() > 0 && 519bf215546Sopenharmony_ci instr->operands[0].getTemp().type() == RegType::sgpr && 520bf215546Sopenharmony_ci instr->operands[0].getTemp().size() == 2, 521bf215546Sopenharmony_ci "First operand of p_jump_to_epilog must be a SGPR", instr.get()); 522bf215546Sopenharmony_ci for (unsigned i = 1; i < instr->operands.size(); i++) { 523bf215546Sopenharmony_ci check(instr->operands[i].getTemp().type() == RegType::vgpr || 524bf215546Sopenharmony_ci instr->operands[i].isUndefined(), 525bf215546Sopenharmony_ci "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get()); 526bf215546Sopenharmony_ci } 527bf215546Sopenharmony_ci } 528bf215546Sopenharmony_ci break; 529bf215546Sopenharmony_ci } 530bf215546Sopenharmony_ci case Format::PSEUDO_REDUCTION: { 531bf215546Sopenharmony_ci for (const Operand& op : instr->operands) 532bf215546Sopenharmony_ci check(op.regClass().type() == RegType::vgpr, 533bf215546Sopenharmony_ci "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.", 534bf215546Sopenharmony_ci instr.get()); 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_reduce && 537bf215546Sopenharmony_ci instr->reduction().cluster_size == program->wave_size) 
538bf215546Sopenharmony_ci check(instr->definitions[0].regClass().type() == RegType::sgpr || 539bf215546Sopenharmony_ci program->wave_size == 32, 540bf215546Sopenharmony_ci "The result of unclustered reductions must go into an SGPR.", instr.get()); 541bf215546Sopenharmony_ci else 542bf215546Sopenharmony_ci check(instr->definitions[0].regClass().type() == RegType::vgpr, 543bf215546Sopenharmony_ci "The result of scans and clustered reductions must go into a VGPR.", 544bf215546Sopenharmony_ci instr.get()); 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci break; 547bf215546Sopenharmony_ci } 548bf215546Sopenharmony_ci case Format::SMEM: { 549bf215546Sopenharmony_ci if (instr->operands.size() >= 1) 550bf215546Sopenharmony_ci check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) || 551bf215546Sopenharmony_ci (instr->operands[0].isTemp() && 552bf215546Sopenharmony_ci instr->operands[0].regClass().type() == RegType::sgpr), 553bf215546Sopenharmony_ci "SMEM operands must be sgpr", instr.get()); 554bf215546Sopenharmony_ci if (instr->operands.size() >= 2) 555bf215546Sopenharmony_ci check(instr->operands[1].isConstant() || 556bf215546Sopenharmony_ci (instr->operands[1].isTemp() && 557bf215546Sopenharmony_ci instr->operands[1].regClass().type() == RegType::sgpr), 558bf215546Sopenharmony_ci "SMEM offset must be constant or sgpr", instr.get()); 559bf215546Sopenharmony_ci if (!instr->definitions.empty()) 560bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::sgpr, 561bf215546Sopenharmony_ci "SMEM result must be sgpr", instr.get()); 562bf215546Sopenharmony_ci break; 563bf215546Sopenharmony_ci } 564bf215546Sopenharmony_ci case Format::MTBUF: 565bf215546Sopenharmony_ci case Format::MUBUF: { 566bf215546Sopenharmony_ci check(instr->operands.size() > 1, "VMEM instructions must have at least one operand", 567bf215546Sopenharmony_ci instr.get()); 568bf215546Sopenharmony_ci check(instr->operands[1].hasRegClass() && 
569bf215546Sopenharmony_ci instr->operands[1].regClass().type() == RegType::vgpr, 570bf215546Sopenharmony_ci "VADDR must be in vgpr for VMEM instructions", instr.get()); 571bf215546Sopenharmony_ci check( 572bf215546Sopenharmony_ci instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr, 573bf215546Sopenharmony_ci "VMEM resource constant must be sgpr", instr.get()); 574bf215546Sopenharmony_ci check(instr->operands.size() < 4 || 575bf215546Sopenharmony_ci (instr->operands[3].isTemp() && 576bf215546Sopenharmony_ci instr->operands[3].regClass().type() == RegType::vgpr), 577bf215546Sopenharmony_ci "VMEM write data must be vgpr", instr.get()); 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables 580bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_ubyte || 581bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_sbyte || 582bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_ushort || 583bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_sshort || 584bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_ubyte_d16 || 585bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi || 586bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_sbyte_d16 || 587bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi || 588bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_short_d16 || 589bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_short_d16_hi || 590bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_format_d16_x || 591bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_format_d16_hi_x || 592bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_format_d16_xy || 593bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_format_d16_xyz || 
594bf215546Sopenharmony_ci instr->opcode == aco_opcode::buffer_load_format_d16_xyzw || 595bf215546Sopenharmony_ci instr->opcode == aco_opcode::tbuffer_load_format_d16_x || 596bf215546Sopenharmony_ci instr->opcode == aco_opcode::tbuffer_load_format_d16_xy || 597bf215546Sopenharmony_ci instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz || 598bf215546Sopenharmony_ci instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw; 599bf215546Sopenharmony_ci if (instr->definitions.size()) { 600bf215546Sopenharmony_ci check(instr->definitions[0].isTemp() && 601bf215546Sopenharmony_ci instr->definitions[0].regClass().type() == RegType::vgpr, 602bf215546Sopenharmony_ci "VMEM definitions[0] (VDATA) must be VGPR", instr.get()); 603bf215546Sopenharmony_ci check(d16 || !instr->definitions[0].regClass().is_subdword(), 604bf215546Sopenharmony_ci "Only D16 opcodes can load subdword values.", instr.get()); 605bf215546Sopenharmony_ci check(instr->definitions[0].bytes() <= 8 || !d16, 606bf215546Sopenharmony_ci "D16 opcodes can only load up to 8 bytes.", instr.get()); 607bf215546Sopenharmony_ci } 608bf215546Sopenharmony_ci break; 609bf215546Sopenharmony_ci } 610bf215546Sopenharmony_ci case Format::MIMG: { 611bf215546Sopenharmony_ci check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands", 612bf215546Sopenharmony_ci instr.get()); 613bf215546Sopenharmony_ci check(instr->operands[0].hasRegClass() && 614bf215546Sopenharmony_ci (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8), 615bf215546Sopenharmony_ci "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get()); 616bf215546Sopenharmony_ci if (instr->operands[1].hasRegClass()) 617bf215546Sopenharmony_ci check(instr->operands[1].regClass() == s4, 618bf215546Sopenharmony_ci "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get()); 619bf215546Sopenharmony_ci if (!instr->operands[2].isUndefined()) { 620bf215546Sopenharmony_ci bool is_cmpswap = instr->opcode == 
aco_opcode::image_atomic_cmpswap || 621bf215546Sopenharmony_ci instr->opcode == aco_opcode::image_atomic_fcmpswap; 622bf215546Sopenharmony_ci check(instr->definitions.empty() || 623bf215546Sopenharmony_ci (instr->definitions[0].regClass() == instr->operands[2].regClass() || 624bf215546Sopenharmony_ci is_cmpswap), 625bf215546Sopenharmony_ci "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and " 626bf215546Sopenharmony_ci "TFE/LWE loads", 627bf215546Sopenharmony_ci instr.get()); 628bf215546Sopenharmony_ci } 629bf215546Sopenharmony_ci check(instr->operands.size() == 4 || program->gfx_level >= GFX10, 630bf215546Sopenharmony_ci "NSA is only supported on GFX10+", instr.get()); 631bf215546Sopenharmony_ci for (unsigned i = 3; i < instr->operands.size(); i++) { 632bf215546Sopenharmony_ci if (instr->operands.size() == 4) { 633bf215546Sopenharmony_ci check(instr->operands[i].hasRegClass() && 634bf215546Sopenharmony_ci instr->operands[i].regClass().type() == RegType::vgpr, 635bf215546Sopenharmony_ci "MIMG operands[3] (VADDR) must be VGPR", instr.get()); 636bf215546Sopenharmony_ci } else { 637bf215546Sopenharmony_ci check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used", 638bf215546Sopenharmony_ci instr.get()); 639bf215546Sopenharmony_ci } 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci if (instr->definitions.size()) { 643bf215546Sopenharmony_ci check(instr->definitions[0].isTemp() && 644bf215546Sopenharmony_ci instr->definitions[0].regClass().type() == RegType::vgpr, 645bf215546Sopenharmony_ci "MIMG definitions[0] (VDATA) must be VGPR", instr.get()); 646bf215546Sopenharmony_ci check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(), 647bf215546Sopenharmony_ci "Only D16 MIMG instructions can load subdword values.", instr.get()); 648bf215546Sopenharmony_ci check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16, 649bf215546Sopenharmony_ci "D16 MIMG instructions can 
only load up to 8 bytes.", instr.get()); 650bf215546Sopenharmony_ci } 651bf215546Sopenharmony_ci break; 652bf215546Sopenharmony_ci } 653bf215546Sopenharmony_ci case Format::DS: { 654bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 655bf215546Sopenharmony_ci check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0, 656bf215546Sopenharmony_ci "Only VGPRs are valid DS instruction operands", instr.get()); 657bf215546Sopenharmony_ci } 658bf215546Sopenharmony_ci if (!instr->definitions.empty()) 659bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::vgpr, 660bf215546Sopenharmony_ci "DS instruction must return VGPR", instr.get()); 661bf215546Sopenharmony_ci break; 662bf215546Sopenharmony_ci } 663bf215546Sopenharmony_ci case Format::EXP: { 664bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) 665bf215546Sopenharmony_ci check(instr->operands[i].hasRegClass() && 666bf215546Sopenharmony_ci instr->operands[i].regClass().type() == RegType::vgpr, 667bf215546Sopenharmony_ci "Only VGPRs are valid Export arguments", instr.get()); 668bf215546Sopenharmony_ci break; 669bf215546Sopenharmony_ci } 670bf215546Sopenharmony_ci case Format::FLAT: 671bf215546Sopenharmony_ci check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR", 672bf215546Sopenharmony_ci instr.get()); 673bf215546Sopenharmony_ci FALLTHROUGH; 674bf215546Sopenharmony_ci case Format::GLOBAL: 675bf215546Sopenharmony_ci check( 676bf215546Sopenharmony_ci instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr, 677bf215546Sopenharmony_ci "FLAT/GLOBAL address must be vgpr", instr.get()); 678bf215546Sopenharmony_ci FALLTHROUGH; 679bf215546Sopenharmony_ci case Format::SCRATCH: { 680bf215546Sopenharmony_ci check(instr->operands[0].hasRegClass() && 681bf215546Sopenharmony_ci instr->operands[0].regClass().type() == RegType::vgpr, 682bf215546Sopenharmony_ci "FLAT/GLOBAL/SCRATCH address must be undefined or 
vgpr", instr.get()); 683bf215546Sopenharmony_ci check(instr->operands[1].hasRegClass() && 684bf215546Sopenharmony_ci instr->operands[1].regClass().type() == RegType::sgpr, 685bf215546Sopenharmony_ci "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get()); 686bf215546Sopenharmony_ci if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3) 687bf215546Sopenharmony_ci check(instr->operands[0].isTemp() || instr->operands[1].isTemp(), 688bf215546Sopenharmony_ci "SCRATCH must have either SADDR or ADDR operand", instr.get()); 689bf215546Sopenharmony_ci if (!instr->definitions.empty()) 690bf215546Sopenharmony_ci check(instr->definitions[0].getTemp().type() == RegType::vgpr, 691bf215546Sopenharmony_ci "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get()); 692bf215546Sopenharmony_ci else 693bf215546Sopenharmony_ci check(instr->operands[2].regClass().type() == RegType::vgpr, 694bf215546Sopenharmony_ci "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get()); 695bf215546Sopenharmony_ci break; 696bf215546Sopenharmony_ci } 697bf215546Sopenharmony_ci default: break; 698bf215546Sopenharmony_ci } 699bf215546Sopenharmony_ci } 700bf215546Sopenharmony_ci } 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci /* validate CFG */ 703bf215546Sopenharmony_ci for (unsigned i = 0; i < program->blocks.size(); i++) { 704bf215546Sopenharmony_ci Block& block = program->blocks[i]; 705bf215546Sopenharmony_ci check_block(block.index == i, "block.index must match actual index", &block); 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci /* predecessors/successors should be sorted */ 708bf215546Sopenharmony_ci for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++) 709bf215546Sopenharmony_ci check_block(block.linear_preds[j] < block.linear_preds[j + 1], 710bf215546Sopenharmony_ci "linear predecessors must be sorted", &block); 711bf215546Sopenharmony_ci for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++) 712bf215546Sopenharmony_ci 
check_block(block.logical_preds[j] < block.logical_preds[j + 1], 713bf215546Sopenharmony_ci "logical predecessors must be sorted", &block); 714bf215546Sopenharmony_ci for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++) 715bf215546Sopenharmony_ci check_block(block.linear_succs[j] < block.linear_succs[j + 1], 716bf215546Sopenharmony_ci "linear successors must be sorted", &block); 717bf215546Sopenharmony_ci for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++) 718bf215546Sopenharmony_ci check_block(block.logical_succs[j] < block.logical_succs[j + 1], 719bf215546Sopenharmony_ci "logical successors must be sorted", &block); 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci /* critical edges are not allowed */ 722bf215546Sopenharmony_ci if (block.linear_preds.size() > 1) { 723bf215546Sopenharmony_ci for (unsigned pred : block.linear_preds) 724bf215546Sopenharmony_ci check_block(program->blocks[pred].linear_succs.size() == 1, 725bf215546Sopenharmony_ci "linear critical edges are not allowed", &program->blocks[pred]); 726bf215546Sopenharmony_ci for (unsigned pred : block.logical_preds) 727bf215546Sopenharmony_ci check_block(program->blocks[pred].logical_succs.size() == 1, 728bf215546Sopenharmony_ci "logical critical edges are not allowed", &program->blocks[pred]); 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci } 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci return is_valid; 733bf215546Sopenharmony_ci} 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci/* RA validation */ 736bf215546Sopenharmony_cinamespace { 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_cistruct Location { 739bf215546Sopenharmony_ci Location() : block(NULL), instr(NULL) {} 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci Block* block; 742bf215546Sopenharmony_ci Instruction* instr; // NULL if it's the block's live-in 743bf215546Sopenharmony_ci}; 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_cistruct Assignment { 746bf215546Sopenharmony_ci Location 
defloc; 747bf215546Sopenharmony_ci Location firstloc; 748bf215546Sopenharmony_ci PhysReg reg; 749bf215546Sopenharmony_ci bool valid; 750bf215546Sopenharmony_ci}; 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_cibool 753bf215546Sopenharmony_cira_fail(Program* program, Location loc, Location loc2, const char* fmt, ...) 754bf215546Sopenharmony_ci{ 755bf215546Sopenharmony_ci va_list args; 756bf215546Sopenharmony_ci va_start(args, fmt); 757bf215546Sopenharmony_ci char msg[1024]; 758bf215546Sopenharmony_ci vsprintf(msg, fmt, args); 759bf215546Sopenharmony_ci va_end(args); 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci char* out; 762bf215546Sopenharmony_ci size_t outsize; 763bf215546Sopenharmony_ci struct u_memstream mem; 764bf215546Sopenharmony_ci u_memstream_open(&mem, &out, &outsize); 765bf215546Sopenharmony_ci FILE* const memf = u_memstream_get(&mem); 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index); 768bf215546Sopenharmony_ci if (loc.instr) { 769bf215546Sopenharmony_ci aco_print_instr(loc.instr, memf); 770bf215546Sopenharmony_ci fprintf(memf, "\n%s", msg); 771bf215546Sopenharmony_ci } else { 772bf215546Sopenharmony_ci fprintf(memf, "%s", msg); 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci if (loc2.block) { 775bf215546Sopenharmony_ci fprintf(memf, " in BB%d:\n", loc2.block->index); 776bf215546Sopenharmony_ci aco_print_instr(loc2.instr, memf); 777bf215546Sopenharmony_ci } 778bf215546Sopenharmony_ci fprintf(memf, "\n\n"); 779bf215546Sopenharmony_ci u_memstream_close(&mem); 780bf215546Sopenharmony_ci 781bf215546Sopenharmony_ci aco_err(program, "%s", out); 782bf215546Sopenharmony_ci free(out); 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci return true; 785bf215546Sopenharmony_ci} 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_cibool 788bf215546Sopenharmony_civalidate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, 
789bf215546Sopenharmony_ci unsigned index) 790bf215546Sopenharmony_ci{ 791bf215546Sopenharmony_ci Operand op = instr->operands[index]; 792bf215546Sopenharmony_ci unsigned byte = op.physReg().byte(); 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_as_uniform) 795bf215546Sopenharmony_ci return byte == 0; 796bf215546Sopenharmony_ci if (instr->isPseudo() && gfx_level >= GFX8) 797bf215546Sopenharmony_ci return true; 798bf215546Sopenharmony_ci if (instr->isSDWA()) 799bf215546Sopenharmony_ci return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 && 800bf215546Sopenharmony_ci byte % instr->sdwa().sel[index].size() == 0; 801bf215546Sopenharmony_ci if (instr->isVOP3P()) { 802bf215546Sopenharmony_ci bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 || 803bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_fma_mixhi_f16 || 804bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_fma_mix_f32; 805bf215546Sopenharmony_ci return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) && 806bf215546Sopenharmony_ci ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1)); 807bf215546Sopenharmony_ci } 808bf215546Sopenharmony_ci if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index)) 809bf215546Sopenharmony_ci return true; 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci switch (instr->opcode) { 812bf215546Sopenharmony_ci case aco_opcode::v_cvt_f32_ubyte1: 813bf215546Sopenharmony_ci if (byte == 1) 814bf215546Sopenharmony_ci return true; 815bf215546Sopenharmony_ci break; 816bf215546Sopenharmony_ci case aco_opcode::v_cvt_f32_ubyte2: 817bf215546Sopenharmony_ci if (byte == 2) 818bf215546Sopenharmony_ci return true; 819bf215546Sopenharmony_ci break; 820bf215546Sopenharmony_ci case aco_opcode::v_cvt_f32_ubyte3: 821bf215546Sopenharmony_ci if (byte == 3) 822bf215546Sopenharmony_ci return true; 823bf215546Sopenharmony_ci break; 824bf215546Sopenharmony_ci case 
aco_opcode::ds_write_b8_d16_hi: 825bf215546Sopenharmony_ci case aco_opcode::ds_write_b16_d16_hi: 826bf215546Sopenharmony_ci if (byte == 2 && index == 1) 827bf215546Sopenharmony_ci return true; 828bf215546Sopenharmony_ci break; 829bf215546Sopenharmony_ci case aco_opcode::buffer_store_byte_d16_hi: 830bf215546Sopenharmony_ci case aco_opcode::buffer_store_short_d16_hi: 831bf215546Sopenharmony_ci case aco_opcode::buffer_store_format_d16_hi_x: 832bf215546Sopenharmony_ci if (byte == 2 && index == 3) 833bf215546Sopenharmony_ci return true; 834bf215546Sopenharmony_ci break; 835bf215546Sopenharmony_ci case aco_opcode::flat_store_byte_d16_hi: 836bf215546Sopenharmony_ci case aco_opcode::flat_store_short_d16_hi: 837bf215546Sopenharmony_ci case aco_opcode::scratch_store_byte_d16_hi: 838bf215546Sopenharmony_ci case aco_opcode::scratch_store_short_d16_hi: 839bf215546Sopenharmony_ci case aco_opcode::global_store_byte_d16_hi: 840bf215546Sopenharmony_ci case aco_opcode::global_store_short_d16_hi: 841bf215546Sopenharmony_ci if (byte == 2 && index == 2) 842bf215546Sopenharmony_ci return true; 843bf215546Sopenharmony_ci break; 844bf215546Sopenharmony_ci default: break; 845bf215546Sopenharmony_ci } 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci return byte == 0; 848bf215546Sopenharmony_ci} 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_cibool 851bf215546Sopenharmony_civalidate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr) 852bf215546Sopenharmony_ci{ 853bf215546Sopenharmony_ci Definition def = instr->definitions[0]; 854bf215546Sopenharmony_ci unsigned byte = def.physReg().byte(); 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci if (instr->isPseudo() && gfx_level >= GFX8) 857bf215546Sopenharmony_ci return true; 858bf215546Sopenharmony_ci if (instr->isSDWA()) 859bf215546Sopenharmony_ci return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 && 860bf215546Sopenharmony_ci byte % instr->sdwa().dst_sel.size() == 0; 
861bf215546Sopenharmony_ci if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1)) 862bf215546Sopenharmony_ci return true; 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci switch (instr->opcode) { 865bf215546Sopenharmony_ci case aco_opcode::v_fma_mixhi_f16: 866bf215546Sopenharmony_ci case aco_opcode::buffer_load_ubyte_d16_hi: 867bf215546Sopenharmony_ci case aco_opcode::buffer_load_sbyte_d16_hi: 868bf215546Sopenharmony_ci case aco_opcode::buffer_load_short_d16_hi: 869bf215546Sopenharmony_ci case aco_opcode::buffer_load_format_d16_hi_x: 870bf215546Sopenharmony_ci case aco_opcode::flat_load_ubyte_d16_hi: 871bf215546Sopenharmony_ci case aco_opcode::flat_load_short_d16_hi: 872bf215546Sopenharmony_ci case aco_opcode::scratch_load_ubyte_d16_hi: 873bf215546Sopenharmony_ci case aco_opcode::scratch_load_short_d16_hi: 874bf215546Sopenharmony_ci case aco_opcode::global_load_ubyte_d16_hi: 875bf215546Sopenharmony_ci case aco_opcode::global_load_short_d16_hi: 876bf215546Sopenharmony_ci case aco_opcode::ds_read_u8_d16_hi: 877bf215546Sopenharmony_ci case aco_opcode::ds_read_u16_d16_hi: return byte == 2; 878bf215546Sopenharmony_ci default: break; 879bf215546Sopenharmony_ci } 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci return byte == 0; 882bf215546Sopenharmony_ci} 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ciunsigned 885bf215546Sopenharmony_ciget_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index) 886bf215546Sopenharmony_ci{ 887bf215546Sopenharmony_ci amd_gfx_level gfx_level = program->gfx_level; 888bf215546Sopenharmony_ci Definition def = instr->definitions[index]; 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci if (instr->isPseudo()) 891bf215546Sopenharmony_ci return gfx_level >= GFX8 ? 
def.bytes() : def.size() * 4u; 892bf215546Sopenharmony_ci if (instr->isVALU()) { 893bf215546Sopenharmony_ci assert(def.bytes() <= 2); 894bf215546Sopenharmony_ci if (instr->isSDWA()) 895bf215546Sopenharmony_ci return instr->sdwa().dst_sel.size(); 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci if (instr_is_16bit(gfx_level, instr->opcode)) 898bf215546Sopenharmony_ci return 2; 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci return 4; 901bf215546Sopenharmony_ci } 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci if (instr->isMIMG()) { 904bf215546Sopenharmony_ci assert(instr->mimg().d16); 905bf215546Sopenharmony_ci return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes(); 906bf215546Sopenharmony_ci } 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci switch (instr->opcode) { 909bf215546Sopenharmony_ci case aco_opcode::buffer_load_ubyte_d16: 910bf215546Sopenharmony_ci case aco_opcode::buffer_load_sbyte_d16: 911bf215546Sopenharmony_ci case aco_opcode::buffer_load_short_d16: 912bf215546Sopenharmony_ci case aco_opcode::buffer_load_format_d16_x: 913bf215546Sopenharmony_ci case aco_opcode::tbuffer_load_format_d16_x: 914bf215546Sopenharmony_ci case aco_opcode::flat_load_ubyte_d16: 915bf215546Sopenharmony_ci case aco_opcode::flat_load_short_d16: 916bf215546Sopenharmony_ci case aco_opcode::scratch_load_ubyte_d16: 917bf215546Sopenharmony_ci case aco_opcode::scratch_load_short_d16: 918bf215546Sopenharmony_ci case aco_opcode::global_load_ubyte_d16: 919bf215546Sopenharmony_ci case aco_opcode::global_load_short_d16: 920bf215546Sopenharmony_ci case aco_opcode::ds_read_u8_d16: 921bf215546Sopenharmony_ci case aco_opcode::ds_read_u16_d16: 922bf215546Sopenharmony_ci case aco_opcode::buffer_load_ubyte_d16_hi: 923bf215546Sopenharmony_ci case aco_opcode::buffer_load_sbyte_d16_hi: 924bf215546Sopenharmony_ci case aco_opcode::buffer_load_short_d16_hi: 925bf215546Sopenharmony_ci case aco_opcode::buffer_load_format_d16_hi_x: 926bf215546Sopenharmony_ci case 
aco_opcode::flat_load_ubyte_d16_hi: 927bf215546Sopenharmony_ci case aco_opcode::flat_load_short_d16_hi: 928bf215546Sopenharmony_ci case aco_opcode::scratch_load_ubyte_d16_hi: 929bf215546Sopenharmony_ci case aco_opcode::scratch_load_short_d16_hi: 930bf215546Sopenharmony_ci case aco_opcode::global_load_ubyte_d16_hi: 931bf215546Sopenharmony_ci case aco_opcode::global_load_short_d16_hi: 932bf215546Sopenharmony_ci case aco_opcode::ds_read_u8_d16_hi: 933bf215546Sopenharmony_ci case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2; 934bf215546Sopenharmony_ci case aco_opcode::buffer_load_format_d16_xyz: 935bf215546Sopenharmony_ci case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6; 936bf215546Sopenharmony_ci default: return def.size() * 4; 937bf215546Sopenharmony_ci } 938bf215546Sopenharmony_ci} 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_cibool 941bf215546Sopenharmony_civalidate_instr_defs(Program* program, std::array<unsigned, 2048>& regs, 942bf215546Sopenharmony_ci const std::vector<Assignment>& assignments, const Location& loc, 943bf215546Sopenharmony_ci aco_ptr<Instruction>& instr) 944bf215546Sopenharmony_ci{ 945bf215546Sopenharmony_ci bool err = false; 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->definitions.size(); i++) { 948bf215546Sopenharmony_ci Definition& def = instr->definitions[i]; 949bf215546Sopenharmony_ci if (!def.isTemp()) 950bf215546Sopenharmony_ci continue; 951bf215546Sopenharmony_ci Temp tmp = def.getTemp(); 952bf215546Sopenharmony_ci PhysReg reg = assignments[tmp.id()].reg; 953bf215546Sopenharmony_ci for (unsigned j = 0; j < tmp.bytes(); j++) { 954bf215546Sopenharmony_ci if (regs[reg.reg_b + j]) 955bf215546Sopenharmony_ci err |= 956bf215546Sopenharmony_ci ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc, 957bf215546Sopenharmony_ci "Assignment of element %d of %%%d already taken by %%%d from instruction", i, 
958bf215546Sopenharmony_ci tmp.id(), regs[reg.reg_b + j]); 959bf215546Sopenharmony_ci regs[reg.reg_b + j] = tmp.id(); 960bf215546Sopenharmony_ci } 961bf215546Sopenharmony_ci if (def.regClass().is_subdword() && def.bytes() < 4) { 962bf215546Sopenharmony_ci unsigned written = get_subdword_bytes_written(program, instr, i); 963bf215546Sopenharmony_ci /* If written=4, the instruction still might write the upper half. In that case, it's 964bf215546Sopenharmony_ci * the lower half that isn't preserved */ 965bf215546Sopenharmony_ci for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) { 966bf215546Sopenharmony_ci unsigned written_reg = reg.reg() * 4u + j; 967bf215546Sopenharmony_ci if (regs[written_reg] && regs[written_reg] != def.tempId()) 968bf215546Sopenharmony_ci err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc, 969bf215546Sopenharmony_ci "Assignment of element %d of %%%d overwrites the full register " 970bf215546Sopenharmony_ci "taken by %%%d from instruction", 971bf215546Sopenharmony_ci i, tmp.id(), regs[written_reg]); 972bf215546Sopenharmony_ci } 973bf215546Sopenharmony_ci } 974bf215546Sopenharmony_ci } 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) { 977bf215546Sopenharmony_ci if (!def.isTemp()) 978bf215546Sopenharmony_ci continue; 979bf215546Sopenharmony_ci if (def.isKill()) { 980bf215546Sopenharmony_ci for (unsigned j = 0; j < def.getTemp().bytes(); j++) 981bf215546Sopenharmony_ci regs[def.physReg().reg_b + j] = 0; 982bf215546Sopenharmony_ci } 983bf215546Sopenharmony_ci } 984bf215546Sopenharmony_ci 985bf215546Sopenharmony_ci return err; 986bf215546Sopenharmony_ci} 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_ci} /* end namespace */ 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_cibool 991bf215546Sopenharmony_civalidate_ra(Program* program) 992bf215546Sopenharmony_ci{ 993bf215546Sopenharmony_ci if (!(debug_flags & DEBUG_VALIDATE_RA)) 994bf215546Sopenharmony_ci return 
false; 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci bool err = false; 997bf215546Sopenharmony_ci aco::live live_vars = aco::live_var_analysis(program); 998bf215546Sopenharmony_ci std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size()); 999bf215546Sopenharmony_ci uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves); 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci std::vector<Assignment> assignments(program->peekAllocationId()); 1002bf215546Sopenharmony_ci for (Block& block : program->blocks) { 1003bf215546Sopenharmony_ci Location loc; 1004bf215546Sopenharmony_ci loc.block = █ 1005bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block.instructions) { 1006bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_phi) { 1007bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 1008bf215546Sopenharmony_ci if (instr->operands[i].isTemp() && 1009bf215546Sopenharmony_ci instr->operands[i].getTemp().type() == RegType::sgpr && 1010bf215546Sopenharmony_ci instr->operands[i].isFirstKill()) 1011bf215546Sopenharmony_ci phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp()); 1012bf215546Sopenharmony_ci } 1013bf215546Sopenharmony_ci } 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci loc.instr = instr.get(); 1016bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) { 1017bf215546Sopenharmony_ci Operand& op = instr->operands[i]; 1018bf215546Sopenharmony_ci if (!op.isTemp()) 1019bf215546Sopenharmony_ci continue; 1020bf215546Sopenharmony_ci if (!op.isFixed()) 1021bf215546Sopenharmony_ci err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i); 1022bf215546Sopenharmony_ci if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg()) 1023bf215546Sopenharmony_ci err |= 1024bf215546Sopenharmony_ci ra_fail(program, loc, assignments[op.tempId()].firstloc, 1025bf215546Sopenharmony_ci "Operand %d has 
an inconsistent register assignment with instruction", i); 1026bf215546Sopenharmony_ci if ((op.getTemp().type() == RegType::vgpr && 1027bf215546Sopenharmony_ci op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) || 1028bf215546Sopenharmony_ci (op.getTemp().type() == RegType::sgpr && 1029bf215546Sopenharmony_ci op.physReg() + op.size() > program->config->num_sgprs && 1030bf215546Sopenharmony_ci op.physReg() < sgpr_limit)) 1031bf215546Sopenharmony_ci err |= ra_fail(program, loc, assignments[op.tempId()].firstloc, 1032bf215546Sopenharmony_ci "Operand %d has an out-of-bounds register assignment", i); 1033bf215546Sopenharmony_ci if (op.physReg() == vcc && !program->needs_vcc) 1034bf215546Sopenharmony_ci err |= ra_fail(program, loc, Location(), 1035bf215546Sopenharmony_ci "Operand %d fixed to vcc but needs_vcc=false", i); 1036bf215546Sopenharmony_ci if (op.regClass().is_subdword() && 1037bf215546Sopenharmony_ci !validate_subdword_operand(program->gfx_level, instr, i)) 1038bf215546Sopenharmony_ci err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i); 1039bf215546Sopenharmony_ci if (!assignments[op.tempId()].firstloc.block) 1040bf215546Sopenharmony_ci assignments[op.tempId()].firstloc = loc; 1041bf215546Sopenharmony_ci if (!assignments[op.tempId()].defloc.block) { 1042bf215546Sopenharmony_ci assignments[op.tempId()].reg = op.physReg(); 1043bf215546Sopenharmony_ci assignments[op.tempId()].valid = true; 1044bf215546Sopenharmony_ci } 1045bf215546Sopenharmony_ci } 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->definitions.size(); i++) { 1048bf215546Sopenharmony_ci Definition& def = instr->definitions[i]; 1049bf215546Sopenharmony_ci if (!def.isTemp()) 1050bf215546Sopenharmony_ci continue; 1051bf215546Sopenharmony_ci if (!def.isFixed()) 1052bf215546Sopenharmony_ci err |= 1053bf215546Sopenharmony_ci ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i); 
1054bf215546Sopenharmony_ci if (assignments[def.tempId()].defloc.block) 1055bf215546Sopenharmony_ci err |= ra_fail(program, loc, assignments[def.tempId()].defloc, 1056bf215546Sopenharmony_ci "Temporary %%%d also defined by instruction", def.tempId()); 1057bf215546Sopenharmony_ci if ((def.getTemp().type() == RegType::vgpr && 1058bf215546Sopenharmony_ci def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) || 1059bf215546Sopenharmony_ci (def.getTemp().type() == RegType::sgpr && 1060bf215546Sopenharmony_ci def.physReg() + def.size() > program->config->num_sgprs && 1061bf215546Sopenharmony_ci def.physReg() < sgpr_limit)) 1062bf215546Sopenharmony_ci err |= ra_fail(program, loc, assignments[def.tempId()].firstloc, 1063bf215546Sopenharmony_ci "Definition %d has an out-of-bounds register assignment", i); 1064bf215546Sopenharmony_ci if (def.physReg() == vcc && !program->needs_vcc) 1065bf215546Sopenharmony_ci err |= ra_fail(program, loc, Location(), 1066bf215546Sopenharmony_ci "Definition %d fixed to vcc but needs_vcc=false", i); 1067bf215546Sopenharmony_ci if (def.regClass().is_subdword() && 1068bf215546Sopenharmony_ci !validate_subdword_definition(program->gfx_level, instr)) 1069bf215546Sopenharmony_ci err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i); 1070bf215546Sopenharmony_ci if (!assignments[def.tempId()].firstloc.block) 1071bf215546Sopenharmony_ci assignments[def.tempId()].firstloc = loc; 1072bf215546Sopenharmony_ci assignments[def.tempId()].defloc = loc; 1073bf215546Sopenharmony_ci assignments[def.tempId()].reg = def.physReg(); 1074bf215546Sopenharmony_ci assignments[def.tempId()].valid = true; 1075bf215546Sopenharmony_ci } 1076bf215546Sopenharmony_ci } 1077bf215546Sopenharmony_ci } 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci for (Block& block : program->blocks) { 1080bf215546Sopenharmony_ci Location loc; 1081bf215546Sopenharmony_ci loc.block = █ 1082bf215546Sopenharmony_ci 
1083bf215546Sopenharmony_ci std::array<unsigned, 2048> regs; /* register file in bytes */ 1084bf215546Sopenharmony_ci regs.fill(0); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci IDSet live = live_vars.live_out[block.index]; 1087bf215546Sopenharmony_ci /* remove killed p_phi sgpr operands */ 1088bf215546Sopenharmony_ci for (Temp tmp : phi_sgpr_ops[block.index]) 1089bf215546Sopenharmony_ci live.erase(tmp.id()); 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci /* check live out */ 1092bf215546Sopenharmony_ci for (unsigned id : live) { 1093bf215546Sopenharmony_ci Temp tmp(id, program->temp_rc[id]); 1094bf215546Sopenharmony_ci PhysReg reg = assignments[id].reg; 1095bf215546Sopenharmony_ci for (unsigned i = 0; i < tmp.bytes(); i++) { 1096bf215546Sopenharmony_ci if (regs[reg.reg_b + i]) { 1097bf215546Sopenharmony_ci err |= ra_fail(program, loc, Location(), 1098bf215546Sopenharmony_ci "Assignment of element %d of %%%d already taken by %%%d in live-out", 1099bf215546Sopenharmony_ci i, id, regs[reg.reg_b + i]); 1100bf215546Sopenharmony_ci } 1101bf215546Sopenharmony_ci regs[reg.reg_b + i] = id; 1102bf215546Sopenharmony_ci } 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci regs.fill(0); 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) { 1107bf215546Sopenharmony_ci aco_ptr<Instruction>& instr = *it; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci /* check killed p_phi sgpr operands */ 1110bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_logical_end) { 1111bf215546Sopenharmony_ci for (Temp tmp : phi_sgpr_ops[block.index]) { 1112bf215546Sopenharmony_ci PhysReg reg = assignments[tmp.id()].reg; 1113bf215546Sopenharmony_ci for (unsigned i = 0; i < tmp.bytes(); i++) { 1114bf215546Sopenharmony_ci if (regs[reg.reg_b + i]) 1115bf215546Sopenharmony_ci err |= ra_fail( 1116bf215546Sopenharmony_ci program, loc, Location(), 1117bf215546Sopenharmony_ci 
"Assignment of element %d of %%%d already taken by %%%d in live-out", i, 1118bf215546Sopenharmony_ci tmp.id(), regs[reg.reg_b + i]); 1119bf215546Sopenharmony_ci } 1120bf215546Sopenharmony_ci live.insert(tmp.id()); 1121bf215546Sopenharmony_ci } 1122bf215546Sopenharmony_ci } 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) { 1125bf215546Sopenharmony_ci if (!def.isTemp()) 1126bf215546Sopenharmony_ci continue; 1127bf215546Sopenharmony_ci live.erase(def.tempId()); 1128bf215546Sopenharmony_ci } 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci /* don't count phi operands as live-in, since they are actually 1131bf215546Sopenharmony_ci * killed when they are copied at the predecessor */ 1132bf215546Sopenharmony_ci if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) { 1133bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 1134bf215546Sopenharmony_ci if (!op.isTemp()) 1135bf215546Sopenharmony_ci continue; 1136bf215546Sopenharmony_ci live.insert(op.tempId()); 1137bf215546Sopenharmony_ci } 1138bf215546Sopenharmony_ci } 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci for (unsigned id : live) { 1142bf215546Sopenharmony_ci Temp tmp(id, program->temp_rc[id]); 1143bf215546Sopenharmony_ci PhysReg reg = assignments[id].reg; 1144bf215546Sopenharmony_ci for (unsigned i = 0; i < tmp.bytes(); i++) 1145bf215546Sopenharmony_ci regs[reg.reg_b + i] = id; 1146bf215546Sopenharmony_ci } 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block.instructions) { 1149bf215546Sopenharmony_ci loc.instr = instr.get(); 1150bf215546Sopenharmony_ci 1151bf215546Sopenharmony_ci /* remove killed p_phi operands from regs */ 1152bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_logical_end) { 1153bf215546Sopenharmony_ci for (Temp tmp : phi_sgpr_ops[block.index]) { 1154bf215546Sopenharmony_ci PhysReg reg = 
assignments[tmp.id()].reg; 1155bf215546Sopenharmony_ci for (unsigned i = 0; i < tmp.bytes(); i++) 1156bf215546Sopenharmony_ci regs[reg.reg_b + i] = 0; 1157bf215546Sopenharmony_ci } 1158bf215546Sopenharmony_ci } 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_ci if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) { 1161bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 1162bf215546Sopenharmony_ci if (!op.isTemp()) 1163bf215546Sopenharmony_ci continue; 1164bf215546Sopenharmony_ci if (op.isFirstKillBeforeDef()) { 1165bf215546Sopenharmony_ci for (unsigned j = 0; j < op.getTemp().bytes(); j++) 1166bf215546Sopenharmony_ci regs[op.physReg().reg_b + j] = 0; 1167bf215546Sopenharmony_ci } 1168bf215546Sopenharmony_ci } 1169bf215546Sopenharmony_ci } 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci if (!instr->isBranch() || block.linear_succs.size() != 1) 1172bf215546Sopenharmony_ci err |= validate_instr_defs(program, regs, assignments, loc, instr); 1173bf215546Sopenharmony_ci 1174bf215546Sopenharmony_ci if (!is_phi(instr)) { 1175bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 1176bf215546Sopenharmony_ci if (!op.isTemp()) 1177bf215546Sopenharmony_ci continue; 1178bf215546Sopenharmony_ci if (op.isLateKill() && op.isFirstKill()) { 1179bf215546Sopenharmony_ci for (unsigned j = 0; j < op.getTemp().bytes(); j++) 1180bf215546Sopenharmony_ci regs[op.physReg().reg_b + j] = 0; 1181bf215546Sopenharmony_ci } 1182bf215546Sopenharmony_ci } 1183bf215546Sopenharmony_ci } else if (block.linear_preds.size() != 1 || 1184bf215546Sopenharmony_ci program->blocks[block.linear_preds[0]].linear_succs.size() == 1) { 1185bf215546Sopenharmony_ci for (unsigned pred : block.linear_preds) { 1186bf215546Sopenharmony_ci aco_ptr<Instruction>& br = program->blocks[pred].instructions.back(); 1187bf215546Sopenharmony_ci assert(br->isBranch()); 1188bf215546Sopenharmony_ci err |= validate_instr_defs(program, regs, assignments, loc, 
br); 1189bf215546Sopenharmony_ci } 1190bf215546Sopenharmony_ci } 1191bf215546Sopenharmony_ci } 1192bf215546Sopenharmony_ci } 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci return err; 1195bf215546Sopenharmony_ci} 1196bf215546Sopenharmony_ci} // namespace aco 1197