/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "aco_ir.h"

#include "util/memstream.h"

#include <array>
#include <map>
#include <set>
#include <vector>

namespace aco {

static void
aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
        const char* file, unsigned line, const char* fmt, va_list args)
{
   char* msg;

   if (program->debug.shorten_messages) {
      msg = ralloc_vasprintf(NULL, fmt, args);
   } else {
      msg = ralloc_strdup(NULL, prefix);
      ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
      ralloc_asprintf_append(&msg, "    ");
      ralloc_vasprintf_append(&msg, fmt, args);
   }

   if (program->debug.func)
      program->debug.func(program->debug.private_data, level, msg);

   fprintf(program->debug.output, "%s\n", msg);

   ralloc_free(msg);
}

void
_aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);
   va_end(args);
}
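
/* Report an error at the given source location through the program's debug
 * callback and output stream (see aco_log above). */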
void
_aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
   va_end(args);
}

bool
validate_ir(Program* program)
{
   bool is_valid = true;
   auto check = [&program, &is_valid](bool success, const char* msg,
                                      aco::Instruction* instr) -> void
   {
      if (!success) {
         char* out;
         size_t outsize;
         struct u_memstream mem;
         u_memstream_open(&mem, &out, &outsize);
         FILE* const memf = u_memstream_get(&mem);

         fprintf(memf, "%s: ", msg);
         aco_print_instr(instr, memf);
         u_memstream_close(&mem);

         aco_err(program, "%s", out);
         free(out);

         is_valid = false;
      }
   };

   auto check_block = [&program, &is_valid](bool success, const char* msg,
                                            aco::Block* block) -> void
   {
      if (!success) {
         aco_err(program, "%s: BB%u", msg, block->index);
         is_valid = false;
      }
   };

   for (Block& block : program->blocks) {
      for (aco_ptr<Instruction>& instr : block.instructions) {

         /* check base format */
         Format base_format = instr->format;
         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
         if ((uint32_t)base_format & (uint32_t)Format::VOP1)
            base_format = Format::VOP1;
         else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
            base_format = Format::VOP2;
         else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
            base_format = Format::VOPC;
         else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {
            if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
                instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
                instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
                instr->opcode == aco_opcode::v_interp_p2_f16) {
               /* v_interp_*_fp16 are considered VINTRP by the compiler but
                * they are emitted as VOP3.
                */
               base_format = Format::VOP3;
            } else {
               base_format = Format::VINTRP;
            }
         }
         check(base_format == instr_info.format[(int)instr->opcode],
               "Wrong base format for instruction", instr.get());

         /* check VOP3 modifiers */
         if (instr->isVOP3() && instr->format != Format::VOP3) {
            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
                     base_format == Format::VOPC || base_format == Format::VINTRP,
                  "Format cannot have VOP3/VOP3B applied", instr.get());
         }

         /* check SDWA */
         if (instr->isSDWA()) {
            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
                     base_format == Format::VOPC,
                  "Format cannot have SDWA applied", instr.get());

            check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
            check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());

            SDWA_instruction& sdwa = instr->sdwa();
            check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
                  instr.get());
            if (base_format == Format::VOPC) {
               check(sdwa.clamp == false || program->gfx_level == GFX8,
                     "SDWA VOPC clamp only supported on GFX8", instr.get());
               check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
                        program->gfx_level >= GFX9,
                     "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
            } else {
               const Definition& def = instr->definitions[0];
               check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
                     instr.get());
               check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
                     "SDWA definition selection size must be at most definition size", instr.get());
               check(
                  sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
                  "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
               check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
                     instr.get());
               check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
                     "SDWA dst_sel size must be definition size for subdword definitions",
                     instr.get());
               check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
                     "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
            }

            for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
               const Operand& op = instr->operands[i];
               check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
               check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
                     "SDWA operand selection size must be at most operand size", instr.get());
               check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
                     "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
               check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
                     instr.get());
            }
            if (instr->operands.size() >= 3) {
               check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
                     "3rd operand must be fixed to vcc with SDWA", instr.get());
            }
            if (instr->definitions.size() >= 2) {
               check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
                     "2nd definition must be fixed to vcc with SDWA", instr.get());
            }

            /* sdwa_opcodes is false for the opcodes that never support SDWA;
             * feature_mac re-allows v_mac_f32/v_mac_f16 on GFX8. */
            const bool sdwa_opcodes =
               instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
               instr->opcode != aco_opcode::v_fmamk_f32 &&
               instr->opcode != aco_opcode::v_fmaak_f32 &&
               instr->opcode != aco_opcode::v_fmamk_f16 &&
               instr->opcode != aco_opcode::v_fmaak_f16 &&
               instr->opcode != aco_opcode::v_madmk_f32 &&
               instr->opcode != aco_opcode::v_madak_f32 &&
               instr->opcode != aco_opcode::v_madmk_f16 &&
               instr->opcode != aco_opcode::v_madak_f16 &&
               instr->opcode != aco_opcode::v_readfirstlane_b32 &&
               instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;

            const bool feature_mac =
               program->gfx_level == GFX8 &&
               (instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16);

            check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
         }

         /* check opsel */
         if (instr->isVOP3()) {
            VOP3_instruction& vop3 = instr->vop3();
            check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
                  instr.get());

            for (unsigned i = 0; i < 3; i++) {
               if (i >= instr->operands.size() ||
                   (instr->operands[i].hasRegClass() &&
                    instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
                  check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
            }
            if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
               check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
                     instr.get());
         } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
                    instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
                    instr->opcode == aco_opcode::v_fma_mix_f32) {
            check(instr->definitions[0].regClass() ==
                     (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
                  "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
         } else if (instr->isVOP3P()) {
            VOP3P_instruction& vop3p = instr->vop3p();
            for (unsigned i = 0; i < instr->operands.size(); i++) {
               if (instr->operands[i].hasRegClass() &&
                   instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
                  check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0,
                        "Unexpected opsel for subdword operand", instr.get());
            }
            check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
                  instr.get());
         }

         /* check for undefs */
         for (unsigned i = 0; i < instr->operands.size(); i++) {
            if (instr->operands[i].isUndefined()) {
               bool flat = instr->isFlatLike();
               bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
                                   instr->opcode == aco_opcode::p_create_vector ||
                                   instr->opcode == aco_opcode::p_jump_to_epilog ||
                                   (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
                                   ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
                                   (instr->isScratch() && i == 0);
               check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
            } else {
               check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
                        instr->operands[i].isConstant(),
                     "Uninitialized Operand", instr.get());
            }
         }

         /* check subdword definitions */
         for (unsigned i = 0; i < instr->definitions.size(); i++) {
            if (instr->definitions[i].regClass().is_subdword())
               check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
                     "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes",
                     instr.get());
         }
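
         /* SALU/VALU operands: at most one unique literal is allowed, and SGPR or
          * constant operands may only appear in the source slots permitted by
          * scalar_mask / the constant bus limit below. */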
         if (instr->isSALU() || instr->isVALU()) {
            /* check literals */
            Operand literal(s1);
            for (unsigned i = 0; i < instr->operands.size(); i++) {
               Operand op = instr->operands[i];
               if (!op.isLiteral())
                  continue;

               check(!instr->isDPP() && !instr->isSDWA() &&
                        (!instr->isVOP3() || program->gfx_level >= GFX10) &&
                        (!instr->isVOP3P() || program->gfx_level >= GFX10),
                     "Literal applied on wrong instruction format", instr.get());

               check(literal.isUndefined() || (literal.size() == op.size() &&
                                               literal.constantValue() == op.constantValue()),
                     "Only 1 Literal allowed", instr.get());
               literal = op;
               check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
                     "Wrong source position for Literal argument", instr.get());
            }

            /* check num sgprs for VALU */
            if (instr->isVALU()) {
               bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
                                 instr->opcode == aco_opcode::v_lshrrev_b64 ||
                                 instr->opcode == aco_opcode::v_ashrrev_i64;
               unsigned const_bus_limit = 1;
               if (program->gfx_level >= GFX10 && !is_shift64)
                  const_bus_limit = 2;

               uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
               if (instr->isSDWA())
                  scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
               else if (instr->isDPP())
                  scalar_mask = 0x4;

               if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
                   instr->opcode == aco_opcode::v_readlane_b32 ||
                   instr->opcode == aco_opcode::v_readlane_b32_e64) {
                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
                        "Wrong Definition type for VALU instruction", instr.get());
               } else {
                  check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                        "Wrong Definition type for VALU instruction", instr.get());
               }

               unsigned num_sgprs = 0;
               unsigned sgpr[] = {0, 0};
               for (unsigned i = 0; i < instr->operands.size(); i++) {
                  Operand op = instr->operands[i];
                  if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
                      instr->opcode == aco_opcode::v_readlane_b32 ||
                      instr->opcode == aco_opcode::v_readlane_b32_e64) {
                     check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
                              op.isConstant(),
                           "Must be a SGPR or a constant", instr.get());
                     check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
                                      op.bytes() <= 4),
                           "Wrong Operand type for VALU instruction", instr.get());
                     continue;
                  }
                  if (instr->opcode == aco_opcode::v_permlane16_b32 ||
                      instr->opcode == aco_opcode::v_permlanex16_b32) {
                     check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr),
                           "Operand 0 of v_permlane must be VGPR", instr.get());
                     check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
                              op.isConstant(),
                           "Lane select operands of v_permlane must be SGPR or constant",
                           instr.get());
                  }

                  if (instr->opcode == aco_opcode::v_writelane_b32 ||
                      instr->opcode == aco_opcode::v_writelane_b32_e64) {
                     check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
                                      op.bytes() <= 4),
                           "Wrong Operand type for VALU instruction", instr.get());
                     check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
                              op.isConstant(),
                           "Must be a SGPR or a constant", instr.get());
                     continue;
                  }
                  if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
                     check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
                           instr.get());

                     if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
                        if (num_sgprs < 2)
                           sgpr[num_sgprs++] = op.tempId();
                     }
                  }

                  if (op.isConstant() && !op.isLiteral())
                     check(scalar_mask & (1 << i), "Wrong source position for constant argument",
                           instr.get());
               }
               check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
                     "Too many SGPRs/literals", instr.get());
            }

            if (instr->isSOP1() || instr->isSOP2()) {
               if (!instr->definitions.empty())
                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
                        "Wrong Definition type for SALU instruction", instr.get());
               for (const Operand& op : instr->operands) {
                  check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
                        "Wrong Operand type for SALU instruction", instr.get());
               }
            }
         }

         switch (instr->format) {
         case Format::PSEUDO: {
            if (instr->opcode == aco_opcode::p_create_vector) {
               unsigned size = 0;
               for (const Operand& op : instr->operands) {
                  check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
                  size += op.bytes();
               }
               check(size == instr->definitions[0].bytes(),
                     "Definition size does not match operand sizes", instr.get());
               if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
                  for (const Operand& op : instr->operands) {
                     check(op.isConstant() || op.regClass().type() == RegType::sgpr,
                           "Wrong Operand type for scalar vector", instr.get());
                  }
               }
            } else if (instr->opcode == aco_opcode::p_extract_vector) {
               check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
                     "Wrong Operand types", instr.get());
               check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
                        instr->operands[0].bytes(),
                     "Index out of range", instr.get());
               check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
                        instr->operands[0].regClass().type() == RegType::sgpr,
                     "Cannot extract SGPR value from VGPR vector", instr.get());
               check(program->gfx_level >= GFX9 ||
                        !instr->definitions[0].regClass().is_subdword() ||
                        instr->operands[0].regClass().type() == RegType::vgpr,
                     "Cannot extract subdword from SGPR before GFX9+", instr.get());
            } else if (instr->opcode == aco_opcode::p_split_vector) {
               check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
               unsigned size = 0;
               for (const Definition& def : instr->definitions) {
                  size += def.bytes();
               }
               check(size == instr->operands[0].bytes(),
                     "Operand size does not match definition sizes", instr.get());
               if (instr->operands[0].getTemp().type() == RegType::vgpr) {
                  for (const Definition& def : instr->definitions)
                     check(def.regClass().type() == RegType::vgpr,
                           "Wrong Definition type for VGPR split_vector", instr.get());
               } else {
                  for (const Definition& def : instr->definitions)
                     check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
                           "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
               }
            } else if (instr->opcode == aco_opcode::p_parallelcopy) {
               check(instr->definitions.size() == instr->operands.size(),
                     "Number of Operands does not match number of Definitions", instr.get());
               for (unsigned i = 0; i < instr->operands.size(); i++) {
                  check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
                        "Operand and Definition size must match", instr.get());
                  if (instr->operands[i].isTemp()) {
                     check((instr->definitions[i].getTemp().type() ==
                            instr->operands[i].regClass().type()) ||
                              (instr->definitions[i].getTemp().type() == RegType::vgpr &&
                               instr->operands[i].regClass().type() == RegType::sgpr),
                           "Operand and Definition types do not match", instr.get());
                     check(instr->definitions[i].regClass().is_linear_vgpr() ==
                              instr->operands[i].regClass().is_linear_vgpr(),
                           "Operand and Definition types do not match", instr.get());
                  } else {
                     check(!instr->definitions[i].regClass().is_linear_vgpr(),
                           "Can only copy linear VGPRs into linear VGPRs, not constant/undef",
                           instr.get());
                  }
               }
            } else if (instr->opcode == aco_opcode::p_phi) {
               check(instr->operands.size() == block.logical_preds.size(),
                     "Number of Operands does not match number of predecessors", instr.get());
               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                     "Logical Phi Definition must be vgpr", instr.get());
               for (const Operand& op : instr->operands)
                  check(instr->definitions[0].size() == op.size(),
                        "Operand sizes must match Definition size", instr.get());
            } else if (instr->opcode == aco_opcode::p_linear_phi) {
               for (const Operand& op : instr->operands) {
                  check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
                        instr.get());
                  check(instr->definitions[0].size() == op.size(),
                        "Operand sizes must match Definition size", instr.get());
               }
               check(instr->operands.size() == block.linear_preds.size(),
                     "Number of Operands does not match number of predecessors", instr.get());
            } else if (instr->opcode == aco_opcode::p_extract ||
                       instr->opcode == aco_opcode::p_insert) {
               check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
               check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
               if (instr->opcode == aco_opcode::p_extract)
                  check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
                        instr.get());

               check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
                        instr->operands[0].getTemp().type() == RegType::sgpr,
                     "Can't extract/insert VGPR to SGPR", instr.get());

               if (instr->opcode == aco_opcode::p_insert)
                  check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
                        "Sizes of p_insert data operand and definition must match", instr.get());

               if (instr->definitions[0].getTemp().type() == RegType::sgpr)
                  check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
                           instr->definitions[1].physReg() == scc,
                        "SGPR extract/insert needs an SCC definition", instr.get());

               unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
               unsigned op_bits = instr->operands[2].constantValue();

               if (instr->opcode == aco_opcode::p_insert) {
                  check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
                  check(op_bits < data_bits, "Size must be smaller than source", instr.get());
               } else if (instr->opcode == aco_opcode::p_extract) {
                  check(op_bits == 8 || op_bits == 16 || op_bits == 32,
                        "Size must be 8 or 16 or 32", instr.get());
                  check(data_bits >= op_bits, "Can't extract more bits than what the data has.",
                        instr.get());
               }

               unsigned comp = data_bits / MAX2(op_bits, 1);
               check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
                     instr.get());
            } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
               check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
                     instr.get());
               check(instr->operands.size() > 0 &&
                        instr->operands[0].getTemp().type() == RegType::sgpr &&
                        instr->operands[0].getTemp().size() == 2,
                     "First operand of p_jump_to_epilog must be a SGPR", instr.get());
               for (unsigned i = 1; i < instr->operands.size(); i++) {
                  check(instr->operands[i].getTemp().type() == RegType::vgpr ||
                           instr->operands[i].isUndefined(),
                        "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
               }
            }
            break;
         }
         case Format::PSEUDO_REDUCTION: {
            for (const Operand& op : instr->operands)
               check(op.regClass().type() == RegType::vgpr,
                     "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
                     instr.get());

            if (instr->opcode == aco_opcode::p_reduce &&
                instr->reduction().cluster_size == program->wave_size)
               check(instr->definitions[0].regClass().type() == RegType::sgpr ||
                        program->wave_size == 32,
                     "The result of unclustered reductions must go into an SGPR.", instr.get());
            else
               check(instr->definitions[0].regClass().type() == RegType::vgpr,
                     "The result of scans and clustered reductions must go into a VGPR.",
                     instr.get());

            break;
         }
         case Format::SMEM: {
            if (instr->operands.size() >= 1)
               check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
                        (instr->operands[0].isTemp() &&
                         instr->operands[0].regClass().type() == RegType::sgpr),
                     "SMEM operands must be sgpr", instr.get());
            if (instr->operands.size() >= 2)
               check(instr->operands[1].isConstant() ||
                        (instr->operands[1].isTemp() &&
                         instr->operands[1].regClass().type() == RegType::sgpr),
                     "SMEM offset must be constant or sgpr", instr.get());
            if (!instr->definitions.empty())
               check(instr->definitions[0].getTemp().type() == RegType::sgpr,
                     "SMEM result must be sgpr", instr.get());
            break;
         }
         case Format::MTBUF:
         case Format::MUBUF: {
            check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
                  instr.get());
            check(instr->operands[1].hasRegClass() &&
                     instr->operands[1].regClass().type() == RegType::vgpr,
                  "VADDR must be in vgpr for VMEM instructions", instr.get());
            check(
               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
               "VMEM resource constant must be sgpr", instr.get());
            check(instr->operands.size() < 4 ||
                     (instr->operands[3].isTemp() &&
                      instr->operands[3].regClass().type() == RegType::vgpr),
                  "VMEM write data must be vgpr", instr.get());
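
            /* Loads that may legally write a subdword definition: the D16 variants
             * plus the plain loads used to spill subdword variables (see the FIXME). */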
            const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
                             instr->opcode == aco_opcode::buffer_load_ubyte ||
                             instr->opcode == aco_opcode::buffer_load_sbyte ||
                             instr->opcode == aco_opcode::buffer_load_ushort ||
                             instr->opcode == aco_opcode::buffer_load_sshort ||
                             instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
                             instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
                             instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
                             instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
                             instr->opcode == aco_opcode::buffer_load_short_d16 ||
                             instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
                             instr->opcode == aco_opcode::buffer_load_format_d16_x ||
                             instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
                             instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
                             instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
                             instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
                             instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
            if (instr->definitions.size()) {
               check(instr->definitions[0].isTemp() &&
                        instr->definitions[0].regClass().type() == RegType::vgpr,
                     "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
               check(d16 || !instr->definitions[0].regClass().is_subdword(),
                     "Only D16 opcodes can load subdword values.", instr.get());
               check(instr->definitions[0].bytes() <= 8 || !d16,
                     "D16 opcodes can only load up to 8 bytes.", instr.get());
            }
            break;
         }
         case Format::MIMG: {
            check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
                  instr.get());
            check(instr->operands[0].hasRegClass() &&
                     (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
                  "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
            if (instr->operands[1].hasRegClass())
               check(instr->operands[1].regClass() == s4,
                     "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
            if (!instr->operands[2].isUndefined()) {
               bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
                                 instr->opcode == aco_opcode::image_atomic_fcmpswap;
               check(instr->definitions.empty() ||
                        (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
                         is_cmpswap),
                     "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
                     "TFE/LWE loads",
                     instr.get());
            }
            check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
                  "NSA is only supported on GFX10+", instr.get());
            for (unsigned i = 3; i < instr->operands.size(); i++) {
               if (instr->operands.size() == 4) {
                  check(instr->operands[i].hasRegClass() &&
                           instr->operands[i].regClass().type() == RegType::vgpr,
                        "MIMG operands[3] (VADDR) must be VGPR", instr.get());
               } else {
                  check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
                        instr.get());
               }
            }

            if (instr->definitions.size()) {
               check(instr->definitions[0].isTemp() &&
                        instr->definitions[0].regClass().type() == RegType::vgpr,
                     "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
               check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
                     "Only D16 MIMG instructions can load subdword values.", instr.get());
               check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
                     "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
            }
            break;
         }
         case Format::DS: {
            for (const Operand& op : instr->operands) {
               check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,
                     "Only VGPRs are valid DS instruction operands", instr.get());
            }
            if (!instr->definitions.empty())
               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                     "DS instruction must return VGPR", instr.get());
            break;
         }
         case Format::EXP: {
            for (unsigned i = 0; i < 4; i++)
               check(instr->operands[i].hasRegClass() &&
                        instr->operands[i].regClass().type() == RegType::vgpr,
                     "Only VGPRs are valid Export arguments", instr.get());
            break;
         }
         case Format::FLAT:
            check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
                  instr.get());
            FALLTHROUGH;
         case Format::GLOBAL:
            check(
               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
               "FLAT/GLOBAL address must be vgpr", instr.get());
            FALLTHROUGH;
         case Format::SCRATCH: {
            check(instr->operands[0].hasRegClass() &&
                     instr->operands[0].regClass().type() == RegType::vgpr,
                  "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
            check(instr->operands[1].hasRegClass() &&
                     instr->operands[1].regClass().type() == RegType::sgpr,
                  "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
            if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
               check(instr->operands[0].isTemp() || instr->operands[1].isTemp(),
                     "SCRATCH must have either SADDR or ADDR operand", instr.get());
            if (!instr->definitions.empty())
               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                     "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
            else
               check(instr->operands[2].regClass().type() == RegType::vgpr,
                     "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
            break;
         }
         default: break;
         }
      }
   }

   /* validate CFG */
   for (unsigned i = 0; i < program->blocks.size(); i++) {
      Block& block = program->blocks[i];
      check_block(block.index == i, "block.index must match actual index", &block);

      /* predecessors/successors should be sorted */
      for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
         check_block(block.linear_preds[j] < block.linear_preds[j + 1],
                     "linear predecessors must be sorted", &block);
      for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
         check_block(block.logical_preds[j] < block.logical_preds[j + 1],
                     "logical predecessors must be sorted", &block);
      for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
         check_block(block.linear_succs[j] < block.linear_succs[j + 1],
                     "linear successors must be sorted", &block);
      for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
         check_block(block.logical_succs[j] < block.logical_succs[j + 1],
                     "logical successors must be sorted", &block);

      /* critical edges are not allowed */
      if (block.linear_preds.size() > 1) {
         for (unsigned pred : block.linear_preds)
            check_block(program->blocks[pred].linear_succs.size() == 1,
                        "linear critical edges are not allowed", &program->blocks[pred]);
         for (unsigned pred : block.logical_preds)
            check_block(program->blocks[pred].logical_succs.size() == 1,
                        "logical critical edges are not allowed", &program->blocks[pred]);
      }
   }

   return is_valid;
}

/* RA validation */
namespace {

struct Location {
   Location() : block(NULL), instr(NULL) {}

   Block* block;
   Instruction* instr; // NULL if it's the block's live-in
};

struct Assignment {
   Location defloc;
   Location firstloc;
   PhysReg reg;
   bool valid;
};
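
/* Print an RA validation error (including the involved instructions) through
 * aco_err() and return true so callers can accumulate it into their error flag. */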
bool
ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   char msg[1024];
   vsprintf(msg, fmt, args);
   va_end(args);

   char* out;
   size_t outsize;
   struct u_memstream mem;
   u_memstream_open(&mem, &out, &outsize);
   FILE* const memf = u_memstream_get(&mem);

   fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
   if (loc.instr) {
      aco_print_instr(loc.instr, memf);
      fprintf(memf, "\n%s", msg);
   } else {
      fprintf(memf, "%s", msg);
   }
   if (loc2.block) {
      fprintf(memf, " in BB%d:\n", loc2.block->index);
      aco_print_instr(loc2.instr, memf);
   }
   fprintf(memf, "\n\n");
   u_memstream_close(&mem);

   aco_err(program, "%s", out);
   free(out);

   return true;
}

bool
validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
                          unsigned index)
{
   Operand op = instr->operands[index];
   unsigned byte = op.physReg().byte();

   if (instr->opcode == aco_opcode::p_as_uniform)
      return byte == 0;
   if (instr->isPseudo() && gfx_level >= GFX8)
      return true;
   if (instr->isSDWA())
      return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
             byte % instr->sdwa().sel[index].size() == 0;
   if (instr->isVOP3P()) {
      bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
                     instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
                     instr->opcode == aco_opcode::v_fma_mix_f32;
      return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
             ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
   }
   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
      return true;

   switch (instr->opcode) {
   case aco_opcode::v_cvt_f32_ubyte1:
      if (byte == 1)
         return true;
      break;
   case aco_opcode::v_cvt_f32_ubyte2:
      if (byte == 2)
         return true;
      break;
   case aco_opcode::v_cvt_f32_ubyte3:
      if (byte == 3)
         return true;
      break;
   case aco_opcode::ds_write_b8_d16_hi:
   case aco_opcode::ds_write_b16_d16_hi:
      if (byte == 2 && index == 1)
         return true;
      break;
   case aco_opcode::buffer_store_byte_d16_hi:
   case aco_opcode::buffer_store_short_d16_hi:
   case aco_opcode::buffer_store_format_d16_hi_x:
      if (byte == 2 && index == 3)
         return true;
      break;
   case aco_opcode::flat_store_byte_d16_hi:
   case aco_opcode::flat_store_short_d16_hi:
   case aco_opcode::scratch_store_byte_d16_hi:
   case aco_opcode::scratch_store_short_d16_hi:
   case aco_opcode::global_store_byte_d16_hi:
   case aco_opcode::global_store_short_d16_hi:
      if (byte == 2 && index == 2)
         return true;
      break;
   default: break;
   }

   return byte == 0;
}

bool
validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
{
   Definition def = instr->definitions[0];
   unsigned byte = def.physReg().byte();

   if (instr->isPseudo() && gfx_level >= GFX8)
      return true;
   if (instr->isSDWA())
      return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
             byte % instr->sdwa().dst_sel.size() == 0;
   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
      return true;

   switch (instr->opcode) {
   case aco_opcode::v_fma_mixhi_f16:
   case aco_opcode::buffer_load_ubyte_d16_hi:
   case aco_opcode::buffer_load_sbyte_d16_hi:
   case aco_opcode::buffer_load_short_d16_hi:
   case aco_opcode::buffer_load_format_d16_hi_x:
   case aco_opcode::flat_load_ubyte_d16_hi:
   case aco_opcode::flat_load_short_d16_hi:
   case aco_opcode::scratch_load_ubyte_d16_hi:
   case aco_opcode::scratch_load_short_d16_hi:
   case aco_opcode::global_load_ubyte_d16_hi:
   case aco_opcode::global_load_short_d16_hi:
   case aco_opcode::ds_read_u8_d16_hi:
   case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
   default: break;
   }

   return byte == 0;
}

unsigned
get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
{
   amd_gfx_level gfx_level = program->gfx_level;
   Definition def = instr->definitions[index];

   if (instr->isPseudo())
      return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
   if (instr->isVALU()) {
      assert(def.bytes() <= 2);
      if (instr->isSDWA())
         return instr->sdwa().dst_sel.size();

      if (instr_is_16bit(gfx_level, instr->opcode))
         return 2;

      return 4;
   }

   if (instr->isMIMG()) {
      assert(instr->mimg().d16);
      return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
   }

   switch (instr->opcode) {
   case aco_opcode::buffer_load_ubyte_d16:
   case aco_opcode::buffer_load_sbyte_d16:
   case aco_opcode::buffer_load_short_d16:
   case aco_opcode::buffer_load_format_d16_x:
   case aco_opcode::tbuffer_load_format_d16_x:
   case aco_opcode::flat_load_ubyte_d16:
   case aco_opcode::flat_load_short_d16:
   case aco_opcode::scratch_load_ubyte_d16:
   case aco_opcode::scratch_load_short_d16:
   case aco_opcode::global_load_ubyte_d16:
   case aco_opcode::global_load_short_d16:
   case aco_opcode::ds_read_u8_d16:
   case aco_opcode::ds_read_u16_d16:
   case aco_opcode::buffer_load_ubyte_d16_hi:
   case aco_opcode::buffer_load_sbyte_d16_hi:
   case aco_opcode::buffer_load_short_d16_hi:
   case aco_opcode::buffer_load_format_d16_hi_x:
   case aco_opcode::flat_load_ubyte_d16_hi:
   case aco_opcode::flat_load_short_d16_hi:
   case aco_opcode::scratch_load_ubyte_d16_hi:
   case aco_opcode::scratch_load_short_d16_hi:
   case aco_opcode::global_load_ubyte_d16_hi:
   case aco_opcode::global_load_short_d16_hi:
   case aco_opcode::ds_read_u8_d16_hi:
   case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
   case aco_opcode::buffer_load_format_d16_xyz:
   case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
   default: return def.size() * 4;
   }
}
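
/* Record instr's definitions in the byte-granular register file 'regs' and report
 * an error if a byte is already occupied by a different temporary. Returns true
 * if an error was found. */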
bool
validate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
                    const std::vector<Assignment>& assignments, const Location& loc,
                    aco_ptr<Instruction>& instr)
{
   bool err = false;

   for (unsigned i = 0; i < instr->definitions.size(); i++) {
      Definition& def = instr->definitions[i];
      if (!def.isTemp())
         continue;
      Temp tmp = def.getTemp();
      PhysReg reg = assignments[tmp.id()].reg;
      for (unsigned j = 0; j < tmp.bytes(); j++) {
         if (regs[reg.reg_b + j])
            err |=
               ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc,
                       "Assignment of element %d of %%%d already taken by %%%d from instruction", i,
                       tmp.id(), regs[reg.reg_b + j]);
         regs[reg.reg_b + j] = tmp.id();
      }
      if (def.regClass().is_subdword() && def.bytes() < 4) {
         unsigned written = get_subdword_bytes_written(program, instr, i);
         /* If written=4, the instruction still might write the upper half. In that case, it's
          * the lower half that isn't preserved */
         for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
            unsigned written_reg = reg.reg() * 4u + j;
            if (regs[written_reg] && regs[written_reg] != def.tempId())
               err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
                              "Assignment of element %d of %%%d overwrites the full register "
                              "taken by %%%d from instruction",
                              i, tmp.id(), regs[written_reg]);
         }
      }
   }

   for (const Definition& def : instr->definitions) {
      if (!def.isTemp())
         continue;
      if (def.isKill()) {
         for (unsigned j = 0; j < def.getTemp().bytes(); j++)
            regs[def.physReg().reg_b + j] = 0;
      }
   }

   return err;
}

} /* end namespace */
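
/* Validate the register assignment: the first pass checks that every operand and
 * definition has a register, that temporaries keep a consistent assignment and stay
 * within the VGPR/SGPR limits; the second pass walks each block with a byte-granular
 * register file to detect overlapping live values. */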
bool
validate_ra(Program* program)
{
   if (!(debug_flags & DEBUG_VALIDATE_RA))
      return false;

   bool err = false;
   aco::live live_vars = aco::live_var_analysis(program);
   std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
   uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);

   std::vector<Assignment> assignments(program->peekAllocationId());
   for (Block& block : program->blocks) {
      Location loc;
      loc.block = &block;
      for (aco_ptr<Instruction>& instr : block.instructions) {
         if (instr->opcode == aco_opcode::p_phi) {
            for (unsigned i = 0; i < instr->operands.size(); i++) {
               if (instr->operands[i].isTemp() &&
                   instr->operands[i].getTemp().type() == RegType::sgpr &&
                   instr->operands[i].isFirstKill())
                  phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
            }
         }

         loc.instr = instr.get();
         for (unsigned i = 0; i < instr->operands.size(); i++) {
            Operand& op = instr->operands[i];
            if (!op.isTemp())
               continue;
            if (!op.isFixed())
               err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
            if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg())
               err |=
                  ra_fail(program, loc, assignments[op.tempId()].firstloc,
                          "Operand %d has an inconsistent register assignment with instruction", i);
            if ((op.getTemp().type() == RegType::vgpr &&
                 op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
                (op.getTemp().type() == RegType::sgpr &&
                 op.physReg() + op.size() > program->config->num_sgprs &&
                 op.physReg() < sgpr_limit))
               err |= ra_fail(program, loc, assignments[op.tempId()].firstloc,
                              "Operand %d has an out-of-bounds register assignment", i);
            if (op.physReg() == vcc && !program->needs_vcc)
               err |= ra_fail(program, loc, Location(),
                              "Operand %d fixed to vcc but needs_vcc=false", i);
            if (op.regClass().is_subdword() &&
                !validate_subdword_operand(program->gfx_level, instr, i))
               err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
            if (!assignments[op.tempId()].firstloc.block)
               assignments[op.tempId()].firstloc = loc;
            if (!assignments[op.tempId()].defloc.block) {
               assignments[op.tempId()].reg = op.physReg();
               assignments[op.tempId()].valid = true;
            }
         }

         for (unsigned i = 0; i < instr->definitions.size(); i++) {
            Definition& def = instr->definitions[i];
            if (!def.isTemp())
               continue;
            if (!def.isFixed())
               err |=
                  ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
            if (assignments[def.tempId()].defloc.block)
               err |= ra_fail(program, loc, assignments[def.tempId()].defloc,
                              "Temporary %%%d also defined by instruction", def.tempId());
            if ((def.getTemp().type() == RegType::vgpr &&
                 def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
                (def.getTemp().type() == RegType::sgpr &&
                 def.physReg() + def.size() > program->config->num_sgprs &&
                 def.physReg() < sgpr_limit))
               err |= ra_fail(program, loc, assignments[def.tempId()].firstloc,
                              "Definition %d has an out-of-bounds register assignment", i);
            if (def.physReg() == vcc && !program->needs_vcc)
               err |= ra_fail(program, loc, Location(),
                              "Definition %d fixed to vcc but needs_vcc=false", i);
            if (def.regClass().is_subdword() &&
                !validate_subdword_definition(program->gfx_level, instr))
               err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
            if (!assignments[def.tempId()].firstloc.block)
               assignments[def.tempId()].firstloc = loc;
            assignments[def.tempId()].defloc = loc;
            assignments[def.tempId()].reg = def.physReg();
            assignments[def.tempId()].valid = true;
         }
      }
   }

   for (Block& block : program->blocks) {
      Location loc;
      loc.block = &block;

      std::array<unsigned, 2048> regs; /* register file in bytes */
      regs.fill(0);

      IDSet live = live_vars.live_out[block.index];
      /* remove killed p_phi sgpr operands */
      for (Temp tmp : phi_sgpr_ops[block.index])
         live.erase(tmp.id());

      /* check live out */
      for (unsigned id : live) {
         Temp tmp(id, program->temp_rc[id]);
         PhysReg reg = assignments[id].reg;
         for (unsigned i = 0; i < tmp.bytes(); i++) {
            if (regs[reg.reg_b + i]) {
               err |= ra_fail(program, loc, Location(),
                              "Assignment of element %d of %%%d already taken by %%%d in live-out",
                              i, id, regs[reg.reg_b + i]);
            }
            regs[reg.reg_b + i] = id;
         }
      }
      regs.fill(0);

      for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
         aco_ptr<Instruction>& instr = *it;

         /* check killed p_phi sgpr operands */
         if (instr->opcode == aco_opcode::p_logical_end) {
            for (Temp tmp : phi_sgpr_ops[block.index]) {
               PhysReg reg = assignments[tmp.id()].reg;
               for (unsigned i = 0; i < tmp.bytes(); i++) {
                  if (regs[reg.reg_b + i])
                     err |= ra_fail(
                        program, loc, Location(),
                        "Assignment of element %d of %%%d already taken by %%%d in live-out", i,
                        tmp.id(), regs[reg.reg_b + i]);
               }
               live.insert(tmp.id());
            }
         }

         for (const Definition& def : instr->definitions) {
            if (!def.isTemp())
               continue;
            live.erase(def.tempId());
         }

         /* don't count phi operands as live-in, since they are actually
          * killed when they are copied at the predecessor */
         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               live.insert(op.tempId());
            }
         }
      }

      for (unsigned id : live) {
         Temp tmp(id, program->temp_rc[id]);
         PhysReg reg = assignments[id].reg;
         for (unsigned i = 0; i < tmp.bytes(); i++)
            regs[reg.reg_b + i] = id;
      }
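
      /* Walk the block forward: definitions must not overwrite bytes that are
       * still live, and killed operands free their bytes. */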
      for (aco_ptr<Instruction>& instr : block.instructions) {
         loc.instr = instr.get();

         /* remove killed p_phi operands from regs */
         if (instr->opcode == aco_opcode::p_logical_end) {
            for (Temp tmp : phi_sgpr_ops[block.index]) {
               PhysReg reg = assignments[tmp.id()].reg;
               for (unsigned i = 0; i < tmp.bytes(); i++)
                  regs[reg.reg_b + i] = 0;
            }
         }

         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               if (op.isFirstKillBeforeDef()) {
                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
                     regs[op.physReg().reg_b + j] = 0;
               }
            }
         }

         if (!instr->isBranch() || block.linear_succs.size() != 1)
            err |= validate_instr_defs(program, regs, assignments, loc, instr);

         if (!is_phi(instr)) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               if (op.isLateKill() && op.isFirstKill()) {
                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
                     regs[op.physReg().reg_b + j] = 0;
               }
            }
         } else if (block.linear_preds.size() != 1 ||
                    program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
            for (unsigned pred : block.linear_preds) {
               aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
               assert(br->isBranch());
               err |= validate_instr_defs(program, regs, assignments, loc, br);
            }
         }
      }
   }

   return err;
}
} // namespace aco