1/* 2 * Copyright (C) 2021 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "util/ralloc.h" 25#include "ir3_ra.h" 26#include "ir3_shader.h" 27 28/* This file implements a validation pass for register allocation. We check 29 * that the assignment of SSA values to registers is "valid", in the sense 30 * that each original definition reaches all of its uses without being 31 * clobbered by something else. 32 * 33 * The validation is a forward dataflow analysis. The state at each point 34 * consists of, for each physical register, the SSA value occupying it, or a 35 * few special values: 36 * 37 * - "unknown" is set initially, before the dataflow analysis assigns it a 38 * value. This is the lattice bottom. 39 * - Values at the start get "undef", which acts like a special SSA value that 40 * indicates it is never written. 
41 * - "overdefined" registers are set to more than one value, depending on 42 * which path you take to get to the spot. This is the lattice top. 43 * 44 * Overdefined is necessary to distinguish because in some programs, like this 45 * simple example, it's perfectly normal and allowed: 46 * 47 * if (...) { 48 * mov.u32u32 ssa_1(r1.x), ... 49 * ... 50 * } else { 51 * mov.u32u32 ssa_2(r1.x), ... 52 * ... 53 * } 54 * // r1.x is overdefined here! 55 * 56 * However, if an ssa value after the if is accidentally assigned to r1.x, we 57 * need to remember that it's invalid to catch the mistake. Overdef has to be 58 * distinguished from undef so that the state forms a valid lattice to 59 * guarantee that the analysis always terminates. We could avoid relying on 60 * overdef by using liveness analysis, but not relying on liveness has the 61 * benefit that we can catch bugs in liveness analysis too. 62 * 63 * One tricky thing we have to handle is the coalescing of splits/collects, 64 * which means that multiple SSA values can occupy a register at the same 65 * time. While we could use the same merge set indices that RA uses, again 66 * that would rely on the merge set calculation being correct which we don't 67 * want to. Instead we treat splits/collects as transfer instructions, similar 68 * to the parallelcopy instructions inserted by RA, and have them copy their 69 * sources to their destinations. This means that each physreg must carry the 70 * SSA def assigned to it plus an offset into that definition, and when 71 * validating sources we must look through splits/collects to find the 72 * "original" source for each subregister. 
 */

/* Sentinel "defs" stored in reg_state::def. Real defs are pointers to
 * heap-allocated ir3_register, so these small integer values can never
 * collide with them.
 */
#define UNKNOWN ((struct ir3_register *)NULL)      /* lattice bottom: not yet computed */
#define UNDEF ((struct ir3_register *)(uintptr_t)1)   /* never written on any path */
#define OVERDEF ((struct ir3_register *)(uintptr_t)2) /* lattice top: conflicting defs */

/* The SSA def currently occupying one physical subregister, plus the
 * subregister offset into that def (needed because splits/collects let one
 * def span several slots -- see the file comment).
 */
struct reg_state {
   struct ir3_register *def;
   unsigned offset;
};

struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};

/* Reaching state at a single program point: one file_state per register
 * file.
 */
struct reaching_state {
   struct file_state half, full, shared;
};

struct ra_val_ctx {
   /* Instruction currently being validated, for error messages. */
   struct ir3_instruction *current_instr;

   /* Scratch state, updated in place while walking a block's instructions. */
   struct reaching_state reaching;
   /* Reaching state at the *start* of each block, indexed by block->index. */
   struct reaching_state *block_reaching;
   unsigned block_count;

   /* File sizes (in subregister slots) used for bounds checks and merging. */
   unsigned full_size, half_size;

   /* When set, half regs live in the same file as full regs (merged
    * register file), so they are validated against the full file.
    */
   bool merged_regs;

   /* Set by check_reaching_src() so we can dump the whole shader once at the
    * end instead of aborting on the first bad source.
    */
   bool failed;
};

/* Print the failed condition and the offending instruction, then abort.
 * Used for the "simple" structural checks which are fatal immediately.
 */
static void
validate_error(struct ra_val_ctx *ctx, const char *condstr)
{
   fprintf(stderr, "ra validation fail: %s\n", condstr);
   fprintf(stderr, " -> for instruction: ");
   ir3_print_instr(ctx->current_instr);
   abort();
}

#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (!(cond)) {                                                           \
         validate_error(ctx, #cond);                                           \
      }                                                                        \
   } while (0)

/* Size (in subregister slots) of the file that "reg" was allocated in. */
static unsigned
get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED)
      return RA_SHARED_SIZE;
   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
      return ctx->full_size;
   else
      return ctx->half_size;
}

/* Validate simple things, like the registers being in-bounds. This way we
 * don't have to worry about out-of-bounds accesses later.
 */

static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ctx->current_instr = instr;
   ra_foreach_dst (dst, instr) {
      /* Whole dst (base physreg + size) must fit inside its file. */
      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
      /* A tied dst must have been assigned the same register as the source
       * it is tied to.
       */
      if (dst->tied)
         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
   }

   ra_foreach_src (src, instr) {
      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
      validate_assert(ctx, src_max <= get_file_size(ctx, src));
   }
}

/* This is the lattice operator. Joins "src" into "dst" and returns true iff
 * "dst" changed (i.e. the dataflow iteration made progress). The lattice is
 * UNKNOWN (bottom) -> concrete (def, offset) or UNDEF -> OVERDEF (top), and
 * two different concrete values join to OVERDEF.
 */
static bool
merge_reg(struct reg_state *dst, const struct reg_state *src)
{
   if (dst->def == UNKNOWN) {
      *dst = *src;
      /* Copying UNKNOWN over UNKNOWN is not progress. */
      return src->def != UNKNOWN;
   } else if (dst->def == OVERDEF) {
      /* Already at the top of the lattice; cannot change further. */
      return false;
   } else {
      if (src->def == UNKNOWN)
         return false;
      else if (src->def == OVERDEF) {
         *dst = *src;
         return true;
      } else {
         /* Two distinct reaching values (or the same def at different
          * offsets) collapse to OVERDEF.
          */
         if (dst->def != src->def || dst->offset != src->offset) {
            dst->def = OVERDEF;
            dst->offset = 0;
            return true;
         } else {
            return false;
         }
      }
   }
}

static bool
merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
{
   bool progress = false;
   for (unsigned i = 0; i < size; i++)
      progress |= merge_reg(&dst->regs[i], &src->regs[i]);
   return progress;
}

/* Merge along *logical* CFG edges: full and half files only. The shared
 * file is handled separately by merge_state_physical() because shared regs
 * propagate along physical edges.
 */
static bool
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
            const struct reaching_state *src)
{
   bool progress = false;
   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
   return progress;
}

/* Merge along *physical* CFG edges: only the shared file. */
static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
                     const struct reaching_state *src)
{
   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}

/* The file_state within the scratch reaching state that "reg" lives in.
 * Mirrors the file selection logic of get_file_size().
 */
static struct file_state *
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED)
      return &ctx->reaching.shared;
   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
      return &ctx->reaching.full;
   else
      return &ctx->reaching.half;
}

/* A normal (non-transfer) instruction: each dst overwrites its physregs
 * with (dst, subregister offset).
 */
static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t physreg = ra_reg_get_physreg(dst);
      for (unsigned i = 0; i < reg_size(dst); i++) {
         file->regs[physreg + i] = (struct reg_state){
            .def = dst,
            .offset = i,
         };
      }
   }
}

/* split is a transfer instruction: copy the reaching state of the selected
 * element of the source (split.off elements in) to the dst slots, rather
 * than defining a new value.
 */
static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
   struct ir3_register *dst = split->dsts[0];
   struct ir3_register *src = split->srcs[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   physreg_t src_physreg = ra_reg_get_physreg(src);
   struct file_state *file = ra_val_get_file(ctx, dst);

   unsigned offset = split->split.off * reg_elem_size(src);
   for (unsigned i = 0; i < reg_elem_size(src); i++) {
      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
   }
}

/* collect is also a transfer instruction. Gather into a temporary first and
 * only then write the dst slots, because the dst physregs may overlap the
 * (coalesced) source physregs we are still reading from.
 */
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   struct file_state *file = ra_val_get_file(ctx, dst);

   unsigned size = reg_size(dst);
   struct reg_state srcs[size];

   for (unsigned i = 0; i < collect->srcs_count; i++) {
      struct ir3_register *src = collect->srcs[i];
      unsigned dst_offset = i * reg_elem_size(dst);
      for (unsigned j = 0; j < reg_elem_size(dst); j++) {
         if (!ra_reg_is_src(src)) {
            /* immed/const source: the collect itself acts as the def of
             * these slots.
             */
            srcs[dst_offset + j] = (struct reg_state){
               .def = dst,
               .offset = dst_offset + j,
            };
         } else {
            /* Each source is one element; index from its own base. */
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[dst_offset + j] = file->regs[src_physreg + j];
         }
      }
   }

   for (unsigned i = 0; i < size; i++)
      file->regs[dst_physreg + i] = srcs[i];
}

/* parallelcopy semantically performs all of its copies at once, so read
 * every source into a temporary before writing any destination -- sources
 * and destinations may overlap in the file.
 */
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   /* Total size of all copies (dsts and srcs are index-paired). */
   unsigned size = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   struct reg_state srcs[size];

   /* Phase 1: snapshot what each copy reads. */
   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++) {
         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
            /* immed/const source: the pcopy dst acts as the def. */
            srcs[offset + j] = (struct reg_state){
               .def = dst,
               .offset = j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[offset + j] = file->regs[src_physreg + j];
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   /* Phase 2: write all destinations from the snapshot. */
   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}

/* Update ctx->reaching for one instruction, dispatching on whether it is a
 * transfer instruction (split/collect/pcopy) or a normal def.
 */
static void
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT)
      propagate_split(ctx, instr);
   else if (instr->opc == OPC_META_COLLECT)
      propagate_collect(ctx, instr);
   else if (instr->opc == OPC_META_PARALLEL_COPY)
      propagate_parallelcopy(ctx, instr);
   else
      propagate_normal_instr(ctx, instr);
}

/* One iteration of the forward dataflow transfer for a block: start from
 * the block's entry state, walk its instructions, then join the resulting
 * exit state into every successor's entry state. Returns true if any
 * successor state changed (so the fixed-point loop must run again).
 */
static bool
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      propagate_instr(ctx, instr);
   }

   bool progress = false;
   /* full/half regs follow logical successors... */
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;
      progress |=
         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
   }
   /* ...while shared regs follow physical successors. */
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->physical_successors[i];
      if (!succ)
         continue;
      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
                                       &ctx->reaching);
   }
   return progress;
}

/* Look through split/collect/pcopy transfer instructions to find the
 * "original" (def, offset) a reg_state refers to, so that a source's
 * expected value can be compared against the propagated state. Stops at
 * the first non-transfer def, or when it hits an immed/const source.
 */
static void
chase_definition(struct reg_state *state)
{
   while (true) {
      struct ir3_instruction *instr = state->def->instr;
      switch (instr->opc) {
      case OPC_META_SPLIT: {
         /* A split reads element split.off of its source. */
         struct ir3_register *new_def = instr->srcs[0]->def;
         unsigned offset = instr->split.off * reg_elem_size(new_def);
         *state = (struct reg_state){
            .def = new_def,
            .offset = state->offset + offset,
         };
         break;
      }
      case OPC_META_COLLECT: {
         /* Map the offset within the collect dst to the source element it
          * came from, and the offset within that element.
          */
         unsigned src_idx = state->offset / reg_elem_size(state->def);
         unsigned src_offset = state->offset % reg_elem_size(state->def);
         struct ir3_register *new_def = instr->srcs[src_idx]->def;
         if (new_def) {
            *state = (struct reg_state){
               .def = new_def,
               .offset = src_offset,
            };
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      case OPC_META_PARALLEL_COPY: {
         /* Find which copy produced this def; dsts and srcs are paired by
          * index and copies preserve offsets.
          */
         unsigned dst_idx = ~0;
         for (unsigned i = 0; i < instr->dsts_count; i++) {
            if (instr->dsts[i] == state->def) {
               dst_idx = i;
               break;
            }
         }
         assert(dst_idx != ~0);

         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
         if (new_def) {
            state->def = new_def;
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      default:
         return;
      }
   }
}

/* Pretty-print a reg_state (including the UNDEF/OVERDEF sentinels) for
 * error reporting.
 */
static void
dump_reg_state(struct reg_state *state)
{
   if (state->def == UNDEF) {
      fprintf(stderr, "no reaching definition");
   } else if (state->def == OVERDEF) {
      fprintf(stderr,
              "more than one reaching definition or partial definition");
   } else {
      /* The analysis should always remove UNKNOWN eventually. */
      assert(state->def != UNKNOWN);

      fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
              state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
              state->def->num / 4, "xyzw"[state->def->num % 4],
              state->offset);
   }
}

/* Check that every subregister of "src" is currently occupied by the value
 * it is supposed to read: the chased (src->def, offset). On mismatch, dump
 * expected vs. actual and mark the context failed (non-fatal, so every bad
 * source in the shader gets reported).
 */
static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
                   struct ir3_register *src)
{
   struct file_state *file = ra_val_get_file(ctx, src);
   physreg_t physreg = ra_reg_get_physreg(src);
   for (unsigned i = 0; i < reg_size(src); i++) {
      struct reg_state expected = (struct reg_state){
         .def = src->def,
         .offset = i,
      };
      chase_definition(&expected);

      struct reg_state actual = file->regs[physreg + i];

      if (expected.def != actual.def || expected.offset != actual.offset) {
         fprintf(
            stderr,
            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
            src->def->instr->serialno, src->def->name, i);
         fprintf(stderr, "expected: ");
         dump_reg_state(&expected);
         fprintf(stderr, "\n");
         fprintf(stderr, "actual: ");
         dump_reg_state(&actual);
         fprintf(stderr, "\n");
         fprintf(stderr, "-> for instruction: ");
         ir3_print_instr(instr);
         ctx->failed = true;
      }
   }
}

/* Check all sources of a non-transfer instruction. Transfer instructions
 * (split/collect/pcopy) are skipped because chase_definition() looks
 * through them instead; phis are skipped because their sources are checked
 * at the end of each predecessor (see check_reaching_block()).
 */
static void
check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
       instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
      return;
   }

   ra_foreach_src (src, instr) {
      check_reaching_src(ctx, instr, src);
   }
}

/* Re-walk a block with the final (fixed-point) entry state, checking each
 * instruction's sources as we go, then check successors' phi sources that
 * correspond to this block against our exit state.
 */
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      /* A phi's source for predecessor "block" must reach the end of
       * "block". Phis are at the top of the successor, before any other
       * instruction.
       */
      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}

/* Run the full reaching-definitions analysis and check every source:
 * seed the entry block with UNDEF (rzalloc leaves everything else at
 * UNKNOWN), iterate propagate_block() to a fixed point, then verify each
 * block. Aborts (after dumping the shader) if anything failed.
 */
static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
   ctx->block_reaching =
      rzalloc_array(ctx, struct reaching_state, ctx->block_count);

   struct reaching_state *start = &ctx->block_reaching[0];
   for (unsigned i = 0; i < ctx->full_size; i++)
      start->full.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->half_size; i++)
      start->half.regs[i].def = UNDEF;
   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
      start->shared.regs[i].def = UNDEF;

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         progress |= propagate_block(ctx, block);
      }
   } while (progress);

   foreach_block (block, &ir->block_list) {
      check_reaching_block(ctx, block);
   }

   if (ctx->failed) {
      fprintf(stderr, "failing shader:\n");
      ir3_print(ir);
      abort();
   }
}

/* Entry point: validate the register assignment of an entire shader
 * variant. Compiled out (early-returns) in release builds.
 */
void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->block_count = block_count;

   /* First the cheap structural checks (bounds, tied regs)... */
   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   /* ...then the dataflow-based reaching-definitions check. */
   check_reaching_defs(ctx, v->ir);

   ralloc_free(ctx);
}