1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "util/ralloc.h" 25bf215546Sopenharmony_ci#include "ir3_ra.h" 26bf215546Sopenharmony_ci#include "ir3_shader.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* This file implements a validation pass for register allocation. We check 29bf215546Sopenharmony_ci * that the assignment of SSA values to registers is "valid", in the sense 30bf215546Sopenharmony_ci * that each original definition reaches all of its uses without being 31bf215546Sopenharmony_ci * clobbered by something else. 32bf215546Sopenharmony_ci * 33bf215546Sopenharmony_ci * The validation is a forward dataflow analysis. The state at each point 34bf215546Sopenharmony_ci * consists of, for each physical register, the SSA value occupying it, or a 35bf215546Sopenharmony_ci * few special values: 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * - "unknown" is set initially, before the dataflow analysis assigns it a 38bf215546Sopenharmony_ci * value. This is the lattice bottom. 39bf215546Sopenharmony_ci * - Values at the start get "undef", which acts like a special SSA value that 40bf215546Sopenharmony_ci * indicates it is never written. 41bf215546Sopenharmony_ci * - "overdefined" registers are set to more than one value, depending on 42bf215546Sopenharmony_ci * which path you take to get to the spot. This is the lattice top. 43bf215546Sopenharmony_ci * 44bf215546Sopenharmony_ci * Overdefined is necessary to distinguish because in some programs, like this 45bf215546Sopenharmony_ci * simple example, it's perfectly normal and allowed: 46bf215546Sopenharmony_ci * 47bf215546Sopenharmony_ci * if (...) { 48bf215546Sopenharmony_ci * mov.u32u32 ssa_1(r1.x), ... 49bf215546Sopenharmony_ci * ... 50bf215546Sopenharmony_ci * } else { 51bf215546Sopenharmony_ci * mov.u32u32 ssa_2(r1.x), ... 52bf215546Sopenharmony_ci * ... 53bf215546Sopenharmony_ci * } 54bf215546Sopenharmony_ci * // r1.x is overdefined here! 55bf215546Sopenharmony_ci * 56bf215546Sopenharmony_ci * However, if an ssa value after the if is accidentally assigned to r1.x, we 57bf215546Sopenharmony_ci * need to remember that it's invalid to catch the mistake. Overdef has to be 58bf215546Sopenharmony_ci * distinguished from undef so that the state forms a valid lattice to 59bf215546Sopenharmony_ci * guarantee that the analysis always terminates. We could avoid relying on 60bf215546Sopenharmony_ci * overdef by using liveness analysis, but not relying on liveness has the 61bf215546Sopenharmony_ci * benefit that we can catch bugs in liveness analysis too. 62bf215546Sopenharmony_ci * 63bf215546Sopenharmony_ci * One tricky thing we have to handle is the coalescing of splits/collects, 64bf215546Sopenharmony_ci * which means that multiple SSA values can occupy a register at the same 65bf215546Sopenharmony_ci * time. While we could use the same merge set indices that RA uses, again 66bf215546Sopenharmony_ci * that would rely on the merge set calculation being correct which we don't 67bf215546Sopenharmony_ci * want to. Instead we treat splits/collects as transfer instructions, similar 68bf215546Sopenharmony_ci * to the parallelcopy instructions inserted by RA, and have them copy their 69bf215546Sopenharmony_ci * sources to their destinations. This means that each physreg must carry the 70bf215546Sopenharmony_ci * SSA def assigned to it plus an offset into that definition, and when 71bf215546Sopenharmony_ci * validating sources we must look through splits/collects to find the 72bf215546Sopenharmony_ci * "original" source for each subregister. 73bf215546Sopenharmony_ci */ 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci#define UNKNOWN ((struct ir3_register *)NULL) 76bf215546Sopenharmony_ci#define UNDEF ((struct ir3_register *)(uintptr_t)1) 77bf215546Sopenharmony_ci#define OVERDEF ((struct ir3_register *)(uintptr_t)2) 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cistruct reg_state { 80bf215546Sopenharmony_ci struct ir3_register *def; 81bf215546Sopenharmony_ci unsigned offset; 82bf215546Sopenharmony_ci}; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_cistruct file_state { 85bf215546Sopenharmony_ci struct reg_state regs[RA_MAX_FILE_SIZE]; 86bf215546Sopenharmony_ci}; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistruct reaching_state { 89bf215546Sopenharmony_ci struct file_state half, full, shared; 90bf215546Sopenharmony_ci}; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_cistruct ra_val_ctx { 93bf215546Sopenharmony_ci struct ir3_instruction *current_instr; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci struct reaching_state reaching; 96bf215546Sopenharmony_ci struct reaching_state *block_reaching; 97bf215546Sopenharmony_ci unsigned block_count; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci unsigned full_size, half_size; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci bool merged_regs; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci bool failed; 104bf215546Sopenharmony_ci}; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_cistatic void 107bf215546Sopenharmony_civalidate_error(struct ra_val_ctx *ctx, const char *condstr) 108bf215546Sopenharmony_ci{ 109bf215546Sopenharmony_ci fprintf(stderr, "ra validation fail: %s\n", condstr); 110bf215546Sopenharmony_ci fprintf(stderr, " -> for instruction: "); 111bf215546Sopenharmony_ci ir3_print_instr(ctx->current_instr); 112bf215546Sopenharmony_ci abort(); 113bf215546Sopenharmony_ci} 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci#define validate_assert(ctx, cond) \ 116bf215546Sopenharmony_ci do { \ 117bf215546Sopenharmony_ci if (!(cond)) { \ 118bf215546Sopenharmony_ci validate_error(ctx, #cond); \ 119bf215546Sopenharmony_ci } \ 120bf215546Sopenharmony_ci } while (0) 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_cistatic unsigned 123bf215546Sopenharmony_ciget_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg) 124bf215546Sopenharmony_ci{ 125bf215546Sopenharmony_ci if (reg->flags & IR3_REG_SHARED) 126bf215546Sopenharmony_ci return RA_SHARED_SIZE; 127bf215546Sopenharmony_ci else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) 128bf215546Sopenharmony_ci return ctx->full_size; 129bf215546Sopenharmony_ci else 130bf215546Sopenharmony_ci return ctx->half_size; 131bf215546Sopenharmony_ci} 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci/* Validate simple things, like the registers being in-bounds. This way we 134bf215546Sopenharmony_ci * don't have to worry about out-of-bounds accesses later. 135bf215546Sopenharmony_ci */ 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_cistatic void 138bf215546Sopenharmony_civalidate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 139bf215546Sopenharmony_ci{ 140bf215546Sopenharmony_ci ctx->current_instr = instr; 141bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 142bf215546Sopenharmony_ci unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst); 143bf215546Sopenharmony_ci validate_assert(ctx, dst_max <= get_file_size(ctx, dst)); 144bf215546Sopenharmony_ci if (dst->tied) 145bf215546Sopenharmony_ci validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied)); 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 149bf215546Sopenharmony_ci unsigned src_max = ra_reg_get_physreg(src) + reg_size(src); 150bf215546Sopenharmony_ci validate_assert(ctx, src_max <= get_file_size(ctx, src)); 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci} 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci/* This is the lattice operator. */ 155bf215546Sopenharmony_cistatic bool 156bf215546Sopenharmony_cimerge_reg(struct reg_state *dst, const struct reg_state *src) 157bf215546Sopenharmony_ci{ 158bf215546Sopenharmony_ci if (dst->def == UNKNOWN) { 159bf215546Sopenharmony_ci *dst = *src; 160bf215546Sopenharmony_ci return src->def != UNKNOWN; 161bf215546Sopenharmony_ci } else if (dst->def == OVERDEF) { 162bf215546Sopenharmony_ci return false; 163bf215546Sopenharmony_ci } else { 164bf215546Sopenharmony_ci if (src->def == UNKNOWN) 165bf215546Sopenharmony_ci return false; 166bf215546Sopenharmony_ci else if (src->def == OVERDEF) { 167bf215546Sopenharmony_ci *dst = *src; 168bf215546Sopenharmony_ci return true; 169bf215546Sopenharmony_ci } else { 170bf215546Sopenharmony_ci if (dst->def != src->def || dst->offset != src->offset) { 171bf215546Sopenharmony_ci dst->def = OVERDEF; 172bf215546Sopenharmony_ci dst->offset = 0; 173bf215546Sopenharmony_ci return true; 174bf215546Sopenharmony_ci } else { 175bf215546Sopenharmony_ci return false; 176bf215546Sopenharmony_ci } 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci } 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_cistatic bool 182bf215546Sopenharmony_cimerge_file(struct file_state *dst, const struct file_state *src, unsigned size) 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci bool progress = false; 185bf215546Sopenharmony_ci for (unsigned i = 0; i < size; i++) 186bf215546Sopenharmony_ci progress |= merge_reg(&dst->regs[i], &src->regs[i]); 187bf215546Sopenharmony_ci return progress; 188bf215546Sopenharmony_ci} 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_cistatic bool 191bf215546Sopenharmony_cimerge_state(struct ra_val_ctx *ctx, struct reaching_state *dst, 192bf215546Sopenharmony_ci const struct reaching_state *src) 193bf215546Sopenharmony_ci{ 194bf215546Sopenharmony_ci bool progress = false; 195bf215546Sopenharmony_ci progress |= merge_file(&dst->full, &src->full, ctx->full_size); 196bf215546Sopenharmony_ci progress |= merge_file(&dst->half, &src->half, ctx->half_size); 197bf215546Sopenharmony_ci return progress; 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_cistatic bool 201bf215546Sopenharmony_cimerge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst, 202bf215546Sopenharmony_ci const struct reaching_state *src) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE); 205bf215546Sopenharmony_ci} 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_cistatic struct file_state * 208bf215546Sopenharmony_cira_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg) 209bf215546Sopenharmony_ci{ 210bf215546Sopenharmony_ci if (reg->flags & IR3_REG_SHARED) 211bf215546Sopenharmony_ci return &ctx->reaching.shared; 212bf215546Sopenharmony_ci else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) 213bf215546Sopenharmony_ci return &ctx->reaching.full; 214bf215546Sopenharmony_ci else 215bf215546Sopenharmony_ci return &ctx->reaching.half; 216bf215546Sopenharmony_ci} 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_cistatic void 219bf215546Sopenharmony_cipropagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 220bf215546Sopenharmony_ci{ 221bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 222bf215546Sopenharmony_ci struct file_state *file = ra_val_get_file(ctx, dst); 223bf215546Sopenharmony_ci physreg_t physreg = ra_reg_get_physreg(dst); 224bf215546Sopenharmony_ci for (unsigned i = 0; i < reg_size(dst); i++) { 225bf215546Sopenharmony_ci file->regs[physreg + i] = (struct reg_state){ 226bf215546Sopenharmony_ci .def = dst, 227bf215546Sopenharmony_ci .offset = i, 228bf215546Sopenharmony_ci }; 229bf215546Sopenharmony_ci } 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci} 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_cistatic void 234bf215546Sopenharmony_cipropagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split) 235bf215546Sopenharmony_ci{ 236bf215546Sopenharmony_ci struct ir3_register *dst = split->dsts[0]; 237bf215546Sopenharmony_ci struct ir3_register *src = split->srcs[0]; 238bf215546Sopenharmony_ci physreg_t dst_physreg = ra_reg_get_physreg(dst); 239bf215546Sopenharmony_ci physreg_t src_physreg = ra_reg_get_physreg(src); 240bf215546Sopenharmony_ci struct file_state *file = ra_val_get_file(ctx, dst); 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci unsigned offset = split->split.off * reg_elem_size(src); 243bf215546Sopenharmony_ci for (unsigned i = 0; i < reg_elem_size(src); i++) { 244bf215546Sopenharmony_ci file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i]; 245bf215546Sopenharmony_ci } 246bf215546Sopenharmony_ci} 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_cistatic void 249bf215546Sopenharmony_cipropagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect) 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci struct ir3_register *dst = collect->dsts[0]; 252bf215546Sopenharmony_ci physreg_t dst_physreg = ra_reg_get_physreg(dst); 253bf215546Sopenharmony_ci struct file_state *file = ra_val_get_file(ctx, dst); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci unsigned size = reg_size(dst); 256bf215546Sopenharmony_ci struct reg_state srcs[size]; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci for (unsigned i = 0; i < collect->srcs_count; i++) { 259bf215546Sopenharmony_ci struct ir3_register *src = collect->srcs[i]; 260bf215546Sopenharmony_ci unsigned dst_offset = i * reg_elem_size(dst); 261bf215546Sopenharmony_ci for (unsigned j = 0; j < reg_elem_size(dst); j++) { 262bf215546Sopenharmony_ci if (!ra_reg_is_src(src)) { 263bf215546Sopenharmony_ci srcs[dst_offset + j] = (struct reg_state){ 264bf215546Sopenharmony_ci .def = dst, 265bf215546Sopenharmony_ci .offset = dst_offset + j, 266bf215546Sopenharmony_ci }; 267bf215546Sopenharmony_ci } else { 268bf215546Sopenharmony_ci physreg_t src_physreg = ra_reg_get_physreg(src); 269bf215546Sopenharmony_ci srcs[dst_offset + j] = file->regs[src_physreg + j]; 270bf215546Sopenharmony_ci } 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci for (unsigned i = 0; i < size; i++) 275bf215546Sopenharmony_ci file->regs[dst_physreg + i] = srcs[i]; 276bf215546Sopenharmony_ci} 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_cistatic void 279bf215546Sopenharmony_cipropagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy) 280bf215546Sopenharmony_ci{ 281bf215546Sopenharmony_ci unsigned size = 0; 282bf215546Sopenharmony_ci for (unsigned i = 0; i < pcopy->dsts_count; i++) { 283bf215546Sopenharmony_ci size += reg_size(pcopy->srcs[i]); 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci struct reg_state srcs[size]; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci unsigned offset = 0; 289bf215546Sopenharmony_ci for (unsigned i = 0; i < pcopy->srcs_count; i++) { 290bf215546Sopenharmony_ci struct ir3_register *dst = pcopy->dsts[i]; 291bf215546Sopenharmony_ci struct ir3_register *src = pcopy->srcs[i]; 292bf215546Sopenharmony_ci struct file_state *file = ra_val_get_file(ctx, dst); 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci for (unsigned j = 0; j < reg_size(dst); j++) { 295bf215546Sopenharmony_ci if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) { 296bf215546Sopenharmony_ci srcs[offset + j] = (struct reg_state){ 297bf215546Sopenharmony_ci .def = dst, 298bf215546Sopenharmony_ci .offset = j, 299bf215546Sopenharmony_ci }; 300bf215546Sopenharmony_ci } else { 301bf215546Sopenharmony_ci physreg_t src_physreg = ra_reg_get_physreg(src); 302bf215546Sopenharmony_ci srcs[offset + j] = file->regs[src_physreg + j]; 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci } 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci offset += reg_size(dst); 307bf215546Sopenharmony_ci } 308bf215546Sopenharmony_ci assert(offset == size); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci offset = 0; 311bf215546Sopenharmony_ci for (unsigned i = 0; i < pcopy->dsts_count; i++) { 312bf215546Sopenharmony_ci struct ir3_register *dst = pcopy->dsts[i]; 313bf215546Sopenharmony_ci physreg_t dst_physreg = ra_reg_get_physreg(dst); 314bf215546Sopenharmony_ci struct file_state *file = ra_val_get_file(ctx, dst); 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci for (unsigned j = 0; j < reg_size(dst); j++) 317bf215546Sopenharmony_ci file->regs[dst_physreg + j] = srcs[offset + j]; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci offset += reg_size(dst); 320bf215546Sopenharmony_ci } 321bf215546Sopenharmony_ci assert(offset == size); 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_cistatic void 325bf215546Sopenharmony_cipropagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 326bf215546Sopenharmony_ci{ 327bf215546Sopenharmony_ci if (instr->opc == OPC_META_SPLIT) 328bf215546Sopenharmony_ci propagate_split(ctx, instr); 329bf215546Sopenharmony_ci else if (instr->opc == OPC_META_COLLECT) 330bf215546Sopenharmony_ci propagate_collect(ctx, instr); 331bf215546Sopenharmony_ci else if (instr->opc == OPC_META_PARALLEL_COPY) 332bf215546Sopenharmony_ci propagate_parallelcopy(ctx, instr); 333bf215546Sopenharmony_ci else 334bf215546Sopenharmony_ci propagate_normal_instr(ctx, instr); 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cistatic bool 338bf215546Sopenharmony_cipropagate_block(struct ra_val_ctx *ctx, struct ir3_block *block) 339bf215546Sopenharmony_ci{ 340bf215546Sopenharmony_ci ctx->reaching = ctx->block_reaching[block->index]; 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 343bf215546Sopenharmony_ci propagate_instr(ctx, instr); 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci bool progress = false; 347bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 348bf215546Sopenharmony_ci struct ir3_block *succ = block->successors[i]; 349bf215546Sopenharmony_ci if (!succ) 350bf215546Sopenharmony_ci continue; 351bf215546Sopenharmony_ci progress |= 352bf215546Sopenharmony_ci merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching); 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 355bf215546Sopenharmony_ci struct ir3_block *succ = block->physical_successors[i]; 356bf215546Sopenharmony_ci if (!succ) 357bf215546Sopenharmony_ci continue; 358bf215546Sopenharmony_ci progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index], 359bf215546Sopenharmony_ci &ctx->reaching); 360bf215546Sopenharmony_ci } 361bf215546Sopenharmony_ci return progress; 362bf215546Sopenharmony_ci} 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_cistatic void 365bf215546Sopenharmony_cichase_definition(struct reg_state *state) 366bf215546Sopenharmony_ci{ 367bf215546Sopenharmony_ci while (true) { 368bf215546Sopenharmony_ci struct ir3_instruction *instr = state->def->instr; 369bf215546Sopenharmony_ci switch (instr->opc) { 370bf215546Sopenharmony_ci case OPC_META_SPLIT: { 371bf215546Sopenharmony_ci struct ir3_register *new_def = instr->srcs[0]->def; 372bf215546Sopenharmony_ci unsigned offset = instr->split.off * reg_elem_size(new_def); 373bf215546Sopenharmony_ci *state = (struct reg_state){ 374bf215546Sopenharmony_ci .def = new_def, 375bf215546Sopenharmony_ci .offset = state->offset + offset, 376bf215546Sopenharmony_ci }; 377bf215546Sopenharmony_ci break; 378bf215546Sopenharmony_ci } 379bf215546Sopenharmony_ci case OPC_META_COLLECT: { 380bf215546Sopenharmony_ci unsigned src_idx = state->offset / reg_elem_size(state->def); 381bf215546Sopenharmony_ci unsigned src_offset = state->offset % reg_elem_size(state->def); 382bf215546Sopenharmony_ci struct ir3_register *new_def = instr->srcs[src_idx]->def; 383bf215546Sopenharmony_ci if (new_def) { 384bf215546Sopenharmony_ci *state = (struct reg_state){ 385bf215546Sopenharmony_ci .def = new_def, 386bf215546Sopenharmony_ci .offset = src_offset, 387bf215546Sopenharmony_ci }; 388bf215546Sopenharmony_ci } else { 389bf215546Sopenharmony_ci /* Bail on immed/const */ 390bf215546Sopenharmony_ci return; 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci break; 393bf215546Sopenharmony_ci } 394bf215546Sopenharmony_ci case OPC_META_PARALLEL_COPY: { 395bf215546Sopenharmony_ci unsigned dst_idx = ~0; 396bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->dsts_count; i++) { 397bf215546Sopenharmony_ci if (instr->dsts[i] == state->def) { 398bf215546Sopenharmony_ci dst_idx = i; 399bf215546Sopenharmony_ci break; 400bf215546Sopenharmony_ci } 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci assert(dst_idx != ~0); 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci struct ir3_register *new_def = instr->srcs[dst_idx]->def; 405bf215546Sopenharmony_ci if (new_def) { 406bf215546Sopenharmony_ci state->def = new_def; 407bf215546Sopenharmony_ci } else { 408bf215546Sopenharmony_ci /* Bail on immed/const */ 409bf215546Sopenharmony_ci return; 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci break; 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci default: 414bf215546Sopenharmony_ci return; 415bf215546Sopenharmony_ci } 416bf215546Sopenharmony_ci } 417bf215546Sopenharmony_ci} 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_cistatic void 420bf215546Sopenharmony_cidump_reg_state(struct reg_state *state) 421bf215546Sopenharmony_ci{ 422bf215546Sopenharmony_ci if (state->def == UNDEF) { 423bf215546Sopenharmony_ci fprintf(stderr, "no reaching definition"); 424bf215546Sopenharmony_ci } else if (state->def == OVERDEF) { 425bf215546Sopenharmony_ci fprintf(stderr, 426bf215546Sopenharmony_ci "more than one reaching definition or partial definition"); 427bf215546Sopenharmony_ci } else { 428bf215546Sopenharmony_ci /* The analysis should always remove UNKNOWN eventually. */ 429bf215546Sopenharmony_ci assert(state->def != UNKNOWN); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno, 432bf215546Sopenharmony_ci state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "", 433bf215546Sopenharmony_ci state->def->num / 4, "xyzw"[state->def->num % 4], 434bf215546Sopenharmony_ci state -> offset); 435bf215546Sopenharmony_ci } 436bf215546Sopenharmony_ci} 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_cistatic void 439bf215546Sopenharmony_cicheck_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr, 440bf215546Sopenharmony_ci struct ir3_register *src) 441bf215546Sopenharmony_ci{ 442bf215546Sopenharmony_ci struct file_state *file = ra_val_get_file(ctx, src); 443bf215546Sopenharmony_ci physreg_t physreg = ra_reg_get_physreg(src); 444bf215546Sopenharmony_ci for (unsigned i = 0; i < reg_size(src); i++) { 445bf215546Sopenharmony_ci struct reg_state expected = (struct reg_state){ 446bf215546Sopenharmony_ci .def = src->def, 447bf215546Sopenharmony_ci .offset = i, 448bf215546Sopenharmony_ci }; 449bf215546Sopenharmony_ci chase_definition(&expected); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci struct reg_state actual = file->regs[physreg + i]; 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci if (expected.def != actual.def || expected.offset != actual.offset) { 454bf215546Sopenharmony_ci fprintf( 455bf215546Sopenharmony_ci stderr, 456bf215546Sopenharmony_ci "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n", 457bf215546Sopenharmony_ci src->def->instr->serialno, src->def->name, i); 458bf215546Sopenharmony_ci fprintf(stderr, "expected: "); 459bf215546Sopenharmony_ci dump_reg_state(&expected); 460bf215546Sopenharmony_ci fprintf(stderr, "\n"); 461bf215546Sopenharmony_ci fprintf(stderr, "actual: "); 462bf215546Sopenharmony_ci dump_reg_state(&actual); 463bf215546Sopenharmony_ci fprintf(stderr, "\n"); 464bf215546Sopenharmony_ci fprintf(stderr, "-> for instruction: "); 465bf215546Sopenharmony_ci ir3_print_instr(instr); 466bf215546Sopenharmony_ci ctx->failed = true; 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci} 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_cistatic void 472bf215546Sopenharmony_cicheck_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 473bf215546Sopenharmony_ci{ 474bf215546Sopenharmony_ci if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT || 475bf215546Sopenharmony_ci instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) { 476bf215546Sopenharmony_ci return; 477bf215546Sopenharmony_ci } 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 480bf215546Sopenharmony_ci check_reaching_src(ctx, instr, src); 481bf215546Sopenharmony_ci } 482bf215546Sopenharmony_ci} 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_cistatic void 485bf215546Sopenharmony_cicheck_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block) 486bf215546Sopenharmony_ci{ 487bf215546Sopenharmony_ci ctx->reaching = ctx->block_reaching[block->index]; 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 490bf215546Sopenharmony_ci check_reaching_instr(ctx, instr); 491bf215546Sopenharmony_ci propagate_instr(ctx, instr); 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 495bf215546Sopenharmony_ci struct ir3_block *succ = block->successors[i]; 496bf215546Sopenharmony_ci if (!succ) 497bf215546Sopenharmony_ci continue; 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci unsigned pred_idx = ir3_block_get_pred_index(succ, block); 500bf215546Sopenharmony_ci foreach_instr (instr, &succ->instr_list) { 501bf215546Sopenharmony_ci if (instr->opc != OPC_META_PHI) 502bf215546Sopenharmony_ci break; 503bf215546Sopenharmony_ci if (instr->srcs[pred_idx]->def) 504bf215546Sopenharmony_ci check_reaching_src(ctx, instr, instr->srcs[pred_idx]); 505bf215546Sopenharmony_ci } 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci} 508bf215546Sopenharmony_ci 509bf215546Sopenharmony_cistatic void 510bf215546Sopenharmony_cicheck_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir) 511bf215546Sopenharmony_ci{ 512bf215546Sopenharmony_ci ctx->block_reaching = 513bf215546Sopenharmony_ci rzalloc_array(ctx, struct reaching_state, ctx->block_count); 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci struct reaching_state *start = &ctx->block_reaching[0]; 516bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->full_size; i++) 517bf215546Sopenharmony_ci start->full.regs[i].def = UNDEF; 518bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->half_size; i++) 519bf215546Sopenharmony_ci start->half.regs[i].def = UNDEF; 520bf215546Sopenharmony_ci for (unsigned i = 0; i < RA_SHARED_SIZE; i++) 521bf215546Sopenharmony_ci start->shared.regs[i].def = UNDEF; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci bool progress; 524bf215546Sopenharmony_ci do { 525bf215546Sopenharmony_ci progress = false; 526bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 527bf215546Sopenharmony_ci progress |= propagate_block(ctx, block); 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci } while (progress); 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 532bf215546Sopenharmony_ci check_reaching_block(ctx, block); 533bf215546Sopenharmony_ci } 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci if (ctx->failed) { 536bf215546Sopenharmony_ci fprintf(stderr, "failing shader:\n"); 537bf215546Sopenharmony_ci ir3_print(ir); 538bf215546Sopenharmony_ci abort(); 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci} 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_civoid 543bf215546Sopenharmony_ciir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, 544bf215546Sopenharmony_ci unsigned half_size, unsigned block_count) 545bf215546Sopenharmony_ci{ 546bf215546Sopenharmony_ci#ifdef NDEBUG 547bf215546Sopenharmony_ci#define VALIDATE 0 548bf215546Sopenharmony_ci#else 549bf215546Sopenharmony_ci#define VALIDATE 1 550bf215546Sopenharmony_ci#endif 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci if (!VALIDATE) 553bf215546Sopenharmony_ci return; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx); 556bf215546Sopenharmony_ci ctx->merged_regs = v->mergedregs; 557bf215546Sopenharmony_ci ctx->full_size = full_size; 558bf215546Sopenharmony_ci ctx->half_size = half_size; 559bf215546Sopenharmony_ci ctx->block_count = block_count; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci foreach_block (block, &v->ir->block_list) { 562bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 563bf215546Sopenharmony_ci validate_simple(ctx, instr); 564bf215546Sopenharmony_ci } 565bf215546Sopenharmony_ci } 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci check_reaching_defs(ctx, v->ir); 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci ralloc_free(ctx); 570bf215546Sopenharmony_ci} 571