1/* 2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "agx_compiler.h" 25#include "agx_builder.h" 26 27/* SSA-based register allocator */ 28 29/** Returns number of registers written by an instruction */ 30unsigned 31agx_write_registers(agx_instr *I, unsigned d) 32{ 33 unsigned size = I->dest[d].size == AGX_SIZE_32 ? 2 : 1; 34 35 switch (I->op) { 36 case AGX_OPCODE_LD_VARY: 37 assert(1 <= I->channels && I->channels <= 4); 38 return I->channels * size; 39 40 case AGX_OPCODE_DEVICE_LOAD: 41 case AGX_OPCODE_TEXTURE_SAMPLE: 42 case AGX_OPCODE_LD_TILE: 43 /* TODO: mask */ 44 return 4 * size; 45 46 case AGX_OPCODE_LD_VARY_FLAT: 47 return 6; 48 case AGX_OPCODE_P_COMBINE: 49 { 50 unsigned components = 0; 51 52 for (unsigned i = 0; i < 4; ++i) { 53 if (!agx_is_null(I->src[i])) 54 components = i + 1; 55 } 56 57 return components * size; 58 } 59 default: 60 return size; 61 } 62} 63 64static unsigned 65agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max) 66{ 67 for (unsigned reg = 0; reg < max; reg += align) { 68 bool conflict = false; 69 70 for (unsigned j = 0; j < count; ++j) 71 conflict |= BITSET_TEST(used_regs, reg + j); 72 73 if (!conflict) { 74 for (unsigned j = 0; j < count; ++j) 75 BITSET_SET(used_regs, reg + j); 76 77 return reg; 78 } 79 } 80 81 /* Couldn't find a free register, dump the state of the register file */ 82 fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n", 83 count, align, max); 84 85 fprintf(stderr, "Register file:\n"); 86 for (unsigned i = 0; i < BITSET_WORDS(max); ++i) 87 fprintf(stderr, " %08X\n", used_regs[i]); 88 89 unreachable("Could not find a free register"); 90} 91 92/** Assign registers to SSA values in a block. */ 93 94static void 95agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps) 96{ 97 BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 }; 98 99 agx_foreach_predecessor(block, pred) { 100 for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i) 101 used_regs[i] |= (*pred)->regs_out[i]; 102 } 103 104 BITSET_SET(used_regs, 0); // control flow writes r0l 105 BITSET_SET(used_regs, 5*2); // TODO: precolouring, don't overwrite vertex ID 106 BITSET_SET(used_regs, (5*2 + 1)); 107 BITSET_SET(used_regs, (6*2 + 0)); 108 BITSET_SET(used_regs, (6*2 + 1)); 109 110 agx_foreach_instr_in_block(block, I) { 111 /* Optimization: if a split contains the last use of a vector, the split 112 * can be removed by assigning the destinations overlapping the source. 113 */ 114 if (I->op == AGX_OPCODE_P_SPLIT && I->src[0].kill) { 115 unsigned reg = ssa_to_reg[I->src[0].value]; 116 unsigned length = ncomps[I->src[0].value]; 117 unsigned width = agx_size_align_16(I->src[0].size); 118 unsigned count = length / width; 119 120 agx_foreach_dest(I, d) { 121 /* Skip excess components */ 122 if (d >= count) { 123 assert(agx_is_null(I->dest[d])); 124 continue; 125 } 126 127 /* The source of the split is killed. If a destination of the split 128 * is null, that channel is killed. Free it. 129 */ 130 if (agx_is_null(I->dest[d])) { 131 for (unsigned i = 0; i < width; ++i) 132 BITSET_CLEAR(used_regs, reg + (width * d) + i); 133 134 continue; 135 } 136 137 /* Otherwise, transfer the liveness */ 138 unsigned offset = d * width; 139 140 assert(I->dest[d].type == AGX_INDEX_NORMAL); 141 assert(offset < length); 142 143 ssa_to_reg[I->dest[d].value] = reg + offset; 144 } 145 146 continue; 147 } 148 149 /* First, free killed sources */ 150 agx_foreach_src(I, s) { 151 if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) { 152 unsigned reg = ssa_to_reg[I->src[s].value]; 153 unsigned count = ncomps[I->src[s].value]; 154 155 for (unsigned i = 0; i < count; ++i) 156 BITSET_CLEAR(used_regs, reg + i); 157 } 158 } 159 160 /* Next, assign destinations one at a time. This is always legal 161 * because of the SSA form. 162 */ 163 agx_foreach_dest(I, d) { 164 if (I->dest[d].type == AGX_INDEX_NORMAL) { 165 unsigned count = agx_write_registers(I, d); 166 unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2; 167 unsigned reg = agx_assign_regs(used_regs, count, align, AGX_NUM_REGS); 168 169 ssa_to_reg[I->dest[d].value] = reg; 170 } 171 } 172 } 173 174 STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs)); 175 memcpy(block->regs_out, used_regs, sizeof(used_regs)); 176} 177 178/* 179 * Resolve an agx_index of type NORMAL or REGISTER to a physical register, once 180 * registers have been allocated for all SSA values. 181 */ 182static unsigned 183agx_index_to_reg(uint8_t *ssa_to_reg, agx_index idx) 184{ 185 if (idx.type == AGX_INDEX_NORMAL) { 186 return ssa_to_reg[idx.value]; 187 } else { 188 assert(idx.type == AGX_INDEX_REGISTER); 189 return idx.value; 190 } 191} 192 193/* 194 * Lower phis to parallel copies at the logical end of a given block. If a block 195 * needs parallel copies inserted, a successor of the block has a phi node. To 196 * have a (nontrivial) phi node, a block must have multiple predecessors. So the 197 * edge from the block to the successor (with phi) is not the only edge entering 198 * the successor. Because the control flow graph has no critical edges, this 199 * edge must therefore be the only edge leaving the block, so the block must 200 * have only a single successor. 201 */ 202static void 203agx_insert_parallel_copies(agx_context *ctx, agx_block *block) 204{ 205 bool any_succ = false; 206 unsigned nr_phi = 0; 207 208 /* Phi nodes logically happen on the control flow edge, so parallel copies 209 * are added at the end of the predecessor */ 210 agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block)); 211 212 agx_foreach_successor(block, succ) { 213 assert(nr_phi == 0 && "control flow graph has a critical edge"); 214 215 /* Phi nodes can only come at the start of the block */ 216 agx_foreach_instr_in_block(succ, phi) { 217 if (phi->op != AGX_OPCODE_PHI) break; 218 219 assert(!any_succ && "control flow graph has a critical edge"); 220 nr_phi++; 221 } 222 223 any_succ = true; 224 225 /* Nothing to do if there are no phi nodes */ 226 if (nr_phi == 0) 227 continue; 228 229 unsigned pred_index = agx_predecessor_index(succ, block); 230 231 /* Create a parallel copy lowering all the phi nodes */ 232 struct agx_copy *copies = calloc(sizeof(*copies), nr_phi); 233 234 unsigned i = 0; 235 236 agx_foreach_instr_in_block(succ, phi) { 237 if (phi->op != AGX_OPCODE_PHI) break; 238 239 agx_index dest = phi->dest[0]; 240 agx_index src = phi->src[pred_index]; 241 242 assert(dest.type == AGX_INDEX_REGISTER); 243 assert(src.type == AGX_INDEX_REGISTER); 244 assert(dest.size == src.size); 245 246 copies[i++] = (struct agx_copy) { 247 .dest = dest.value, 248 .src = src.value, 249 .size = src.size 250 }; 251 } 252 253 agx_emit_parallel_copies(&b, copies, nr_phi); 254 255 free(copies); 256 } 257} 258 259void 260agx_ra(agx_context *ctx) 261{ 262 unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned)); 263 264 agx_compute_liveness(ctx); 265 uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t)); 266 uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t)); 267 268 agx_foreach_instr_global(ctx, I) { 269 agx_foreach_dest(I, d) { 270 if (I->dest[d].type != AGX_INDEX_NORMAL) continue; 271 272 unsigned v = I->dest[d].value; 273 assert(ncomps[v] == 0 && "broken SSA"); 274 ncomps[v] = agx_write_registers(I, d); 275 } 276 } 277 278 /* Assign registers in dominance-order. This coincides with source-order due 279 * to a NIR invariant, so we do not need special handling for this. 280 */ 281 agx_foreach_block(ctx, block) { 282 agx_ra_assign_local(block, ssa_to_reg, ncomps); 283 } 284 285 agx_foreach_instr_global(ctx, ins) { 286 agx_foreach_src(ins, s) { 287 if (ins->src[s].type == AGX_INDEX_NORMAL) { 288 unsigned v = ssa_to_reg[ins->src[s].value]; 289 ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size)); 290 } 291 } 292 293 agx_foreach_dest(ins, d) { 294 if (ins->dest[d].type == AGX_INDEX_NORMAL) { 295 unsigned v = ssa_to_reg[ins->dest[d].value]; 296 ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size)); 297 } 298 } 299 } 300 301 agx_foreach_instr_global_safe(ctx, ins) { 302 /* Lower away RA pseudo-instructions */ 303 agx_builder b = agx_init_builder(ctx, agx_after_instr(ins)); 304 305 if (ins->op == AGX_OPCODE_P_COMBINE) { 306 unsigned base = agx_index_to_reg(ssa_to_reg, ins->dest[0]); 307 unsigned width = agx_size_align_16(ins->dest[0].size); 308 309 struct agx_copy copies[4]; 310 unsigned n = 0; 311 312 /* Move the sources */ 313 for (unsigned i = 0; i < 4; ++i) { 314 if (agx_is_null(ins->src[i])) continue; 315 assert(ins->src[i].size == ins->dest[0].size); 316 317 copies[n++] = (struct agx_copy) { 318 .dest = base + (i * width), 319 .src = agx_index_to_reg(ssa_to_reg, ins->src[i]) , 320 .size = ins->src[i].size 321 }; 322 } 323 324 agx_emit_parallel_copies(&b, copies, n); 325 agx_remove_instruction(ins); 326 continue; 327 } else if (ins->op == AGX_OPCODE_P_EXTRACT) { 328 /* Uses the destination size */ 329 unsigned size = agx_size_align_16(ins->dest[0].size); 330 unsigned left = agx_index_to_reg(ssa_to_reg, ins->dest[0]); 331 unsigned right = agx_index_to_reg(ssa_to_reg, ins->src[0]) + (size * ins->imm); 332 333 if (left != right) { 334 agx_mov_to(&b, agx_register(left, ins->dest[0].size), 335 agx_register(right, ins->src[0].size)); 336 } 337 338 agx_remove_instruction(ins); 339 continue; 340 } else if (ins->op == AGX_OPCODE_P_SPLIT) { 341 unsigned base = agx_index_to_reg(ssa_to_reg, ins->src[0]); 342 unsigned width = agx_size_align_16(ins->src[0].size); 343 344 struct agx_copy copies[4]; 345 unsigned n = 0; 346 347 /* Move the sources */ 348 for (unsigned i = 0; i < 4; ++i) { 349 if (agx_is_null(ins->dest[i])) continue; 350 assert(ins->dest[i].size == ins->src[0].size); 351 352 copies[n++] = (struct agx_copy) { 353 .dest = agx_index_to_reg(ssa_to_reg, ins->dest[i]), 354 .src = base + (i * width), 355 .size = ins->dest[i].size 356 }; 357 } 358 359 /* Lower away */ 360 agx_builder b = agx_init_builder(ctx, agx_after_instr(ins)); 361 agx_emit_parallel_copies(&b, copies, n); 362 agx_remove_instruction(ins); 363 continue; 364 } 365 366 367 } 368 369 /* Insert parallel copies lowering phi nodes */ 370 agx_foreach_block(ctx, block) { 371 agx_insert_parallel_copies(ctx, block); 372 } 373 374 /* Phi nodes can be removed now */ 375 agx_foreach_instr_global_safe(ctx, I) { 376 if (I->op == AGX_OPCODE_PHI || I->op == AGX_OPCODE_P_LOGICAL_END) 377 agx_remove_instruction(I); 378 379 /* Remove identity moves */ 380 if (I->op == AGX_OPCODE_MOV && I->src[0].type == AGX_INDEX_REGISTER && 381 I->dest[0].size == I->src[0].size && I->src[0].value == I->dest[0].value) { 382 383 assert(I->dest[0].type == AGX_INDEX_REGISTER); 384 agx_remove_instruction(I); 385 } 386 } 387 388 free(ssa_to_reg); 389 free(ncomps); 390 free(alloc); 391} 392