/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/rb_tree.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/*
 * This pass does two things:
 *
 * 1. Calculates the maximum register pressure. To do this, we need to use the
 *    exact same technique that RA uses for combining meta_split instructions
 *    with their sources, so that our calculation agrees with RA.
 * 2. Spills when the register pressure exceeds a limit calculated by RA.
 *    The implementation is based on "Register Spilling and Live-Range Splitting
 *    for SSA-Form Programs" by Braun and Hack, although again care has to be
 *    taken to handle combining split/collect instructions.
 */

struct reg_or_immed {
   unsigned flags;
   union {
      struct ir3_register *def;
      uint32_t uimm;
      unsigned const_num;
   };
};

struct ra_spill_interval {
   struct ir3_reg_interval interval;

   struct rb_node node;
   struct rb_node half_node;

   /* The current SSA value/const/immed this source is mapped to. */
   struct reg_or_immed dst;

   /* When computing use distances we use the distance relative to the start
    * of the block. So, for example, a value that's defined in cycle 5 of the
    * block and used 6 cycles later will always have a next_use_distance of 11
    * until we reach that use.
    */
   unsigned next_use_distance;

   /* Whether this value was reloaded and therefore doesn't need to be
    * spilled again. Corresponds to the S set in the paper.
    */
   bool already_spilled;

   /* We need to add sources early for accounting purposes, but we have to
    * insert the reload code for them last. Keep track of whether this interval
    * needs to be reloaded later.
    */
   bool needs_reload;

   /* Keep track of whether this interval currently can't be spilled because:
    * - It or one of its children is a source and we're making space for
    *   sources.
    * - It is a destination and we're making space for destinations.
    */
   bool cant_spill;

   /* Whether this interval can be rematerialized. */
   bool can_rematerialize;
};

struct ra_spill_block_state {
   unsigned *next_use_end;
   unsigned *next_use_start;

   unsigned cycles;

   /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
94 * This map only contains values which we didn't spill, so it also serves as 95 * a record of the new live-out set for this block. 96 */ 97 struct hash_table *remap; 98 99 /* For blocks whose successors are visited first (i.e. loop backedges), which 100 * values should be live at the end. 101 */ 102 BITSET_WORD *live_out; 103 104 bool visited; 105}; 106 107struct ra_spill_ctx { 108 struct ir3_reg_ctx reg_ctx; 109 110 struct ra_spill_interval **intervals; 111 unsigned intervals_count; 112 113 /* rb tree of live intervals that we can spill, ordered by next-use distance. 114 * full_live_intervals contains the full+shared intervals in the merged_regs 115 * case. We use this list to determine what to spill. 116 */ 117 struct rb_tree full_live_intervals; 118 struct rb_tree half_live_intervals; 119 120 struct ir3_pressure cur_pressure, max_pressure; 121 122 struct ir3_pressure limit_pressure; 123 124 /* When spilling, we need to reserve a register to serve as the zero'd 125 * "base". For simplicity we reserve a register at the beginning so that it's 126 * always available. 127 */ 128 struct ir3_register *base_reg; 129 130 /* Current pvtmem offset in bytes. */ 131 unsigned spill_slot; 132 133 struct ir3_liveness *live; 134 135 const struct ir3_compiler *compiler; 136 137 struct ra_spill_block_state *blocks; 138 139 bool spilling; 140 141 bool merged_regs; 142}; 143 144static void 145add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir) 146{ 147 struct ir3_block *start = ir3_start_block(ir); 148 149 /* We need to stick it after any meta instructions which need to be first. */ 150 struct ir3_instruction *after = NULL; 151 foreach_instr (instr, &start->instr_list) { 152 if (instr->opc != OPC_META_INPUT && 153 instr->opc != OPC_META_TEX_PREFETCH) { 154 after = instr; 155 break; 156 } 157 } 158 159 struct ir3_instruction *mov = create_immed(start, 0); 160 161 if (after) 162 ir3_instr_move_before(mov, after); 163 164 ctx->base_reg = mov->dsts[0]; 165 166 /* We don't create an interval, etc. for the base reg, so just lower the 167 * register pressure limit to account for it. We assume it's always 168 * available for simplicity. 169 */ 170 ctx->limit_pressure.full -= reg_size(ctx->base_reg); 171} 172 173 174/* Compute the number of cycles per instruction used for next-use-distance 175 * analysis. This is just approximate, obviously. 
176 */ 177static unsigned 178instr_cycles(struct ir3_instruction *instr) 179{ 180 if (instr->opc == OPC_META_PARALLEL_COPY) { 181 unsigned cycles = 0; 182 for (unsigned i = 0; i < instr->dsts_count; i++) { 183 if (!instr->srcs[i]->def || 184 instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) { 185 cycles += reg_elems(instr->srcs[i]); 186 } 187 } 188 189 return cycles; 190 } 191 192 if (instr->opc == OPC_META_COLLECT) { 193 unsigned cycles = 0; 194 for (unsigned i = 0; i < instr->srcs_count; i++) { 195 if (!instr->srcs[i]->def || 196 instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) { 197 cycles++; 198 } 199 } 200 201 return cycles; 202 } 203 204 if (is_meta(instr)) 205 return 0; 206 207 return 1 + instr->repeat; 208} 209 210static bool 211compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block, 212 unsigned *tmp_next_use) 213{ 214 struct ra_spill_block_state *state = &ctx->blocks[block->index]; 215 memcpy(tmp_next_use, state->next_use_end, 216 ctx->live->definitions_count * sizeof(*tmp_next_use)); 217 218 unsigned cycle = state->cycles; 219 foreach_instr_rev (instr, &block->instr_list) { 220 ra_foreach_dst (dst, instr) { 221 dst->next_use = tmp_next_use[dst->name]; 222 } 223 224 ra_foreach_src (src, instr) { 225 src->next_use = tmp_next_use[src->def->name]; 226 } 227 228 cycle -= instr_cycles(instr); 229 230 if (instr->opc == OPC_META_PARALLEL_COPY) { 231 ra_foreach_src_n (src, i, instr) { 232 if (src->def->merge_set == instr->dsts[i]->merge_set && 233 src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) { 234 tmp_next_use[src->def->name] = 235 tmp_next_use[instr->dsts[i]->name]; 236 } else { 237 tmp_next_use[src->def->name] = cycle; 238 } 239 } 240 } else if (instr->opc != OPC_META_PHI) { 241 ra_foreach_src (src, instr) { 242 tmp_next_use[src->def->name] = cycle; 243 } 244 } 245 246 ra_foreach_dst (dst, instr) { 247 tmp_next_use[dst->name] = UINT_MAX; 248 } 249 } 250 251 memcpy(state->next_use_start, tmp_next_use, 252 ctx->live->definitions_count * sizeof(*tmp_next_use)); 253 254 bool progress = false; 255 for (unsigned i = 0; i < block->predecessors_count; i++) { 256 const struct ir3_block *pred = block->predecessors[i]; 257 struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index]; 258 259 /* Add a large-enough distance in front of edges exiting the loop so that 260 * variables that are live-through the loop but not used inside it are 261 * prioritized for spilling, as per the paper. This just needs to be 262 * larger than the longest path through the loop. 263 */ 264 bool loop_exit = pred->loop_depth < block->loop_depth; 265 unsigned block_distance = pred_state->cycles + (loop_exit ? 
100000 : 0); 266 267 for (unsigned j = 0; j < ctx->live->definitions_count; j++) { 268 if (state->next_use_start[j] < UINT_MAX && 269 state->next_use_start[j] + block_distance < 270 pred_state->next_use_end[j]) { 271 pred_state->next_use_end[j] = state->next_use_start[j] + 272 block_distance; 273 progress = true; 274 } 275 } 276 277 foreach_instr (phi, &block->instr_list) { 278 if (phi->opc != OPC_META_PHI) 279 break; 280 if (!phi->srcs[i]->def) 281 continue; 282 unsigned src = phi->srcs[i]->def->name; 283 if (phi->dsts[0]->next_use < UINT_MAX && 284 phi->dsts[0]->next_use + block_distance < 285 pred_state->next_use_end[src]) { 286 pred_state->next_use_end[src] = phi->dsts[0]->next_use + 287 block_distance; 288 progress = true; 289 } 290 } 291 } 292 293 return progress; 294} 295 296static void 297compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir) 298{ 299 for (unsigned i = 0; i < ctx->live->block_count; i++) { 300 ctx->blocks[i].next_use_start = 301 ralloc_array(ctx, unsigned, ctx->live->definitions_count); 302 ctx->blocks[i].next_use_end = 303 ralloc_array(ctx, unsigned, ctx->live->definitions_count); 304 305 for (unsigned j = 0; j < ctx->live->definitions_count; j++) { 306 ctx->blocks[i].next_use_start[j] = UINT_MAX; 307 ctx->blocks[i].next_use_end[j] = UINT_MAX; 308 } 309 } 310 311 foreach_block (block, &ir->block_list) { 312 struct ra_spill_block_state *state = &ctx->blocks[block->index]; 313 state->cycles = 0; 314 foreach_instr (instr, &block->instr_list) { 315 state->cycles += instr_cycles(instr); 316 foreach_dst (dst, instr) { 317 dst->spill_slot = ~0; 318 } 319 } 320 } 321 322 unsigned *tmp_next_use = 323 ralloc_array(ctx, unsigned, ctx->live->definitions_count); 324 325 bool progress = true; 326 while (progress) { 327 progress = false; 328 foreach_block_rev (block, &ir->block_list) { 329 progress |= compute_block_next_distance(ctx, block, tmp_next_use); 330 } 331 } 332} 333 334static bool 335can_rematerialize(struct ir3_register *reg) 336{ 337 if (reg->flags & IR3_REG_ARRAY) 338 return false; 339 if (reg->instr->opc != OPC_MOV) 340 return false; 341 if (!(reg->instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST))) 342 return false; 343 if (reg->instr->srcs[0]->flags & IR3_REG_RELATIV) 344 return false; 345 return true; 346} 347 348static struct ir3_register * 349rematerialize(struct ir3_register *reg, struct ir3_instruction *after, 350 struct ir3_block *block) 351{ 352 d("rematerializing ssa_%u:%u", reg->instr->serialno, reg->name); 353 354 struct ir3_instruction *remat = 355 ir3_instr_create(block, reg->instr->opc, 1, reg->instr->srcs_count); 356 struct ir3_register *dst = __ssa_dst(remat); 357 dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 358 for (unsigned i = 0; i < reg->instr->srcs_count; i++) { 359 struct ir3_register *src = 360 ir3_src_create(remat, INVALID_REG, reg->instr->srcs[i]->flags); 361 *src = *reg->instr->srcs[i]; 362 } 363 364 remat->cat1 = reg->instr->cat1; 365 366 dst->merge_set = reg->merge_set; 367 dst->merge_set_offset = reg->merge_set_offset; 368 dst->interval_start = reg->interval_start; 369 dst->interval_end = reg->interval_end; 370 371 if (after) 372 ir3_instr_move_before(remat, after); 373 374 return dst; 375} 376 377static void 378ra_spill_interval_init(struct ra_spill_interval *interval, 379 struct ir3_register *reg) 380{ 381 ir3_reg_interval_init(&interval->interval, reg); 382 interval->dst.flags = reg->flags; 383 interval->dst.def = reg; 384 interval->already_spilled = false; 385 interval->needs_reload = false; 386 
interval->cant_spill = false; 387 interval->can_rematerialize = can_rematerialize(reg); 388} 389 390static struct ra_spill_interval * 391ir3_reg_interval_to_interval(struct ir3_reg_interval *interval) 392{ 393 return rb_node_data(struct ra_spill_interval, interval, interval); 394} 395 396static struct ra_spill_interval * 397ra_spill_interval_root(struct ra_spill_interval *interval) 398{ 399 struct ir3_reg_interval *ir3_interval = &interval->interval; 400 while (ir3_interval->parent) 401 ir3_interval = ir3_interval->parent; 402 return ir3_reg_interval_to_interval(ir3_interval); 403} 404 405static struct ra_spill_ctx * 406ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx) 407{ 408 return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx); 409} 410 411static int 412spill_interval_cmp(const struct ra_spill_interval *a, 413 const struct ra_spill_interval *b) 414{ 415 /* Prioritize intervals that we can rematerialize. */ 416 if (a->can_rematerialize && !b->can_rematerialize) 417 return 1; 418 if (!a->can_rematerialize && b->can_rematerialize) 419 return -1; 420 421 return a->next_use_distance - b->next_use_distance; 422} 423 424static int 425ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b) 426{ 427 const struct ra_spill_interval *a = 428 rb_node_data(const struct ra_spill_interval, _a, node); 429 const struct ra_spill_interval *b = 430 rb_node_data(const struct ra_spill_interval, _b, node); 431 return spill_interval_cmp(a, b); 432} 433 434static int 435ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b) 436{ 437 const struct ra_spill_interval *a = 438 rb_node_data(const struct ra_spill_interval, _a, half_node); 439 const struct ra_spill_interval *b = 440 rb_node_data(const struct ra_spill_interval, _b, half_node); 441 return spill_interval_cmp(a, b); 442} 443 444static void 445interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval) 446{ 447 struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval); 448 struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx); 449 450 unsigned size = reg_size(interval->interval.reg); 451 if (interval->interval.reg->flags & IR3_REG_SHARED) { 452 ctx->cur_pressure.shared += size; 453 } else { 454 if (interval->interval.reg->flags & IR3_REG_HALF) { 455 ctx->cur_pressure.half += size; 456 if (ctx->spilling) { 457 rb_tree_insert(&ctx->half_live_intervals, &interval->half_node, 458 ra_spill_interval_half_cmp); 459 } 460 } 461 if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) { 462 ctx->cur_pressure.full += size; 463 if (ctx->spilling) { 464 rb_tree_insert(&ctx->full_live_intervals, &interval->node, 465 ra_spill_interval_cmp); 466 } 467 } 468 } 469} 470 471static void 472interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval) 473{ 474 struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval); 475 struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx); 476 477 unsigned size = reg_size(interval->interval.reg); 478 if (interval->interval.reg->flags & IR3_REG_SHARED) { 479 ctx->cur_pressure.shared -= size; 480 } else { 481 if (interval->interval.reg->flags & IR3_REG_HALF) { 482 ctx->cur_pressure.half -= size; 483 if (ctx->spilling) { 484 rb_tree_remove(&ctx->half_live_intervals, &interval->half_node); 485 } 486 } 487 if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) { 488 ctx->cur_pressure.full -= size; 489 if (ctx->spilling) { 490 rb_tree_remove(&ctx->full_live_intervals, &interval->node); 491 } 492 } 493 } 494} 495 
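/* Re-adding a child interval is, for the pressure accounting and the
 * spill-candidate trees maintained by this pass, no different from inserting a
 * fresh interval, so simply forward to interval_add().
 */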
496static void 497interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent, 498 struct ir3_reg_interval *_child) 499{ 500 interval_add(_ctx, _child); 501} 502 503static void 504spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v, 505 struct ir3_liveness *live) 506{ 507 ctx->live = live; 508 ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *, 509 ctx->live->definitions_count); 510 struct ra_spill_interval *intervals = 511 rzalloc_array(ctx, struct ra_spill_interval, 512 ctx->live->definitions_count); 513 for (unsigned i = 0; i < ctx->live->definitions_count; i++) 514 ctx->intervals[i] = &intervals[i]; 515 516 ctx->intervals_count = ctx->live->definitions_count; 517 ctx->compiler = v->compiler; 518 ctx->merged_regs = v->mergedregs; 519 520 rb_tree_init(&ctx->reg_ctx.intervals); 521 ctx->reg_ctx.interval_add = interval_add; 522 ctx->reg_ctx.interval_delete = interval_delete; 523 ctx->reg_ctx.interval_readd = interval_readd; 524} 525 526static void 527ra_spill_ctx_insert(struct ra_spill_ctx *ctx, 528 struct ra_spill_interval *interval) 529{ 530 ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); 531} 532 533static void 534ra_spill_ctx_remove(struct ra_spill_ctx *ctx, 535 struct ra_spill_interval *interval) 536{ 537 ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval); 538} 539 540static void 541init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 542{ 543 struct ra_spill_interval *interval = ctx->intervals[dst->name]; 544 ra_spill_interval_init(interval, dst); 545 if (ctx->spilling) { 546 interval->next_use_distance = dst->next_use; 547 548 /* We only need to keep track of used-ness if this value may be 549 * rematerialized. This also keeps us from nuking things that may be 550 * in the keeps list (e.g. atomics, input splits). 551 */ 552 if (interval->can_rematerialize) 553 dst->instr->flags |= IR3_INSTR_UNUSED; 554 } 555} 556 557static void 558insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 559{ 560 struct ra_spill_interval *interval = ctx->intervals[dst->name]; 561 if (interval->interval.inserted) 562 return; 563 564 ra_spill_ctx_insert(ctx, interval); 565 interval->cant_spill = true; 566 567 /* For precolored inputs, make sure we leave enough registers to allow for 568 * holes in the inputs. It can happen that the binning shader has a lower 569 * register pressure than the main shader, but the main shader decided to 570 * add holes between the inputs which means that the binning shader has a 571 * higher register demand. 
572 */ 573 if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) { 574 physreg_t physreg = ra_reg_get_physreg(dst); 575 physreg_t max = physreg + reg_size(dst); 576 577 if (interval->interval.reg->flags & IR3_REG_SHARED) 578 ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max); 579 else if (interval->interval.reg->flags & IR3_REG_HALF) 580 ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max); 581 else 582 ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max); 583 } 584} 585 586static void 587insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src) 588{ 589 struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 590 591 if (!interval->interval.inserted) { 592 ra_spill_ctx_insert(ctx, interval); 593 interval->needs_reload = true; 594 interval->already_spilled = true; 595 } 596 597 ra_spill_interval_root(interval)->cant_spill = true; 598 599} 600 601static void 602remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 603 struct ir3_register *src) 604{ 605 struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 606 607 if (!interval->interval.inserted || interval->interval.parent || 608 !rb_tree_is_empty(&interval->interval.children)) 609 return; 610 611 ra_spill_ctx_remove(ctx, interval); 612} 613 614static void 615remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 616 struct ir3_register *src) 617{ 618 struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 619 620 if (!interval->interval.inserted) 621 return; 622 623 ra_spill_ctx_remove(ctx, interval); 624} 625 626static void 627finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 628{ 629 struct ra_spill_interval *interval = ctx->intervals[dst->name]; 630 interval->cant_spill = false; 631} 632 633static void 634remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 635{ 636 struct ra_spill_interval *interval = ctx->intervals[dst->name]; 637 638 if (!interval->interval.inserted) 639 return; 640 641 ra_spill_ctx_remove(ctx, interval); 642} 643 644static void 645update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src) 646{ 647 struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 648 649 assert(interval->interval.inserted); 650 651 interval->next_use_distance = src->next_use; 652 653 /* If this node is inserted in one of the trees, then it needs to be resorted 654 * as its key has changed. 
655 */ 656 if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) { 657 if (src->flags & IR3_REG_HALF) { 658 rb_tree_remove(&ctx->half_live_intervals, &interval->half_node); 659 rb_tree_insert(&ctx->half_live_intervals, &interval->half_node, 660 ra_spill_interval_half_cmp); 661 } 662 if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) { 663 rb_tree_remove(&ctx->full_live_intervals, &interval->node); 664 rb_tree_insert(&ctx->full_live_intervals, &interval->node, 665 ra_spill_interval_cmp); 666 } 667 } 668} 669 670static unsigned 671get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg) 672{ 673 if (reg->merge_set) { 674 if (reg->merge_set->spill_slot == ~0) { 675 reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot, 676 reg->merge_set->alignment); 677 ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2; 678 } 679 return reg->merge_set->spill_slot + reg->merge_set_offset * 2; 680 } else { 681 if (reg->spill_slot == ~0) { 682 reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg)); 683 ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2; 684 } 685 return reg->spill_slot; 686 } 687} 688 689static void 690set_src_val(struct ir3_register *src, const struct reg_or_immed *val) 691{ 692 if (val->flags & IR3_REG_IMMED) { 693 src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF); 694 src->uim_val = val->uimm; 695 src->def = NULL; 696 } else if (val->flags & IR3_REG_CONST) { 697 src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF); 698 src->num = val->const_num; 699 src->def = NULL; 700 } else { 701 src->def = val->def; 702 val->def->instr->flags &= ~IR3_INSTR_UNUSED; 703 } 704} 705 706static struct ir3_register * 707materialize_pcopy_src(const struct reg_or_immed *src, 708 struct ir3_instruction *instr, 709 struct ir3_block *block) 710{ 711 struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1); 712 struct ir3_register *dst = __ssa_dst(mov); 713 dst->flags |= src->flags & IR3_REG_HALF; 714 struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags); 715 set_src_val(mov_src, src); 716 mov->cat1.src_type = mov->cat1.dst_type = 717 (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; 718 719 if (instr) 720 ir3_instr_move_before(mov, instr); 721 return dst; 722} 723 724static void 725spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val, 726 unsigned spill_slot, struct ir3_instruction *instr, struct ir3_block *block) 727{ 728 struct ir3_register *reg; 729 730 /* If spilling an immed/const pcopy src, we need to actually materialize it 731 * first with a mov. 732 */ 733 if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) { 734 reg = materialize_pcopy_src(val, instr, block); 735 } else { 736 reg = val->def; 737 reg->instr->flags &= ~IR3_INSTR_UNUSED; 738 } 739 740 d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name, 741 spill_slot); 742 743 unsigned elems = reg_elems(reg); 744 struct ir3_instruction *spill = 745 ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3); 746 ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg; 747 unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED | 748 IR3_REG_CONST | IR3_REG_SSA | 749 IR3_REG_ARRAY); 750 struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags); 751 ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems; 752 spill->cat6.dst_offset = spill_slot; 753 spill->cat6.type = (reg->flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; 754 755 src->def = reg; 756 if (reg->flags & IR3_REG_ARRAY) { 757 src->size = reg->size; 758 src->array.id = reg->array.id; 759 src->array.offset = 0; 760 } else { 761 src->wrmask = reg->wrmask; 762 } 763 764 if (instr) 765 ir3_instr_move_before(spill, instr); 766} 767 768static void 769spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval, 770 struct ir3_instruction *instr, struct ir3_block *block) 771{ 772 if (interval->can_rematerialize && !interval->interval.reg->merge_set) 773 return; 774 775 spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg), 776 instr, block); 777} 778 779/* This is similar to "limit" in the paper. */ 780static void 781limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 782{ 783 if (ctx->cur_pressure.half > ctx->limit_pressure.half) { 784 d("cur half pressure %u exceeds %u", ctx->cur_pressure.half, 785 ctx->limit_pressure.half); 786 rb_tree_foreach_safe (struct ra_spill_interval, interval, 787 &ctx->half_live_intervals, half_node) { 788 d("trying ssa_%u:%u", interval->interval.reg->instr->serialno, 789 interval->interval.reg->name); 790 if (!interval->cant_spill) { 791 if (!interval->already_spilled) 792 spill_interval(ctx, interval, instr, instr->block); 793 ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 794 if (ctx->cur_pressure.half <= ctx->limit_pressure.half) 795 break; 796 } 797 } 798 799 assert(ctx->cur_pressure.half <= ctx->limit_pressure.half); 800 } 801 802 if (ctx->cur_pressure.full > ctx->limit_pressure.full) { 803 d("cur full pressure %u exceeds %u", ctx->cur_pressure.full, 804 ctx->limit_pressure.full); 805 rb_tree_foreach_safe (struct ra_spill_interval, interval, 806 &ctx->full_live_intervals, node) { 807 d("trying ssa_%u:%u", interval->interval.reg->instr->serialno, 808 interval->interval.reg->name); 809 if (!interval->cant_spill) { 810 if (!interval->already_spilled) 811 spill_interval(ctx, interval, instr, instr->block); 812 ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 813 if (ctx->cur_pressure.full <= ctx->limit_pressure.full) 814 break; 815 } else { 816 d("can't spill"); 817 } 818 } 819 820 assert(ctx->cur_pressure.full <= ctx->limit_pressure.full); 821 } 822} 823 824/* There's a corner case where we reload a value which has overlapping live 825 * values already reloaded, either because it's the child of some other interval 826 * that was already reloaded or some of its children have already been 827 * reloaded. Because RA only expects overlapping source/dest intervals for meta 828 * instructions (split/collect), and we don't want to add register pressure by 829 * creating an entirely separate value, we need to add splits and collects to 830 * deal with this case. These splits/collects have to also have correct merge 831 * set information, so that it doesn't result in any actual code or register 832 * pressure in practice. 
833 */ 834 835static void 836add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def, 837 unsigned offset) 838{ 839 def->merge_set = set; 840 def->merge_set_offset = offset; 841 def->interval_start = set->interval_start + offset; 842 def->interval_end = set->interval_start + offset + reg_size(def); 843} 844 845static struct ir3_register * 846split(struct ir3_register *def, unsigned offset, 847 struct ir3_instruction *after, struct ir3_block *block) 848{ 849 if (reg_elems(def) == 1) { 850 assert(offset == 0); 851 return def; 852 } 853 854 assert(!(def->flags & IR3_REG_ARRAY)); 855 assert(def->merge_set); 856 struct ir3_instruction *split = 857 ir3_instr_create(block, OPC_META_SPLIT, 1, 1); 858 struct ir3_register *dst = __ssa_dst(split); 859 dst->flags |= def->flags & IR3_REG_HALF; 860 struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags); 861 src->wrmask = def->wrmask; 862 src->def = def; 863 add_to_merge_set(def->merge_set, dst, 864 def->merge_set_offset + offset * reg_elem_size(def)); 865 if (after) 866 ir3_instr_move_before(split, after); 867 return dst; 868} 869 870static struct ir3_register * 871extract(struct ir3_register *parent_def, unsigned offset, unsigned elems, 872 struct ir3_instruction *after, struct ir3_block *block) 873{ 874 if (offset == 0 && elems == reg_elems(parent_def)) 875 return parent_def; 876 877 struct ir3_register *srcs[elems]; 878 for (unsigned i = 0; i < elems; i++) { 879 srcs[i] = split(parent_def, offset + i, after, block); 880 } 881 882 struct ir3_instruction *collect = 883 ir3_instr_create(block, OPC_META_COLLECT, 1, elems); 884 struct ir3_register *dst = __ssa_dst(collect); 885 dst->flags |= parent_def->flags & IR3_REG_HALF; 886 dst->wrmask = MASK(elems); 887 add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset); 888 889 for (unsigned i = 0; i < elems; i++) { 890 ir3_src_create(collect, INVALID_REG, parent_def->flags)->def = srcs[i]; 891 } 892 893 if (after) 894 ir3_instr_move_before(collect, after); 895 return dst; 896} 897 898static struct ir3_register * 899reload(struct ra_spill_ctx *ctx, struct ir3_register *reg, 900 struct ir3_instruction *after, struct ir3_block *block) 901{ 902 unsigned spill_slot = get_spill_slot(ctx, reg); 903 904 d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name, 905 spill_slot); 906 907 unsigned elems = reg_elems(reg); 908 struct ir3_instruction *reload = 909 ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3); 910 struct ir3_register *dst = __ssa_dst(reload); 911 dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 912 /* The reload may be split into multiple pieces, and if the destination 913 * overlaps with the base register then it could get clobbered before the 914 * last ldp in the sequence. Note that we always reserve space for the base 915 * register throughout the whole program, so effectively extending its live 916 * range past the end of the instruction isn't a problem for our pressure 917 * accounting. 918 */ 919 dst->flags |= IR3_REG_EARLY_CLOBBER; 920 ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg; 921 struct ir3_register *offset_reg = 922 ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED); 923 offset_reg->uim_val = spill_slot; 924 ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems; 925 reload->cat6.type = (reg->flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; 926 927 if (reg->flags & IR3_REG_ARRAY) { 928 dst->array.offset = 0; 929 dst->array.id = reg->array.id; 930 dst->size = reg->size; 931 } else { 932 dst->wrmask = MASK(elems); 933 } 934 935 dst->merge_set = reg->merge_set; 936 dst->merge_set_offset = reg->merge_set_offset; 937 dst->interval_start = reg->interval_start; 938 dst->interval_end = reg->interval_end; 939 940 if (after) 941 ir3_instr_move_before(reload, after); 942 943 return dst; 944} 945 946static void 947rewrite_src_interval(struct ra_spill_ctx *ctx, 948 struct ra_spill_interval *interval, 949 struct ir3_register *def, 950 struct ir3_instruction *instr, 951 struct ir3_block *block) 952{ 953 interval->dst.flags = def->flags; 954 interval->dst.def = def; 955 interval->needs_reload = false; 956 957 rb_tree_foreach (struct ra_spill_interval, child, 958 &interval->interval.children, interval.node) { 959 struct ir3_register *child_reg = child->interval.reg; 960 struct ir3_register *child_def = 961 extract(def, (child_reg->interval_start - 962 interval->interval.reg->interval_start) / reg_elem_size(def), 963 reg_elems(child_reg), instr, block); 964 rewrite_src_interval(ctx, child, child_def, instr, block); 965 } 966} 967 968static void 969reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def, 970 struct ir3_instruction *instr, struct ir3_block *block) 971{ 972 unsigned elems = reg_elems(def); 973 struct ra_spill_interval *interval = ctx->intervals[def->name]; 974 975 struct ir3_reg_interval *ir3_parent = interval->interval.parent; 976 977 if (ir3_parent) { 978 struct ra_spill_interval *parent = 979 ir3_reg_interval_to_interval(ir3_parent); 980 if (!parent->needs_reload) { 981 interval->dst.flags = def->flags; 982 interval->dst.def = extract( 983 parent->dst.def, (def->interval_start - parent->dst.def->interval_start) / 984 reg_elem_size(def), elems, instr, block); 985 return; 986 } 987 } 988 989 struct ir3_register *dst; 990 if (interval->can_rematerialize) 991 dst = rematerialize(def, instr, block); 992 else 993 dst = reload(ctx, def, instr, block); 994 995 rewrite_src_interval(ctx, interval, dst, instr, block); 996} 997 998static void 999reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 1000 struct ir3_register *src) 1001{ 1002 struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 1003 1004 if (interval->needs_reload) { 1005 reload_def(ctx, src->def, instr, instr->block); 1006 } 1007 1008 ra_spill_interval_root(interval)->cant_spill = false; 1009} 1010 1011static void 1012rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 1013 struct ir3_register *src) 1014{ 1015 struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 1016 1017 set_src_val(src, &interval->dst); 1018} 1019 1020static void 1021update_max_pressure(struct ra_spill_ctx *ctx) 1022{ 1023 d("pressure:"); 1024 d("\tfull: %u", ctx->cur_pressure.full); 1025 d("\thalf: %u", ctx->cur_pressure.half); 1026 d("\tshared: %u", ctx->cur_pressure.shared); 1027 1028 ctx->max_pressure.full = 1029 MAX2(ctx->max_pressure.full, ctx->cur_pressure.full); 1030 ctx->max_pressure.half = 1031 MAX2(ctx->max_pressure.half, ctx->cur_pressure.half); 1032 ctx->max_pressure.shared = 1033 MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared); 1034} 1035 1036static void 1037handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 1038{ 1039 ra_foreach_dst (dst, instr) { 1040 init_dst(ctx, dst); 1041 } 1042 1043 if (ctx->spilling) { 1044 ra_foreach_src (src, instr) 1045 insert_src(ctx, src); 1046 } 
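   /* insert_src() marked the source intervals (via their roots) as cant_spill,
    * so the limit() calls below will never pick a value this instruction is
    * about to read.
    */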
1047 1048 /* Handle tied and early-kill destinations. If a destination is tied to a 1049 * source and that source is live-through, then we need to allocate a new 1050 * register for the destination which is live-through itself and cannot 1051 * overlap the sources. Similarly early-kill destinations cannot overlap 1052 * sources. 1053 */ 1054 1055 ra_foreach_dst (dst, instr) { 1056 struct ir3_register *tied_src = dst->tied; 1057 if ((tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL)) || 1058 (dst->flags & IR3_REG_EARLY_CLOBBER)) 1059 insert_dst(ctx, dst); 1060 } 1061 1062 if (ctx->spilling) 1063 limit(ctx, instr); 1064 else 1065 update_max_pressure(ctx); 1066 1067 if (ctx->spilling) { 1068 ra_foreach_src (src, instr) { 1069 reload_src(ctx, instr, src); 1070 update_src_next_use(ctx, src); 1071 } 1072 } 1073 1074 ra_foreach_src (src, instr) { 1075 if (src->flags & IR3_REG_FIRST_KILL) 1076 remove_src_early(ctx, instr, src); 1077 } 1078 1079 ra_foreach_dst (dst, instr) { 1080 insert_dst(ctx, dst); 1081 } 1082 1083 if (ctx->spilling) 1084 limit(ctx, instr); 1085 else 1086 update_max_pressure(ctx); 1087 1088 /* We have to remove sources before rewriting them so that we can lookup the 1089 * interval to remove before the source itself is changed. 1090 */ 1091 ra_foreach_src (src, instr) { 1092 if (src->flags & IR3_REG_FIRST_KILL) 1093 remove_src(ctx, instr, src); 1094 } 1095 1096 if (ctx->spilling) { 1097 ra_foreach_src (src, instr) { 1098 rewrite_src(ctx, instr, src); 1099 } 1100 } 1101 1102 ra_foreach_dst (dst, instr) { 1103 finish_dst(ctx, dst); 1104 } 1105 1106 for (unsigned i = 0; i < instr->dsts_count; i++) { 1107 if (ra_reg_is_dst(instr->dsts[i]) && 1108 (instr->dsts[i]->flags & IR3_REG_UNUSED)) 1109 remove_dst(ctx, instr->dsts[i]); 1110 } 1111} 1112 1113static struct ra_spill_interval * 1114create_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def) 1115{ 1116 unsigned name = ctx->intervals_count++; 1117 unsigned offset = ctx->live->interval_offset; 1118 1119 /* This is kinda hacky, but we need to create a fake SSA def here that is 1120 * only used as part of the pcopy accounting. See below. 1121 */ 1122 struct ir3_register *reg = rzalloc(ctx, struct ir3_register); 1123 *reg = *def; 1124 reg->name = name; 1125 reg->interval_start = offset; 1126 reg->interval_end = offset + reg_size(def); 1127 reg->merge_set = NULL; 1128 1129 ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *, 1130 ctx->intervals_count); 1131 struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval); 1132 ra_spill_interval_init(interval, reg); 1133 ctx->intervals[name] = interval; 1134 ctx->live->interval_offset += reg_size(def); 1135 return interval; 1136} 1137 1138/* In the sequence of copies generated (see below), would this source be killed? 1139 */ 1140static bool 1141is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n) 1142{ 1143 struct ir3_register *src = pcopy->srcs[src_n]; 1144 if (!(src->flags & IR3_REG_KILL)) 1145 return false; 1146 for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) { 1147 if (pcopy->srcs[j]->def == src->def) 1148 return false; 1149 } 1150 return true; 1151} 1152 1153/* Parallel copies are different from normal instructions. The sources together 1154 * may be larger than the entire register file, so we cannot just reload every 1155 * source like normal, and indeed that probably wouldn't be a great idea. 
1156 * Instead we essentially need to lower the parallel copy to "copies," just like 1157 * in the normal CSSA construction, although we implement the copies by 1158 * reloading and then possibly spilling values. We essentially just shuffle 1159 * around the sources until each source either (a) is live or (b) has the same 1160 * spill slot as its corresponding destination. We do this by decomposing the 1161 * copy into a series of copies, so: 1162 * 1163 * a, b, c = d, e, f 1164 * 1165 * becomes: 1166 * 1167 * d' = d 1168 * e' = e 1169 * f' = f 1170 * a = d' 1171 * b = e' 1172 * c = f' 1173 * 1174 * the temporary SSA values d', e', and f' never actually show up in the result. 1175 * They are only used for our internal accounting. They may, however, have their 1176 * own spill slot created for them. Similarly, we don't actually emit any copy 1177 * instructions, although we emit the spills/reloads that *would've* been 1178 * required if those copies were there. 1179 * 1180 * TODO: in order to reduce the number of temporaries and therefore spill slots, 1181 * we could instead do a more complicated analysis that considers the location 1182 * transfer graph. 1183 * 1184 * In addition, we actually remove the parallel copy and rewrite all its uses 1185 * (in the phi nodes) rather than rewrite its sources at the end. Recreating it 1186 * later turns out to be easier than keeping it up-to-date throughout this pass, 1187 * since we may have to remove entries for phi sources that are spilled and add 1188 * entries for live-outs that are spilled and reloaded, which can happen here 1189 * and then possibly be undone or done again when processing live-ins of the 1190 * successor block. 1191 */ 1192 1193static void 1194handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy) 1195{ 1196 foreach_dst (dst, pcopy) { 1197 struct ra_spill_interval *dst_interval = ctx->intervals[dst->name]; 1198 ra_spill_interval_init(dst_interval, dst); 1199 } 1200 1201 foreach_src_n (src, i, pcopy) { 1202 d("processing src %u", i); 1203 struct ir3_register *dst = pcopy->dsts[i]; 1204 1205 /* Skip the intermediate copy for cases where the source is merged with 1206 * the destination. Crucially this means that we also don't reload/spill 1207 * it if it's been spilled, because it shares the same spill slot. 
1208 */ 1209 if (src->def && src->def->merge_set && 1210 src->def->merge_set == dst->merge_set && 1211 src->def->merge_set_offset == dst->merge_set_offset) { 1212 struct ra_spill_interval *src_interval = ctx->intervals[src->def->name]; 1213 struct ra_spill_interval *dst_interval = ctx->intervals[dst->name]; 1214 if (src_interval->interval.inserted) { 1215 update_src_next_use(ctx, src); 1216 if (is_last_pcopy_src(pcopy, i)) 1217 ra_spill_ctx_remove(ctx, src_interval); 1218 dst_interval->cant_spill = true; 1219 ra_spill_ctx_insert(ctx, dst_interval); 1220 limit(ctx, pcopy); 1221 dst_interval->cant_spill = false; 1222 dst_interval->dst = src_interval->dst; 1223 } 1224 } else if (src->def) { 1225 struct ra_spill_interval *temp_interval = 1226 create_temp_interval(ctx, dst); 1227 struct ir3_register *temp = temp_interval->interval.reg; 1228 temp_interval->next_use_distance = src->next_use; 1229 1230 insert_src(ctx, src); 1231 limit(ctx, pcopy); 1232 reload_src(ctx, pcopy, src); 1233 update_src_next_use(ctx, src); 1234 if (is_last_pcopy_src(pcopy, i)) 1235 remove_src(ctx, pcopy, src); 1236 struct ra_spill_interval *src_interval = 1237 ctx->intervals[src->def->name]; 1238 temp_interval->dst = src_interval->dst; 1239 1240 temp_interval->cant_spill = true; 1241 ra_spill_ctx_insert(ctx, temp_interval); 1242 limit(ctx, pcopy); 1243 temp_interval->cant_spill = false; 1244 1245 src->flags = temp->flags; 1246 src->def = temp; 1247 } 1248 } 1249 1250 d("done with pcopy srcs"); 1251 1252 foreach_src_n (src, i, pcopy) { 1253 struct ir3_register *dst = pcopy->dsts[i]; 1254 1255 if (src->def && src->def->merge_set && 1256 src->def->merge_set == dst->merge_set && 1257 src->def->merge_set_offset == dst->merge_set_offset) 1258 continue; 1259 1260 struct ra_spill_interval *dst_interval = ctx->intervals[dst->name]; 1261 1262 if (!src->def) { 1263 dst_interval->cant_spill = true; 1264 ra_spill_ctx_insert(ctx, dst_interval); 1265 limit(ctx, pcopy); 1266 dst_interval->cant_spill = false; 1267 1268 assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED)); 1269 if (src->flags & IR3_REG_CONST) { 1270 dst_interval->dst.flags = src->flags; 1271 dst_interval->dst.const_num = src->num; 1272 } else { 1273 dst_interval->dst.flags = src->flags; 1274 dst_interval->dst.uimm = src->uim_val; 1275 } 1276 } else { 1277 struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name]; 1278 1279 insert_src(ctx, src); 1280 limit(ctx, pcopy); 1281 reload_src(ctx, pcopy, src); 1282 remove_src(ctx, pcopy, src); 1283 1284 dst_interval->dst = temp_interval->dst; 1285 ra_spill_ctx_insert(ctx, dst_interval); 1286 } 1287 } 1288 1289 pcopy->flags |= IR3_INSTR_UNUSED; 1290} 1291 1292static void 1293handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 1294{ 1295 init_dst(ctx, instr->dsts[0]); 1296 insert_dst(ctx, instr->dsts[0]); 1297 finish_dst(ctx, instr->dsts[0]); 1298} 1299 1300static void 1301remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 1302{ 1303 if (instr->opc == OPC_META_TEX_PREFETCH) { 1304 ra_foreach_src (src, instr) 1305 remove_src(ctx, instr, src); 1306 } 1307 if (instr->dsts[0]->flags & IR3_REG_UNUSED) 1308 remove_dst(ctx, instr->dsts[0]); 1309} 1310 1311static void 1312handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block, 1313 struct ir3_register *def) 1314{ 1315 struct ra_spill_interval *interval = ctx->intervals[def->name]; 1316 ra_spill_interval_init(interval, def); 1317 if (ctx->spilling) { 1318 interval->next_use_distance = 1319 
ctx->blocks[block->index].next_use_start[def->name]; 1320 } 1321 1322 ra_spill_ctx_insert(ctx, interval); 1323} 1324 1325static bool 1326is_live_in_phi(struct ir3_register *def, struct ir3_block *block) 1327{ 1328 return def->instr->opc == OPC_META_PHI && def->instr->block == block; 1329} 1330 1331static bool 1332is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def, 1333 struct ir3_block *block, unsigned pred_idx) 1334{ 1335 struct ir3_block *pred = block->predecessors[pred_idx]; 1336 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1337 if (is_live_in_phi(def, block)) { 1338 def = def->instr->srcs[pred_idx]->def; 1339 if (!def) 1340 return false; 1341 } 1342 1343 return _mesa_hash_table_search(state->remap, def); 1344} 1345 1346static bool 1347is_live_in_undef(struct ir3_register *def, 1348 struct ir3_block *block, unsigned pred_idx) 1349{ 1350 if (!is_live_in_phi(def, block)) 1351 return false; 1352 1353 return !def->instr->srcs[pred_idx]->def; 1354} 1355 1356static struct reg_or_immed * 1357read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def, 1358 struct ir3_block *block, unsigned pred_idx) 1359{ 1360 struct ir3_block *pred = block->predecessors[pred_idx]; 1361 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1362 1363 if (is_live_in_phi(def, block)) { 1364 def = def->instr->srcs[pred_idx]->def; 1365 if (!def) 1366 return NULL; 1367 } 1368 1369 struct hash_entry *entry = _mesa_hash_table_search(state->remap, def); 1370 if (entry) 1371 return entry->data; 1372 else 1373 return NULL; 1374} 1375 1376static bool 1377is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def, 1378 struct ir3_block *block) 1379{ 1380 for (unsigned i = 0; i < block->predecessors_count; i++) { 1381 if (!is_live_in_pred(ctx, def, block, i)) 1382 return false; 1383 } 1384 1385 return true; 1386} 1387 1388static void 1389spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def, 1390 struct ir3_block *block) 1391{ 1392 for (unsigned i = 0; i < block->predecessors_count; i++) { 1393 struct ir3_block *pred = block->predecessors[i]; 1394 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1395 1396 if (!state->visited) 1397 continue; 1398 1399 struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i); 1400 if (pred_def) { 1401 spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred); 1402 } 1403 } 1404} 1405 1406static void 1407spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block) 1408{ 1409 bool all_preds_visited = true; 1410 for (unsigned i = 0; i < block->predecessors_count; i++) { 1411 struct ir3_block *pred = block->predecessors[i]; 1412 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1413 if (!state->visited) { 1414 all_preds_visited = false; 1415 break; 1416 } 1417 } 1418 1419 /* Note: in the paper they explicitly spill live-through values first, but we 1420 * should be doing that automatically by virtue of picking the largest 1421 * distance due to the extra distance added to edges out of loops. 1422 * 1423 * TODO: Keep track of pressure in each block and preemptively spill 1424 * live-through values as described in the paper to avoid spilling them 1425 * inside the loop. 
1426 */ 1427 1428 if (ctx->cur_pressure.half > ctx->limit_pressure.half) { 1429 rb_tree_foreach_safe (struct ra_spill_interval, interval, 1430 &ctx->half_live_intervals, half_node) { 1431 if (all_preds_visited && 1432 is_live_in_all_preds(ctx, interval->interval.reg, block)) 1433 continue; 1434 if (interval->interval.reg->merge_set || 1435 !interval->can_rematerialize) 1436 spill_live_in(ctx, interval->interval.reg, block); 1437 ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 1438 if (ctx->cur_pressure.half <= ctx->limit_pressure.half) 1439 break; 1440 } 1441 } 1442 1443 if (ctx->cur_pressure.full > ctx->limit_pressure.full) { 1444 rb_tree_foreach_safe (struct ra_spill_interval, interval, 1445 &ctx->full_live_intervals, node) { 1446 if (all_preds_visited && 1447 is_live_in_all_preds(ctx, interval->interval.reg, block)) 1448 continue; 1449 spill_live_in(ctx, interval->interval.reg, block); 1450 ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 1451 if (ctx->cur_pressure.full <= ctx->limit_pressure.full) 1452 break; 1453 } 1454 } 1455} 1456 1457static void 1458live_in_rewrite(struct ra_spill_ctx *ctx, 1459 struct ra_spill_interval *interval, 1460 struct reg_or_immed *new_val, 1461 struct ir3_block *block, unsigned pred_idx) 1462{ 1463 struct ir3_block *pred = block->predecessors[pred_idx]; 1464 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1465 struct ir3_register *def = interval->interval.reg; 1466 if (is_live_in_phi(def, block)) { 1467 def = def->instr->srcs[pred_idx]->def; 1468 } 1469 1470 if (def) 1471 _mesa_hash_table_insert(state->remap, def, new_val); 1472 1473 rb_tree_foreach (struct ra_spill_interval, child, 1474 &interval->interval.children, interval.node) { 1475 assert(new_val->flags & IR3_REG_SSA); 1476 struct ir3_register *child_def = 1477 extract(new_val->def, 1478 (child->interval.reg->interval_start - def->interval_start) / 1479 reg_elem_size(def), reg_elems(child->interval.reg), 1480 NULL, pred); 1481 struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed); 1482 child_val->def = child_def; 1483 child_val->flags = child_def->flags; 1484 live_in_rewrite(ctx, child, child_val, block, pred_idx); 1485 } 1486} 1487 1488static void 1489reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def, 1490 struct ir3_block *block) 1491{ 1492 struct ra_spill_interval *interval = ctx->intervals[def->name]; 1493 for (unsigned i = 0; i < block->predecessors_count; i++) { 1494 struct ir3_block *pred = block->predecessors[i]; 1495 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1496 if (!state->visited) 1497 continue; 1498 1499 if (is_live_in_undef(def, block, i)) 1500 continue; 1501 1502 struct reg_or_immed *new_val = read_live_in(ctx, def, block, i); 1503 1504 if (!new_val) { 1505 new_val = ralloc(ctx, struct reg_or_immed); 1506 if (interval->can_rematerialize) 1507 new_val->def = rematerialize(def, NULL, pred); 1508 else 1509 new_val->def = reload(ctx, def, NULL, pred); 1510 new_val->flags = new_val->def->flags; 1511 } 1512 live_in_rewrite(ctx, interval, new_val, block, i); 1513 } 1514} 1515 1516static void 1517reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block) 1518{ 1519 rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals, 1520 interval.node) { 1521 reload_live_in(ctx, interval->interval.reg, block); 1522 } 1523} 1524 1525static void 1526add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def, 1527 struct ir3_block *block) 1528{ 1529 struct ra_spill_interval 
*interval = ctx->intervals[def->name]; 1530 if (!interval->interval.inserted) 1531 return; 1532 1533 bool needs_phi = false; 1534 struct ir3_register *cur_def = NULL; 1535 for (unsigned i = 0; i < block->predecessors_count; i++) { 1536 struct ir3_block *pred = block->predecessors[i]; 1537 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1538 1539 if (!state->visited) { 1540 needs_phi = true; 1541 break; 1542 } 1543 1544 struct hash_entry *entry = 1545 _mesa_hash_table_search(state->remap, def); 1546 assert(entry); 1547 struct reg_or_immed *pred_val = entry->data; 1548 if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) || 1549 !pred_val->def || 1550 (cur_def && cur_def != pred_val->def)) { 1551 needs_phi = true; 1552 break; 1553 } 1554 cur_def = pred_val->def; 1555 } 1556 1557 if (!needs_phi) { 1558 interval->dst.def = cur_def; 1559 interval->dst.flags = cur_def->flags; 1560 return; 1561 } 1562 1563 struct ir3_instruction *phi = 1564 ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count); 1565 struct ir3_register *dst = __ssa_dst(phi); 1566 dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 1567 dst->size = def->size; 1568 dst->wrmask = def->wrmask; 1569 1570 dst->interval_start = def->interval_start; 1571 dst->interval_end = def->interval_end; 1572 dst->merge_set = def->merge_set; 1573 dst->merge_set_offset = def->merge_set_offset; 1574 1575 for (unsigned i = 0; i < block->predecessors_count; i++) { 1576 struct ir3_block *pred = block->predecessors[i]; 1577 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1578 struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags); 1579 src->size = def->size; 1580 src->wrmask = def->wrmask; 1581 1582 if (state->visited) { 1583 struct hash_entry *entry = 1584 _mesa_hash_table_search(state->remap, def); 1585 assert(entry); 1586 struct reg_or_immed *new_val = entry->data; 1587 set_src_val(src, new_val); 1588 } else { 1589 src->def = def; 1590 } 1591 } 1592 1593 interval->dst.def = dst; 1594 interval->dst.flags = dst->flags; 1595 1596 ir3_instr_move_before_block(phi, block); 1597} 1598 1599/* When spilling a block with a single predecessors, the pred may have other 1600 * successors so we can't choose what's live in and we can't spill/restore 1601 * anything. Just make the inserted intervals exactly match the predecessor. If 1602 * it wasn't live in the predecessor then it must've already been spilled. Also, 1603 * there are no phi nodes and no live-ins. 
1604 */ 1605static void 1606spill_single_pred_live_in(struct ra_spill_ctx *ctx, 1607 struct ir3_block *block) 1608{ 1609 unsigned name; 1610 BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], 1611 ctx->live->definitions_count) { 1612 struct ir3_register *reg = ctx->live->definitions[name]; 1613 struct ra_spill_interval *interval = ctx->intervals[reg->name]; 1614 struct reg_or_immed *val = read_live_in(ctx, reg, block, 0); 1615 if (val) 1616 interval->dst = *val; 1617 else 1618 ra_spill_ctx_remove(ctx, interval); 1619 } 1620} 1621 1622static void 1623rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi, 1624 struct ir3_block *block) 1625{ 1626 if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) { 1627 phi->flags |= IR3_INSTR_UNUSED; 1628 return; 1629 } 1630 1631 for (unsigned i = 0; i < block->predecessors_count; i++) { 1632 struct ir3_block *pred = block->predecessors[i]; 1633 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1634 1635 if (!state->visited) 1636 continue; 1637 1638 struct ir3_register *src = phi->srcs[i]; 1639 if (!src->def) 1640 continue; 1641 1642 struct hash_entry *entry = 1643 _mesa_hash_table_search(state->remap, src->def); 1644 assert(entry); 1645 struct reg_or_immed *new_val = entry->data; 1646 set_src_val(src, new_val); 1647 } 1648} 1649 1650static void 1651spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval, 1652 struct ir3_block *block) 1653{ 1654 struct ir3_register *def = interval->interval.reg; 1655 1656 if (interval->interval.reg->merge_set || 1657 !interval->can_rematerialize) 1658 spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block); 1659 ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 1660} 1661 1662static void 1663spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1664{ 1665 struct ra_spill_block_state *state = &ctx->blocks[block->index]; 1666 rb_tree_foreach_safe (struct ra_spill_interval, interval, 1667 &ctx->reg_ctx.intervals, interval.node) { 1668 if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) { 1669 spill_live_out(ctx, interval, block); 1670 } 1671 } 1672} 1673 1674static void 1675reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def, 1676 struct ir3_block *block) 1677{ 1678 struct ra_spill_interval *interval = ctx->intervals[def->name]; 1679 ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); 1680 1681 reload_def(ctx, def, NULL, block); 1682} 1683 1684static void 1685reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1686{ 1687 struct ra_spill_block_state *state = &ctx->blocks[block->index]; 1688 unsigned name; 1689 BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) { 1690 struct ir3_register *reg = ctx->live->definitions[name]; 1691 struct ra_spill_interval *interval = ctx->intervals[name]; 1692 if (!interval->interval.inserted) 1693 reload_live_out(ctx, reg, block); 1694 } 1695} 1696 1697static void 1698update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block) 1699{ 1700 assert(!block->successors[1]); 1701 struct ir3_block *succ = block->successors[0]; 1702 unsigned pred_idx = ir3_block_get_pred_index(succ, block); 1703 1704 foreach_instr (instr, &succ->instr_list) { 1705 if (instr->opc != OPC_META_PHI) 1706 break; 1707 1708 struct ir3_register *def = instr->srcs[pred_idx]->def; 1709 if (!def) 1710 continue; 1711 1712 struct ra_spill_interval *interval = ctx->intervals[def->name]; 1713 if (!interval->interval.inserted) 1714 continue; 1715 
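      /* Rewrite the phi source to wherever this live-out currently lives: a
       * reloaded or rematerialized def, or an immed/const.
       */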
set_src_val(instr->srcs[pred_idx], &interval->dst); 1716 } 1717} 1718 1719static void 1720record_pred_live_out(struct ra_spill_ctx *ctx, 1721 struct ra_spill_interval *interval, 1722 struct ir3_block *block, unsigned pred_idx) 1723{ 1724 struct ir3_block *pred = block->predecessors[pred_idx]; 1725 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1726 1727 struct ir3_register *def = interval->interval.reg; 1728 if (is_live_in_phi(def, block)) { 1729 def = def->instr->srcs[pred_idx]->def; 1730 } 1731 BITSET_SET(state->live_out, def->name); 1732 1733 rb_tree_foreach (struct ra_spill_interval, child, 1734 &interval->interval.children, interval.node) { 1735 record_pred_live_out(ctx, child, block, pred_idx); 1736 } 1737} 1738 1739static void 1740record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1741{ 1742 for (unsigned i = 0; i < block->predecessors_count; i++) { 1743 struct ir3_block *pred = block->predecessors[i]; 1744 struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1745 if (state->visited) 1746 continue; 1747 1748 state->live_out = rzalloc_array(ctx, BITSET_WORD, 1749 BITSET_WORDS(ctx->live->definitions_count)); 1750 1751 1752 rb_tree_foreach (struct ra_spill_interval, interval, 1753 &ctx->reg_ctx.intervals, interval.node) { 1754 record_pred_live_out(ctx, interval, block, i); 1755 } 1756 } 1757} 1758 1759static void 1760record_live_out(struct ra_spill_ctx *ctx, 1761 struct ra_spill_block_state *state, 1762 struct ra_spill_interval *interval) 1763{ 1764 if (!(interval->dst.flags & IR3_REG_SSA) || 1765 interval->dst.def) { 1766 struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed); 1767 *val = interval->dst; 1768 _mesa_hash_table_insert(state->remap, interval->interval.reg, val); 1769 } 1770 rb_tree_foreach (struct ra_spill_interval, child, 1771 &interval->interval.children, interval.node) { 1772 record_live_out(ctx, state, child); 1773 } 1774} 1775 1776static void 1777record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1778{ 1779 struct ra_spill_block_state *state = &ctx->blocks[block->index]; 1780 state->remap = _mesa_pointer_hash_table_create(ctx); 1781 1782 rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals, 1783 interval.node) { 1784 record_live_out(ctx, state, interval); 1785 } 1786} 1787 1788static void 1789handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block) 1790{ 1791 memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure)); 1792 rb_tree_init(&ctx->reg_ctx.intervals); 1793 rb_tree_init(&ctx->full_live_intervals); 1794 rb_tree_init(&ctx->half_live_intervals); 1795 1796 unsigned name; 1797 BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], 1798 ctx->live->definitions_count) { 1799 struct ir3_register *reg = ctx->live->definitions[name]; 1800 handle_live_in(ctx, block, reg); 1801 } 1802 1803 foreach_instr (instr, &block->instr_list) { 1804 if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT && 1805 instr->opc != OPC_META_TEX_PREFETCH) 1806 break; 1807 handle_input_phi(ctx, instr); 1808 } 1809 1810 if (ctx->spilling) { 1811 if (block->predecessors_count == 1) { 1812 spill_single_pred_live_in(ctx, block); 1813 } else { 1814 spill_live_ins(ctx, block); 1815 reload_live_ins(ctx, block); 1816 record_pred_live_outs(ctx, block); 1817 foreach_instr (instr, &block->instr_list) { 1818 if (instr->opc != OPC_META_PHI) 1819 break; 1820 rewrite_phi(ctx, instr, block); 1821 } 1822 BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], 1823 
         BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                             ctx->live->definitions_count) {
            struct ir3_register *reg = ctx->live->definitions[name];
            add_live_in_phi(ctx, reg, block);
         }
      }
   } else {
      update_max_pressure(ctx);
   }

   foreach_instr (instr, &block->instr_list) {
      di(instr, "processing");

      if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
          instr->opc == OPC_META_TEX_PREFETCH)
         remove_input_phi(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
         handle_pcopy(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_MOV &&
               instr->dsts[0] == ctx->base_reg)
         /* skip */;
      else
         handle_instr(ctx, instr);
   }

   if (ctx->spilling && block->successors[0]) {
      struct ra_spill_block_state *state =
         &ctx->blocks[block->successors[0]->index];
      if (state->visited) {
         assert(!block->successors[1]);

         spill_live_outs(ctx, block);
         reload_live_outs(ctx, block);
         update_live_out_phis(ctx, block);
      }
   }

   if (ctx->spilling) {
      record_live_outs(ctx, block);
      ctx->blocks[block->index].visited = true;
   }
}

static bool
simplify_phi_node(struct ir3_instruction *phi)
{
   struct ir3_register *def = NULL;
   foreach_src (src, phi) {
      /* Ignore phi sources which point to the phi itself. */
      if (src->def == phi->dsts[0])
         continue;
      /* If it's undef or it doesn't match the previous sources, bail. */
      if (!src->def || (def && def != src->def))
         return false;
      def = src->def;
   }

   phi->data = def;
   phi->flags |= IR3_INSTR_UNUSED;
   return true;
}

static struct ir3_register *
simplify_phi_def(struct ir3_register *def)
{
   if (def->instr->opc == OPC_META_PHI) {
      struct ir3_instruction *phi = def->instr;

      /* Note: this function is always called at least once after visiting the
       * phi, so either a phi has been simplified in the meantime, in which
       * case we will set progress=true and visit the definition again, or
       * phi->data already has the most up-to-date value. Therefore we don't
       * have to recursively check phi->data.
       */
      if (phi->data)
         return phi->data;
   }

   return def;
}

static void
simplify_phi_srcs(struct ir3_instruction *instr)
{
   foreach_src (src, instr) {
      if (src->def)
         src->def = simplify_phi_def(src->def);
   }
}

/* We insert phi nodes for all live-ins of loops in case we need to split the
 * live range. This pass cleans that up for the case where the live range
 * didn't actually need to be split.
 */
static void
simplify_phi_nodes(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         instr->data = NULL;
      }
   }

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
               continue;

            simplify_phi_srcs(instr);
         }

         /* Visit phi nodes in the successors to make sure that phi sources are
          * always visited at least once after visiting the definition they
          * point to. See note in simplify_phi_def() for why this is necessary.
          */
         for (unsigned i = 0; i < 2; i++) {
            struct ir3_block *succ = block->successors[i];
            if (!succ)
               continue;
            foreach_instr (instr, &succ->instr_list) {
               if (instr->opc != OPC_META_PHI)
                  break;
               if (instr->flags & IR3_INSTR_UNUSED) {
                  if (instr->data)
                     instr->data = simplify_phi_def(instr->data);
               } else {
                  simplify_phi_srcs(instr);
                  progress |= simplify_phi_node(instr);
               }
            }
         }
      }
   } while (progress);
}

static void
unmark_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->flags &= ~IR3_INSTR_UNUSED;
      }
   }
}

/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
 * which ones are dead along the way, so there's nothing to compute here.
 */
static void
cleanup_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->flags & IR3_INSTR_UNUSED)
            list_delinit(&instr->node);
      }
   }
}

/* Deal with merge sets after spilling. Spilling generally leaves the merge sets
 * in a mess, and even if we properly cleaned up after ourselves, we would want
 * to recompute the merge sets afterward anyway. That's because
 * spilling/reloading can "break up" phi webs and split/collect webs so that
 * allocating them to the same register no longer gives any benefit. For
 * example, imagine we have this:
 *
 * if (...) {
 *    foo = ...
 * } else {
 *    bar = ...
 * }
 * baz = phi(foo, bar)
 *
 * and we spill "baz":
 *
 * if (...) {
 *    foo = ...
 *    spill(foo)
 * } else {
 *    bar = ...
 *    spill(bar)
 * }
 * baz = reload()
 *
 * now foo, bar, and baz don't have to be allocated to the same register. How
 * exactly the merge sets change can be complicated, so it's easier just to
 * recompute them.
 *
 * However, there's a wrinkle in this: those same merge sets determine the
 * register pressure, due to multiple values inhabiting the same register! And
 * we assume that this sharing happens when spilling. Therefore we need a
 * three-step procedure:
 *
 * 1. Drop the original merge sets.
 * 2. Calculate which values *must* be merged, being careful to only use the
 *    interval information which isn't trashed by spilling, and forcibly merge
 *    them.
 * 3. Let ir3_merge_regs() finish the job, including recalculating the
 *    intervals.
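 *
 * For step 2, "must be merged" boils down to split/collect sources and
 * destinations whose intervals still overlap after spilling (see the loop
 * below): the pressure calculation assumed those values share a register, so
 * they are forcibly merged before handing things off to ir3_merge_regs().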
 */

static void
fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         ra_foreach_dst (dst, instr) {
            dst->merge_set = NULL;
            dst->merge_set_offset = 0;
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_SPLIT &&
             instr->opc != OPC_META_COLLECT)
            continue;

         struct ir3_register *dst = instr->dsts[0];
         ra_foreach_src (src, instr) {
            if (!(src->flags & IR3_REG_KILL) &&
                src->def->interval_start < dst->interval_end &&
                dst->interval_start < src->def->interval_end) {
               ir3_force_merge(dst, src->def,
                               src->def->interval_start - dst->interval_start);
            }
         }
      }
   }

   ir3_merge_regs(live, ir);
}

void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                  struct ir3_pressure *max_pressure)
{
   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, live);

   foreach_block (block, &v->ir->block_list) {
      handle_block(ctx, block);
   }

   assert(ctx->cur_pressure.full == 0);
   assert(ctx->cur_pressure.half == 0);
   assert(ctx->cur_pressure.shared == 0);

   *max_pressure = ctx->max_pressure;
   ralloc_free(ctx);
}

bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}