1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "util/rb_tree.h" 25bf215546Sopenharmony_ci#include "ir3_ra.h" 26bf215546Sopenharmony_ci#include "ir3_shader.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* 29bf215546Sopenharmony_ci * This pass does two things: 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * 1. Calculates the maximum register pressure. To do this, we need to use the 32bf215546Sopenharmony_ci * exact same technique that RA uses for combining meta_split instructions 33bf215546Sopenharmony_ci * with their sources, so that our calculation agrees with RA. 34bf215546Sopenharmony_ci * 2. Spills when the register pressure is exceeded a limit calculated by RA. 35bf215546Sopenharmony_ci * The implementation is based on "Register Spilling and Live-Range Splitting 36bf215546Sopenharmony_ci * for SSA-Form Programs" by Braun and Hack, although again care has to be 37bf215546Sopenharmony_ci * taken to handle combining split/collect instructions. 38bf215546Sopenharmony_ci */ 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistruct reg_or_immed { 41bf215546Sopenharmony_ci unsigned flags; 42bf215546Sopenharmony_ci union { 43bf215546Sopenharmony_ci struct ir3_register *def; 44bf215546Sopenharmony_ci uint32_t uimm; 45bf215546Sopenharmony_ci unsigned const_num; 46bf215546Sopenharmony_ci }; 47bf215546Sopenharmony_ci}; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_cistruct ra_spill_interval { 50bf215546Sopenharmony_ci struct ir3_reg_interval interval; 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci struct rb_node node; 53bf215546Sopenharmony_ci struct rb_node half_node; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci /* The current SSA value/const/immed this source is mapped to. */ 56bf215546Sopenharmony_ci struct reg_or_immed dst; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci /* When computing use distances we use the distance relative to the start 59bf215546Sopenharmony_ci * of the block. 
So, for example, a value that's defined in cycle 5 of the 60bf215546Sopenharmony_ci * block and used 6 cycles later will always have a next_use_distance of 11 61bf215546Sopenharmony_ci * until we reach that use. 62bf215546Sopenharmony_ci */ 63bf215546Sopenharmony_ci unsigned next_use_distance; 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci /* Whether this value was reloaded and therefore doesn't need to be 66bf215546Sopenharmony_ci * spilled again. Corresponds to the S set in the paper. 67bf215546Sopenharmony_ci */ 68bf215546Sopenharmony_ci bool already_spilled; 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci /* We need to add sources early for accounting purposes, but we have to 71bf215546Sopenharmony_ci * insert the reload code for them last. Keep track of whether this interval 72bf215546Sopenharmony_ci * needs to be reloaded later. 73bf215546Sopenharmony_ci */ 74bf215546Sopenharmony_ci bool needs_reload; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci /* Keep track of whether this interval currently can't be spilled because: 77bf215546Sopenharmony_ci * - It or one of its children is a source and we're making space for 78bf215546Sopenharmony_ci * sources. 79bf215546Sopenharmony_ci * - It is a destination and we're making space for destinations. 80bf215546Sopenharmony_ci */ 81bf215546Sopenharmony_ci bool cant_spill; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci /* Whether this interval can be rematerialized. */ 84bf215546Sopenharmony_ci bool can_rematerialize; 85bf215546Sopenharmony_ci}; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_cistruct ra_spill_block_state { 88bf215546Sopenharmony_ci unsigned *next_use_end; 89bf215546Sopenharmony_ci unsigned *next_use_start; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci unsigned cycles; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci /* Map from SSA def to reg_or_immed it is mapped to at the end of the block. 
94bf215546Sopenharmony_ci * This map only contains values which we didn't spill, so it also serves as 95bf215546Sopenharmony_ci * a record of the new live-out set for this block. 96bf215546Sopenharmony_ci */ 97bf215546Sopenharmony_ci struct hash_table *remap; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci /* For blocks whose successors are visited first (i.e. loop backedges), which 100bf215546Sopenharmony_ci * values should be live at the end. 101bf215546Sopenharmony_ci */ 102bf215546Sopenharmony_ci BITSET_WORD *live_out; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci bool visited; 105bf215546Sopenharmony_ci}; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistruct ra_spill_ctx { 108bf215546Sopenharmony_ci struct ir3_reg_ctx reg_ctx; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci struct ra_spill_interval **intervals; 111bf215546Sopenharmony_ci unsigned intervals_count; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci /* rb tree of live intervals that we can spill, ordered by next-use distance. 114bf215546Sopenharmony_ci * full_live_intervals contains the full+shared intervals in the merged_regs 115bf215546Sopenharmony_ci * case. We use this list to determine what to spill. 116bf215546Sopenharmony_ci */ 117bf215546Sopenharmony_ci struct rb_tree full_live_intervals; 118bf215546Sopenharmony_ci struct rb_tree half_live_intervals; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci struct ir3_pressure cur_pressure, max_pressure; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci struct ir3_pressure limit_pressure; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci /* When spilling, we need to reserve a register to serve as the zero'd 125bf215546Sopenharmony_ci * "base". For simplicity we reserve a register at the beginning so that it's 126bf215546Sopenharmony_ci * always available. 
127bf215546Sopenharmony_ci */ 128bf215546Sopenharmony_ci struct ir3_register *base_reg; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci /* Current pvtmem offset in bytes. */ 131bf215546Sopenharmony_ci unsigned spill_slot; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci struct ir3_liveness *live; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci const struct ir3_compiler *compiler; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci struct ra_spill_block_state *blocks; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci bool spilling; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci bool merged_regs; 142bf215546Sopenharmony_ci}; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_cistatic void 145bf215546Sopenharmony_ciadd_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir) 146bf215546Sopenharmony_ci{ 147bf215546Sopenharmony_ci struct ir3_block *start = ir3_start_block(ir); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci /* We need to stick it after any meta instructions which need to be first. */ 150bf215546Sopenharmony_ci struct ir3_instruction *after = NULL; 151bf215546Sopenharmony_ci foreach_instr (instr, &start->instr_list) { 152bf215546Sopenharmony_ci if (instr->opc != OPC_META_INPUT && 153bf215546Sopenharmony_ci instr->opc != OPC_META_TEX_PREFETCH) { 154bf215546Sopenharmony_ci after = instr; 155bf215546Sopenharmony_ci break; 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci struct ir3_instruction *mov = create_immed(start, 0); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci if (after) 162bf215546Sopenharmony_ci ir3_instr_move_before(mov, after); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci ctx->base_reg = mov->dsts[0]; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci /* We don't create an interval, etc. for the base reg, so just lower the 167bf215546Sopenharmony_ci * register pressure limit to account for it. 
We assume it's always 168bf215546Sopenharmony_ci * available for simplicity. 169bf215546Sopenharmony_ci */ 170bf215546Sopenharmony_ci ctx->limit_pressure.full -= reg_size(ctx->base_reg); 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci/* Compute the number of cycles per instruction used for next-use-distance 175bf215546Sopenharmony_ci * analysis. This is just approximate, obviously. 176bf215546Sopenharmony_ci */ 177bf215546Sopenharmony_cistatic unsigned 178bf215546Sopenharmony_ciinstr_cycles(struct ir3_instruction *instr) 179bf215546Sopenharmony_ci{ 180bf215546Sopenharmony_ci if (instr->opc == OPC_META_PARALLEL_COPY) { 181bf215546Sopenharmony_ci unsigned cycles = 0; 182bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->dsts_count; i++) { 183bf215546Sopenharmony_ci if (!instr->srcs[i]->def || 184bf215546Sopenharmony_ci instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) { 185bf215546Sopenharmony_ci cycles += reg_elems(instr->srcs[i]); 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci } 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci return cycles; 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (instr->opc == OPC_META_COLLECT) { 193bf215546Sopenharmony_ci unsigned cycles = 0; 194bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->srcs_count; i++) { 195bf215546Sopenharmony_ci if (!instr->srcs[i]->def || 196bf215546Sopenharmony_ci instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) { 197bf215546Sopenharmony_ci cycles++; 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci return cycles; 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci if (is_meta(instr)) 205bf215546Sopenharmony_ci return 0; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci return 1 + instr->repeat; 208bf215546Sopenharmony_ci} 209bf215546Sopenharmony_ci 
210bf215546Sopenharmony_cistatic bool 211bf215546Sopenharmony_cicompute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block, 212bf215546Sopenharmony_ci unsigned *tmp_next_use) 213bf215546Sopenharmony_ci{ 214bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[block->index]; 215bf215546Sopenharmony_ci memcpy(tmp_next_use, state->next_use_end, 216bf215546Sopenharmony_ci ctx->live->definitions_count * sizeof(*tmp_next_use)); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci unsigned cycle = state->cycles; 219bf215546Sopenharmony_ci foreach_instr_rev (instr, &block->instr_list) { 220bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 221bf215546Sopenharmony_ci dst->next_use = tmp_next_use[dst->name]; 222bf215546Sopenharmony_ci } 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 225bf215546Sopenharmony_ci src->next_use = tmp_next_use[src->def->name]; 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci cycle -= instr_cycles(instr); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci if (instr->opc == OPC_META_PARALLEL_COPY) { 231bf215546Sopenharmony_ci ra_foreach_src_n (src, i, instr) { 232bf215546Sopenharmony_ci if (src->def->merge_set == instr->dsts[i]->merge_set && 233bf215546Sopenharmony_ci src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) { 234bf215546Sopenharmony_ci tmp_next_use[src->def->name] = 235bf215546Sopenharmony_ci tmp_next_use[instr->dsts[i]->name]; 236bf215546Sopenharmony_ci } else { 237bf215546Sopenharmony_ci tmp_next_use[src->def->name] = cycle; 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci } else if (instr->opc != OPC_META_PHI) { 241bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 242bf215546Sopenharmony_ci tmp_next_use[src->def->name] = cycle; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 
247bf215546Sopenharmony_ci tmp_next_use[dst->name] = UINT_MAX; 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci memcpy(state->next_use_start, tmp_next_use, 252bf215546Sopenharmony_ci ctx->live->definitions_count * sizeof(*tmp_next_use)); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci bool progress = false; 255bf215546Sopenharmony_ci for (unsigned i = 0; i < block->predecessors_count; i++) { 256bf215546Sopenharmony_ci const struct ir3_block *pred = block->predecessors[i]; 257bf215546Sopenharmony_ci struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index]; 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci /* Add a large-enough distance in front of edges exiting the loop so that 260bf215546Sopenharmony_ci * variables that are live-through the loop but not used inside it are 261bf215546Sopenharmony_ci * prioritized for spilling, as per the paper. This just needs to be 262bf215546Sopenharmony_ci * larger than the longest path through the loop. 263bf215546Sopenharmony_ci */ 264bf215546Sopenharmony_ci bool loop_exit = pred->loop_depth < block->loop_depth; 265bf215546Sopenharmony_ci unsigned block_distance = pred_state->cycles + (loop_exit ? 
100000 : 0); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci for (unsigned j = 0; j < ctx->live->definitions_count; j++) { 268bf215546Sopenharmony_ci if (state->next_use_start[j] < UINT_MAX && 269bf215546Sopenharmony_ci state->next_use_start[j] + block_distance < 270bf215546Sopenharmony_ci pred_state->next_use_end[j]) { 271bf215546Sopenharmony_ci pred_state->next_use_end[j] = state->next_use_start[j] + 272bf215546Sopenharmony_ci block_distance; 273bf215546Sopenharmony_ci progress = true; 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci } 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci foreach_instr (phi, &block->instr_list) { 278bf215546Sopenharmony_ci if (phi->opc != OPC_META_PHI) 279bf215546Sopenharmony_ci break; 280bf215546Sopenharmony_ci if (!phi->srcs[i]->def) 281bf215546Sopenharmony_ci continue; 282bf215546Sopenharmony_ci unsigned src = phi->srcs[i]->def->name; 283bf215546Sopenharmony_ci if (phi->dsts[0]->next_use < UINT_MAX && 284bf215546Sopenharmony_ci phi->dsts[0]->next_use + block_distance < 285bf215546Sopenharmony_ci pred_state->next_use_end[src]) { 286bf215546Sopenharmony_ci pred_state->next_use_end[src] = phi->dsts[0]->next_use + 287bf215546Sopenharmony_ci block_distance; 288bf215546Sopenharmony_ci progress = true; 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci return progress; 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_cistatic void 297bf215546Sopenharmony_cicompute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir) 298bf215546Sopenharmony_ci{ 299bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->live->block_count; i++) { 300bf215546Sopenharmony_ci ctx->blocks[i].next_use_start = 301bf215546Sopenharmony_ci ralloc_array(ctx, unsigned, ctx->live->definitions_count); 302bf215546Sopenharmony_ci ctx->blocks[i].next_use_end = 303bf215546Sopenharmony_ci ralloc_array(ctx, unsigned, 
ctx->live->definitions_count); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci for (unsigned j = 0; j < ctx->live->definitions_count; j++) { 306bf215546Sopenharmony_ci ctx->blocks[i].next_use_start[j] = UINT_MAX; 307bf215546Sopenharmony_ci ctx->blocks[i].next_use_end[j] = UINT_MAX; 308bf215546Sopenharmony_ci } 309bf215546Sopenharmony_ci } 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 312bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[block->index]; 313bf215546Sopenharmony_ci state->cycles = 0; 314bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 315bf215546Sopenharmony_ci state->cycles += instr_cycles(instr); 316bf215546Sopenharmony_ci foreach_dst (dst, instr) { 317bf215546Sopenharmony_ci dst->spill_slot = ~0; 318bf215546Sopenharmony_ci } 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci } 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci unsigned *tmp_next_use = 323bf215546Sopenharmony_ci ralloc_array(ctx, unsigned, ctx->live->definitions_count); 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci bool progress = true; 326bf215546Sopenharmony_ci while (progress) { 327bf215546Sopenharmony_ci progress = false; 328bf215546Sopenharmony_ci foreach_block_rev (block, &ir->block_list) { 329bf215546Sopenharmony_ci progress |= compute_block_next_distance(ctx, block, tmp_next_use); 330bf215546Sopenharmony_ci } 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci} 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_cistatic bool 335bf215546Sopenharmony_cican_rematerialize(struct ir3_register *reg) 336bf215546Sopenharmony_ci{ 337bf215546Sopenharmony_ci if (reg->flags & IR3_REG_ARRAY) 338bf215546Sopenharmony_ci return false; 339bf215546Sopenharmony_ci if (reg->instr->opc != OPC_MOV) 340bf215546Sopenharmony_ci return false; 341bf215546Sopenharmony_ci if (!(reg->instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST))) 342bf215546Sopenharmony_ci return false; 
343bf215546Sopenharmony_ci if (reg->instr->srcs[0]->flags & IR3_REG_RELATIV) 344bf215546Sopenharmony_ci return false; 345bf215546Sopenharmony_ci return true; 346bf215546Sopenharmony_ci} 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_cistatic struct ir3_register * 349bf215546Sopenharmony_cirematerialize(struct ir3_register *reg, struct ir3_instruction *after, 350bf215546Sopenharmony_ci struct ir3_block *block) 351bf215546Sopenharmony_ci{ 352bf215546Sopenharmony_ci d("rematerializing ssa_%u:%u", reg->instr->serialno, reg->name); 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci struct ir3_instruction *remat = 355bf215546Sopenharmony_ci ir3_instr_create(block, reg->instr->opc, 1, reg->instr->srcs_count); 356bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(remat); 357bf215546Sopenharmony_ci dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 358bf215546Sopenharmony_ci for (unsigned i = 0; i < reg->instr->srcs_count; i++) { 359bf215546Sopenharmony_ci struct ir3_register *src = 360bf215546Sopenharmony_ci ir3_src_create(remat, INVALID_REG, reg->instr->srcs[i]->flags); 361bf215546Sopenharmony_ci *src = *reg->instr->srcs[i]; 362bf215546Sopenharmony_ci } 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci remat->cat1 = reg->instr->cat1; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci dst->merge_set = reg->merge_set; 367bf215546Sopenharmony_ci dst->merge_set_offset = reg->merge_set_offset; 368bf215546Sopenharmony_ci dst->interval_start = reg->interval_start; 369bf215546Sopenharmony_ci dst->interval_end = reg->interval_end; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if (after) 372bf215546Sopenharmony_ci ir3_instr_move_before(remat, after); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci return dst; 375bf215546Sopenharmony_ci} 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_cistatic void 378bf215546Sopenharmony_cira_spill_interval_init(struct ra_spill_interval *interval, 379bf215546Sopenharmony_ci struct ir3_register *reg) 
380bf215546Sopenharmony_ci{ 381bf215546Sopenharmony_ci ir3_reg_interval_init(&interval->interval, reg); 382bf215546Sopenharmony_ci interval->dst.flags = reg->flags; 383bf215546Sopenharmony_ci interval->dst.def = reg; 384bf215546Sopenharmony_ci interval->already_spilled = false; 385bf215546Sopenharmony_ci interval->needs_reload = false; 386bf215546Sopenharmony_ci interval->cant_spill = false; 387bf215546Sopenharmony_ci interval->can_rematerialize = can_rematerialize(reg); 388bf215546Sopenharmony_ci} 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_cistatic struct ra_spill_interval * 391bf215546Sopenharmony_ciir3_reg_interval_to_interval(struct ir3_reg_interval *interval) 392bf215546Sopenharmony_ci{ 393bf215546Sopenharmony_ci return rb_node_data(struct ra_spill_interval, interval, interval); 394bf215546Sopenharmony_ci} 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_cistatic struct ra_spill_interval * 397bf215546Sopenharmony_cira_spill_interval_root(struct ra_spill_interval *interval) 398bf215546Sopenharmony_ci{ 399bf215546Sopenharmony_ci struct ir3_reg_interval *ir3_interval = &interval->interval; 400bf215546Sopenharmony_ci while (ir3_interval->parent) 401bf215546Sopenharmony_ci ir3_interval = ir3_interval->parent; 402bf215546Sopenharmony_ci return ir3_reg_interval_to_interval(ir3_interval); 403bf215546Sopenharmony_ci} 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_cistatic struct ra_spill_ctx * 406bf215546Sopenharmony_ciir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx) 407bf215546Sopenharmony_ci{ 408bf215546Sopenharmony_ci return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx); 409bf215546Sopenharmony_ci} 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_cistatic int 412bf215546Sopenharmony_cispill_interval_cmp(const struct ra_spill_interval *a, 413bf215546Sopenharmony_ci const struct ra_spill_interval *b) 414bf215546Sopenharmony_ci{ 415bf215546Sopenharmony_ci /* Prioritize intervals that we can rematerialize. 
*/ 416bf215546Sopenharmony_ci if (a->can_rematerialize && !b->can_rematerialize) 417bf215546Sopenharmony_ci return 1; 418bf215546Sopenharmony_ci if (!a->can_rematerialize && b->can_rematerialize) 419bf215546Sopenharmony_ci return -1; 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci return a->next_use_distance - b->next_use_distance; 422bf215546Sopenharmony_ci} 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_cistatic int 425bf215546Sopenharmony_cira_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b) 426bf215546Sopenharmony_ci{ 427bf215546Sopenharmony_ci const struct ra_spill_interval *a = 428bf215546Sopenharmony_ci rb_node_data(const struct ra_spill_interval, _a, node); 429bf215546Sopenharmony_ci const struct ra_spill_interval *b = 430bf215546Sopenharmony_ci rb_node_data(const struct ra_spill_interval, _b, node); 431bf215546Sopenharmony_ci return spill_interval_cmp(a, b); 432bf215546Sopenharmony_ci} 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_cistatic int 435bf215546Sopenharmony_cira_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b) 436bf215546Sopenharmony_ci{ 437bf215546Sopenharmony_ci const struct ra_spill_interval *a = 438bf215546Sopenharmony_ci rb_node_data(const struct ra_spill_interval, _a, half_node); 439bf215546Sopenharmony_ci const struct ra_spill_interval *b = 440bf215546Sopenharmony_ci rb_node_data(const struct ra_spill_interval, _b, half_node); 441bf215546Sopenharmony_ci return spill_interval_cmp(a, b); 442bf215546Sopenharmony_ci} 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_cistatic void 445bf215546Sopenharmony_ciinterval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval) 446bf215546Sopenharmony_ci{ 447bf215546Sopenharmony_ci struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval); 448bf215546Sopenharmony_ci struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx); 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci unsigned size = 
reg_size(interval->interval.reg); 451bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_SHARED) { 452bf215546Sopenharmony_ci ctx->cur_pressure.shared += size; 453bf215546Sopenharmony_ci } else { 454bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_HALF) { 455bf215546Sopenharmony_ci ctx->cur_pressure.half += size; 456bf215546Sopenharmony_ci if (ctx->spilling) { 457bf215546Sopenharmony_ci rb_tree_insert(&ctx->half_live_intervals, &interval->half_node, 458bf215546Sopenharmony_ci ra_spill_interval_half_cmp); 459bf215546Sopenharmony_ci } 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) { 462bf215546Sopenharmony_ci ctx->cur_pressure.full += size; 463bf215546Sopenharmony_ci if (ctx->spilling) { 464bf215546Sopenharmony_ci rb_tree_insert(&ctx->full_live_intervals, &interval->node, 465bf215546Sopenharmony_ci ra_spill_interval_cmp); 466bf215546Sopenharmony_ci } 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci} 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_cistatic void 472bf215546Sopenharmony_ciinterval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval) 473bf215546Sopenharmony_ci{ 474bf215546Sopenharmony_ci struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval); 475bf215546Sopenharmony_ci struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx); 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci unsigned size = reg_size(interval->interval.reg); 478bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_SHARED) { 479bf215546Sopenharmony_ci ctx->cur_pressure.shared -= size; 480bf215546Sopenharmony_ci } else { 481bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_HALF) { 482bf215546Sopenharmony_ci ctx->cur_pressure.half -= size; 483bf215546Sopenharmony_ci if (ctx->spilling) { 484bf215546Sopenharmony_ci rb_tree_remove(&ctx->half_live_intervals, 
&interval->half_node); 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci } 487bf215546Sopenharmony_ci if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) { 488bf215546Sopenharmony_ci ctx->cur_pressure.full -= size; 489bf215546Sopenharmony_ci if (ctx->spilling) { 490bf215546Sopenharmony_ci rb_tree_remove(&ctx->full_live_intervals, &interval->node); 491bf215546Sopenharmony_ci } 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci} 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_cistatic void 497bf215546Sopenharmony_ciinterval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent, 498bf215546Sopenharmony_ci struct ir3_reg_interval *_child) 499bf215546Sopenharmony_ci{ 500bf215546Sopenharmony_ci interval_add(_ctx, _child); 501bf215546Sopenharmony_ci} 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_cistatic void 504bf215546Sopenharmony_cispill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v, 505bf215546Sopenharmony_ci struct ir3_liveness *live) 506bf215546Sopenharmony_ci{ 507bf215546Sopenharmony_ci ctx->live = live; 508bf215546Sopenharmony_ci ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *, 509bf215546Sopenharmony_ci ctx->live->definitions_count); 510bf215546Sopenharmony_ci struct ra_spill_interval *intervals = 511bf215546Sopenharmony_ci rzalloc_array(ctx, struct ra_spill_interval, 512bf215546Sopenharmony_ci ctx->live->definitions_count); 513bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->live->definitions_count; i++) 514bf215546Sopenharmony_ci ctx->intervals[i] = &intervals[i]; 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci ctx->intervals_count = ctx->live->definitions_count; 517bf215546Sopenharmony_ci ctx->compiler = v->compiler; 518bf215546Sopenharmony_ci ctx->merged_regs = v->mergedregs; 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci rb_tree_init(&ctx->reg_ctx.intervals); 521bf215546Sopenharmony_ci ctx->reg_ctx.interval_add = interval_add; 
522bf215546Sopenharmony_ci ctx->reg_ctx.interval_delete = interval_delete; 523bf215546Sopenharmony_ci ctx->reg_ctx.interval_readd = interval_readd; 524bf215546Sopenharmony_ci} 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_cistatic void 527bf215546Sopenharmony_cira_spill_ctx_insert(struct ra_spill_ctx *ctx, 528bf215546Sopenharmony_ci struct ra_spill_interval *interval) 529bf215546Sopenharmony_ci{ 530bf215546Sopenharmony_ci ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); 531bf215546Sopenharmony_ci} 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_cistatic void 534bf215546Sopenharmony_cira_spill_ctx_remove(struct ra_spill_ctx *ctx, 535bf215546Sopenharmony_ci struct ra_spill_interval *interval) 536bf215546Sopenharmony_ci{ 537bf215546Sopenharmony_ci ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval); 538bf215546Sopenharmony_ci} 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_cistatic void 541bf215546Sopenharmony_ciinit_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 542bf215546Sopenharmony_ci{ 543bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[dst->name]; 544bf215546Sopenharmony_ci ra_spill_interval_init(interval, dst); 545bf215546Sopenharmony_ci if (ctx->spilling) { 546bf215546Sopenharmony_ci interval->next_use_distance = dst->next_use; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci /* We only need to keep track of used-ness if this value may be 549bf215546Sopenharmony_ci * rematerialized. This also keeps us from nuking things that may be 550bf215546Sopenharmony_ci * in the keeps list (e.g. atomics, input splits). 
551bf215546Sopenharmony_ci */ 552bf215546Sopenharmony_ci if (interval->can_rematerialize) 553bf215546Sopenharmony_ci dst->instr->flags |= IR3_INSTR_UNUSED; 554bf215546Sopenharmony_ci } 555bf215546Sopenharmony_ci} 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_cistatic void 558bf215546Sopenharmony_ciinsert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 559bf215546Sopenharmony_ci{ 560bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[dst->name]; 561bf215546Sopenharmony_ci if (interval->interval.inserted) 562bf215546Sopenharmony_ci return; 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci ra_spill_ctx_insert(ctx, interval); 565bf215546Sopenharmony_ci interval->cant_spill = true; 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci /* For precolored inputs, make sure we leave enough registers to allow for 568bf215546Sopenharmony_ci * holes in the inputs. It can happen that the binning shader has a lower 569bf215546Sopenharmony_ci * register pressure than the main shader, but the main shader decided to 570bf215546Sopenharmony_ci * add holes between the inputs which means that the binning shader has a 571bf215546Sopenharmony_ci * higher register demand. 
572bf215546Sopenharmony_ci */ 573bf215546Sopenharmony_ci if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) { 574bf215546Sopenharmony_ci physreg_t physreg = ra_reg_get_physreg(dst); 575bf215546Sopenharmony_ci physreg_t max = physreg + reg_size(dst); 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_SHARED) 578bf215546Sopenharmony_ci ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max); 579bf215546Sopenharmony_ci else if (interval->interval.reg->flags & IR3_REG_HALF) 580bf215546Sopenharmony_ci ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max); 581bf215546Sopenharmony_ci else 582bf215546Sopenharmony_ci ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max); 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci} 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_cistatic void 587bf215546Sopenharmony_ciinsert_src(struct ra_spill_ctx *ctx, struct ir3_register *src) 588bf215546Sopenharmony_ci{ 589bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci if (!interval->interval.inserted) { 592bf215546Sopenharmony_ci ra_spill_ctx_insert(ctx, interval); 593bf215546Sopenharmony_ci interval->needs_reload = true; 594bf215546Sopenharmony_ci interval->already_spilled = true; 595bf215546Sopenharmony_ci } 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci ra_spill_interval_root(interval)->cant_spill = true; 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci} 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_cistatic void 602bf215546Sopenharmony_ciremove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 603bf215546Sopenharmony_ci struct ir3_register *src) 604bf215546Sopenharmony_ci{ 605bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci if (!interval->interval.inserted || 
interval->interval.parent || 608bf215546Sopenharmony_ci !rb_tree_is_empty(&interval->interval.children)) 609bf215546Sopenharmony_ci return; 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci ra_spill_ctx_remove(ctx, interval); 612bf215546Sopenharmony_ci} 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_cistatic void 615bf215546Sopenharmony_ciremove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 616bf215546Sopenharmony_ci struct ir3_register *src) 617bf215546Sopenharmony_ci{ 618bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci if (!interval->interval.inserted) 621bf215546Sopenharmony_ci return; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci ra_spill_ctx_remove(ctx, interval); 624bf215546Sopenharmony_ci} 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_cistatic void 627bf215546Sopenharmony_cifinish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 628bf215546Sopenharmony_ci{ 629bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[dst->name]; 630bf215546Sopenharmony_ci interval->cant_spill = false; 631bf215546Sopenharmony_ci} 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_cistatic void 634bf215546Sopenharmony_ciremove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst) 635bf215546Sopenharmony_ci{ 636bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[dst->name]; 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci if (!interval->interval.inserted) 639bf215546Sopenharmony_ci return; 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci ra_spill_ctx_remove(ctx, interval); 642bf215546Sopenharmony_ci} 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_cistatic void 645bf215546Sopenharmony_ciupdate_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src) 646bf215546Sopenharmony_ci{ 647bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 
648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci assert(interval->interval.inserted); 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci interval->next_use_distance = src->next_use; 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci /* If this node is inserted in one of the trees, then it needs to be resorted 654bf215546Sopenharmony_ci * as its key has changed. 655bf215546Sopenharmony_ci */ 656bf215546Sopenharmony_ci if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) { 657bf215546Sopenharmony_ci if (src->flags & IR3_REG_HALF) { 658bf215546Sopenharmony_ci rb_tree_remove(&ctx->half_live_intervals, &interval->half_node); 659bf215546Sopenharmony_ci rb_tree_insert(&ctx->half_live_intervals, &interval->half_node, 660bf215546Sopenharmony_ci ra_spill_interval_half_cmp); 661bf215546Sopenharmony_ci } 662bf215546Sopenharmony_ci if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) { 663bf215546Sopenharmony_ci rb_tree_remove(&ctx->full_live_intervals, &interval->node); 664bf215546Sopenharmony_ci rb_tree_insert(&ctx->full_live_intervals, &interval->node, 665bf215546Sopenharmony_ci ra_spill_interval_cmp); 666bf215546Sopenharmony_ci } 667bf215546Sopenharmony_ci } 668bf215546Sopenharmony_ci} 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_cistatic unsigned 671bf215546Sopenharmony_ciget_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg) 672bf215546Sopenharmony_ci{ 673bf215546Sopenharmony_ci if (reg->merge_set) { 674bf215546Sopenharmony_ci if (reg->merge_set->spill_slot == ~0) { 675bf215546Sopenharmony_ci reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot, 676bf215546Sopenharmony_ci reg->merge_set->alignment); 677bf215546Sopenharmony_ci ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2; 678bf215546Sopenharmony_ci } 679bf215546Sopenharmony_ci return reg->merge_set->spill_slot + reg->merge_set_offset * 2; 680bf215546Sopenharmony_ci } else { 681bf215546Sopenharmony_ci if (reg->spill_slot == ~0) { 
682bf215546Sopenharmony_ci reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg)); 683bf215546Sopenharmony_ci ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2; 684bf215546Sopenharmony_ci } 685bf215546Sopenharmony_ci return reg->spill_slot; 686bf215546Sopenharmony_ci } 687bf215546Sopenharmony_ci} 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_cistatic void 690bf215546Sopenharmony_ciset_src_val(struct ir3_register *src, const struct reg_or_immed *val) 691bf215546Sopenharmony_ci{ 692bf215546Sopenharmony_ci if (val->flags & IR3_REG_IMMED) { 693bf215546Sopenharmony_ci src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF); 694bf215546Sopenharmony_ci src->uim_val = val->uimm; 695bf215546Sopenharmony_ci src->def = NULL; 696bf215546Sopenharmony_ci } else if (val->flags & IR3_REG_CONST) { 697bf215546Sopenharmony_ci src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF); 698bf215546Sopenharmony_ci src->num = val->const_num; 699bf215546Sopenharmony_ci src->def = NULL; 700bf215546Sopenharmony_ci } else { 701bf215546Sopenharmony_ci src->def = val->def; 702bf215546Sopenharmony_ci val->def->instr->flags &= ~IR3_INSTR_UNUSED; 703bf215546Sopenharmony_ci } 704bf215546Sopenharmony_ci} 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_cistatic struct ir3_register * 707bf215546Sopenharmony_cimaterialize_pcopy_src(const struct reg_or_immed *src, 708bf215546Sopenharmony_ci struct ir3_instruction *instr, 709bf215546Sopenharmony_ci struct ir3_block *block) 710bf215546Sopenharmony_ci{ 711bf215546Sopenharmony_ci struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1); 712bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(mov); 713bf215546Sopenharmony_ci dst->flags |= src->flags & IR3_REG_HALF; 714bf215546Sopenharmony_ci struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags); 715bf215546Sopenharmony_ci set_src_val(mov_src, src); 716bf215546Sopenharmony_ci mov->cat1.src_type = mov->cat1.dst_type = 717bf215546Sopenharmony_ci 
(src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci if (instr) 720bf215546Sopenharmony_ci ir3_instr_move_before(mov, instr); 721bf215546Sopenharmony_ci return dst; 722bf215546Sopenharmony_ci} 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_cistatic void 725bf215546Sopenharmony_cispill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val, 726bf215546Sopenharmony_ci unsigned spill_slot, struct ir3_instruction *instr, struct ir3_block *block) 727bf215546Sopenharmony_ci{ 728bf215546Sopenharmony_ci struct ir3_register *reg; 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci /* If spilling an immed/const pcopy src, we need to actually materialize it 731bf215546Sopenharmony_ci * first with a mov. 732bf215546Sopenharmony_ci */ 733bf215546Sopenharmony_ci if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) { 734bf215546Sopenharmony_ci reg = materialize_pcopy_src(val, instr, block); 735bf215546Sopenharmony_ci } else { 736bf215546Sopenharmony_ci reg = val->def; 737bf215546Sopenharmony_ci reg->instr->flags &= ~IR3_INSTR_UNUSED; 738bf215546Sopenharmony_ci } 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name, 741bf215546Sopenharmony_ci spill_slot); 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci unsigned elems = reg_elems(reg); 744bf215546Sopenharmony_ci struct ir3_instruction *spill = 745bf215546Sopenharmony_ci ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3); 746bf215546Sopenharmony_ci ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg; 747bf215546Sopenharmony_ci unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED | 748bf215546Sopenharmony_ci IR3_REG_CONST | IR3_REG_SSA | 749bf215546Sopenharmony_ci IR3_REG_ARRAY); 750bf215546Sopenharmony_ci struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags); 751bf215546Sopenharmony_ci ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems; 
752bf215546Sopenharmony_ci spill->cat6.dst_offset = spill_slot; 753bf215546Sopenharmony_ci spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci src->def = reg; 756bf215546Sopenharmony_ci if (reg->flags & IR3_REG_ARRAY) { 757bf215546Sopenharmony_ci src->size = reg->size; 758bf215546Sopenharmony_ci src->array.id = reg->array.id; 759bf215546Sopenharmony_ci src->array.offset = 0; 760bf215546Sopenharmony_ci } else { 761bf215546Sopenharmony_ci src->wrmask = reg->wrmask; 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci if (instr) 765bf215546Sopenharmony_ci ir3_instr_move_before(spill, instr); 766bf215546Sopenharmony_ci} 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_cistatic void 769bf215546Sopenharmony_cispill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval, 770bf215546Sopenharmony_ci struct ir3_instruction *instr, struct ir3_block *block) 771bf215546Sopenharmony_ci{ 772bf215546Sopenharmony_ci if (interval->can_rematerialize && !interval->interval.reg->merge_set) 773bf215546Sopenharmony_ci return; 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg), 776bf215546Sopenharmony_ci instr, block); 777bf215546Sopenharmony_ci} 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci/* This is similar to "limit" in the paper. 
*/ 780bf215546Sopenharmony_cistatic void 781bf215546Sopenharmony_cilimit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 782bf215546Sopenharmony_ci{ 783bf215546Sopenharmony_ci if (ctx->cur_pressure.half > ctx->limit_pressure.half) { 784bf215546Sopenharmony_ci d("cur half pressure %u exceeds %u", ctx->cur_pressure.half, 785bf215546Sopenharmony_ci ctx->limit_pressure.half); 786bf215546Sopenharmony_ci rb_tree_foreach_safe (struct ra_spill_interval, interval, 787bf215546Sopenharmony_ci &ctx->half_live_intervals, half_node) { 788bf215546Sopenharmony_ci d("trying ssa_%u:%u", interval->interval.reg->instr->serialno, 789bf215546Sopenharmony_ci interval->interval.reg->name); 790bf215546Sopenharmony_ci if (!interval->cant_spill) { 791bf215546Sopenharmony_ci if (!interval->already_spilled) 792bf215546Sopenharmony_ci spill_interval(ctx, interval, instr, instr->block); 793bf215546Sopenharmony_ci ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 794bf215546Sopenharmony_ci if (ctx->cur_pressure.half <= ctx->limit_pressure.half) 795bf215546Sopenharmony_ci break; 796bf215546Sopenharmony_ci } 797bf215546Sopenharmony_ci } 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_ci assert(ctx->cur_pressure.half <= ctx->limit_pressure.half); 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci if (ctx->cur_pressure.full > ctx->limit_pressure.full) { 803bf215546Sopenharmony_ci d("cur full pressure %u exceeds %u", ctx->cur_pressure.full, 804bf215546Sopenharmony_ci ctx->limit_pressure.full); 805bf215546Sopenharmony_ci rb_tree_foreach_safe (struct ra_spill_interval, interval, 806bf215546Sopenharmony_ci &ctx->full_live_intervals, node) { 807bf215546Sopenharmony_ci d("trying ssa_%u:%u", interval->interval.reg->instr->serialno, 808bf215546Sopenharmony_ci interval->interval.reg->name); 809bf215546Sopenharmony_ci if (!interval->cant_spill) { 810bf215546Sopenharmony_ci if (!interval->already_spilled) 811bf215546Sopenharmony_ci spill_interval(ctx, 
interval, instr, instr->block); 812bf215546Sopenharmony_ci ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 813bf215546Sopenharmony_ci if (ctx->cur_pressure.full <= ctx->limit_pressure.full) 814bf215546Sopenharmony_ci break; 815bf215546Sopenharmony_ci } else { 816bf215546Sopenharmony_ci d("can't spill"); 817bf215546Sopenharmony_ci } 818bf215546Sopenharmony_ci } 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci assert(ctx->cur_pressure.full <= ctx->limit_pressure.full); 821bf215546Sopenharmony_ci } 822bf215546Sopenharmony_ci} 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci/* There's a corner case where we reload a value which has overlapping live 825bf215546Sopenharmony_ci * values already reloaded, either because it's the child of some other interval 826bf215546Sopenharmony_ci * that was already reloaded or some of its children have already been 827bf215546Sopenharmony_ci * reloaded. Because RA only expects overlapping source/dest intervals for meta 828bf215546Sopenharmony_ci * instructions (split/collect), and we don't want to add register pressure by 829bf215546Sopenharmony_ci * creating an entirely separate value, we need to add splits and collects to 830bf215546Sopenharmony_ci * deal with this case. These splits/collects have to also have correct merge 831bf215546Sopenharmony_ci * set information, so that it doesn't result in any actual code or register 832bf215546Sopenharmony_ci * pressure in practice. 
833bf215546Sopenharmony_ci */ 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_cistatic void 836bf215546Sopenharmony_ciadd_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def, 837bf215546Sopenharmony_ci unsigned offset) 838bf215546Sopenharmony_ci{ 839bf215546Sopenharmony_ci def->merge_set = set; 840bf215546Sopenharmony_ci def->merge_set_offset = offset; 841bf215546Sopenharmony_ci def->interval_start = set->interval_start + offset; 842bf215546Sopenharmony_ci def->interval_end = set->interval_start + offset + reg_size(def); 843bf215546Sopenharmony_ci} 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_cistatic struct ir3_register * 846bf215546Sopenharmony_cisplit(struct ir3_register *def, unsigned offset, 847bf215546Sopenharmony_ci struct ir3_instruction *after, struct ir3_block *block) 848bf215546Sopenharmony_ci{ 849bf215546Sopenharmony_ci if (reg_elems(def) == 1) { 850bf215546Sopenharmony_ci assert(offset == 0); 851bf215546Sopenharmony_ci return def; 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci assert(!(def->flags & IR3_REG_ARRAY)); 855bf215546Sopenharmony_ci assert(def->merge_set); 856bf215546Sopenharmony_ci struct ir3_instruction *split = 857bf215546Sopenharmony_ci ir3_instr_create(block, OPC_META_SPLIT, 1, 1); 858bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(split); 859bf215546Sopenharmony_ci dst->flags |= def->flags & IR3_REG_HALF; 860bf215546Sopenharmony_ci struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags); 861bf215546Sopenharmony_ci src->wrmask = def->wrmask; 862bf215546Sopenharmony_ci src->def = def; 863bf215546Sopenharmony_ci add_to_merge_set(def->merge_set, dst, 864bf215546Sopenharmony_ci def->merge_set_offset + offset * reg_elem_size(def)); 865bf215546Sopenharmony_ci if (after) 866bf215546Sopenharmony_ci ir3_instr_move_before(split, after); 867bf215546Sopenharmony_ci return dst; 868bf215546Sopenharmony_ci} 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_cistatic 
struct ir3_register * 871bf215546Sopenharmony_ciextract(struct ir3_register *parent_def, unsigned offset, unsigned elems, 872bf215546Sopenharmony_ci struct ir3_instruction *after, struct ir3_block *block) 873bf215546Sopenharmony_ci{ 874bf215546Sopenharmony_ci if (offset == 0 && elems == reg_elems(parent_def)) 875bf215546Sopenharmony_ci return parent_def; 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci struct ir3_register *srcs[elems]; 878bf215546Sopenharmony_ci for (unsigned i = 0; i < elems; i++) { 879bf215546Sopenharmony_ci srcs[i] = split(parent_def, offset + i, after, block); 880bf215546Sopenharmony_ci } 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci struct ir3_instruction *collect = 883bf215546Sopenharmony_ci ir3_instr_create(block, OPC_META_COLLECT, 1, elems); 884bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(collect); 885bf215546Sopenharmony_ci dst->flags |= parent_def->flags & IR3_REG_HALF; 886bf215546Sopenharmony_ci dst->wrmask = MASK(elems); 887bf215546Sopenharmony_ci add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset); 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci for (unsigned i = 0; i < elems; i++) { 890bf215546Sopenharmony_ci ir3_src_create(collect, INVALID_REG, parent_def->flags)->def = srcs[i]; 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci if (after) 894bf215546Sopenharmony_ci ir3_instr_move_before(collect, after); 895bf215546Sopenharmony_ci return dst; 896bf215546Sopenharmony_ci} 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_cistatic struct ir3_register * 899bf215546Sopenharmony_cireload(struct ra_spill_ctx *ctx, struct ir3_register *reg, 900bf215546Sopenharmony_ci struct ir3_instruction *after, struct ir3_block *block) 901bf215546Sopenharmony_ci{ 902bf215546Sopenharmony_ci unsigned spill_slot = get_spill_slot(ctx, reg); 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name, 
905bf215546Sopenharmony_ci spill_slot); 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_ci unsigned elems = reg_elems(reg); 908bf215546Sopenharmony_ci struct ir3_instruction *reload = 909bf215546Sopenharmony_ci ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3); 910bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(reload); 911bf215546Sopenharmony_ci dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 912bf215546Sopenharmony_ci /* The reload may be split into multiple pieces, and if the destination 913bf215546Sopenharmony_ci * overlaps with the base register then it could get clobbered before the 914bf215546Sopenharmony_ci * last ldp in the sequence. Note that we always reserve space for the base 915bf215546Sopenharmony_ci * register throughout the whole program, so effectively extending its live 916bf215546Sopenharmony_ci * range past the end of the instruction isn't a problem for our pressure 917bf215546Sopenharmony_ci * accounting. 918bf215546Sopenharmony_ci */ 919bf215546Sopenharmony_ci dst->flags |= IR3_REG_EARLY_CLOBBER; 920bf215546Sopenharmony_ci ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg; 921bf215546Sopenharmony_ci struct ir3_register *offset_reg = 922bf215546Sopenharmony_ci ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED); 923bf215546Sopenharmony_ci offset_reg->uim_val = spill_slot; 924bf215546Sopenharmony_ci ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems; 925bf215546Sopenharmony_ci reload->cat6.type = (reg->flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci if (reg->flags & IR3_REG_ARRAY) { 928bf215546Sopenharmony_ci dst->array.offset = 0; 929bf215546Sopenharmony_ci dst->array.id = reg->array.id; 930bf215546Sopenharmony_ci dst->size = reg->size; 931bf215546Sopenharmony_ci } else { 932bf215546Sopenharmony_ci dst->wrmask = MASK(elems); 933bf215546Sopenharmony_ci } 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci dst->merge_set = reg->merge_set; 936bf215546Sopenharmony_ci dst->merge_set_offset = reg->merge_set_offset; 937bf215546Sopenharmony_ci dst->interval_start = reg->interval_start; 938bf215546Sopenharmony_ci dst->interval_end = reg->interval_end; 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_ci if (after) 941bf215546Sopenharmony_ci ir3_instr_move_before(reload, after); 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci return dst; 944bf215546Sopenharmony_ci} 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_cistatic void 947bf215546Sopenharmony_cirewrite_src_interval(struct ra_spill_ctx *ctx, 948bf215546Sopenharmony_ci struct ra_spill_interval *interval, 949bf215546Sopenharmony_ci struct ir3_register *def, 950bf215546Sopenharmony_ci struct ir3_instruction *instr, 951bf215546Sopenharmony_ci struct ir3_block *block) 952bf215546Sopenharmony_ci{ 953bf215546Sopenharmony_ci interval->dst.flags = def->flags; 954bf215546Sopenharmony_ci interval->dst.def = def; 955bf215546Sopenharmony_ci interval->needs_reload = false; 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci rb_tree_foreach (struct ra_spill_interval, child, 958bf215546Sopenharmony_ci &interval->interval.children, interval.node) { 959bf215546Sopenharmony_ci struct ir3_register *child_reg = child->interval.reg; 960bf215546Sopenharmony_ci struct ir3_register *child_def = 961bf215546Sopenharmony_ci extract(def, (child_reg->interval_start - 962bf215546Sopenharmony_ci interval->interval.reg->interval_start) / reg_elem_size(def), 963bf215546Sopenharmony_ci reg_elems(child_reg), 
instr, block); 964bf215546Sopenharmony_ci rewrite_src_interval(ctx, child, child_def, instr, block); 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci} 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_cistatic void 969bf215546Sopenharmony_cireload_def(struct ra_spill_ctx *ctx, struct ir3_register *def, 970bf215546Sopenharmony_ci struct ir3_instruction *instr, struct ir3_block *block) 971bf215546Sopenharmony_ci{ 972bf215546Sopenharmony_ci unsigned elems = reg_elems(def); 973bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[def->name]; 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci struct ir3_reg_interval *ir3_parent = interval->interval.parent; 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci if (ir3_parent) { 978bf215546Sopenharmony_ci struct ra_spill_interval *parent = 979bf215546Sopenharmony_ci ir3_reg_interval_to_interval(ir3_parent); 980bf215546Sopenharmony_ci if (!parent->needs_reload) { 981bf215546Sopenharmony_ci interval->dst.flags = def->flags; 982bf215546Sopenharmony_ci interval->dst.def = extract( 983bf215546Sopenharmony_ci parent->dst.def, (def->interval_start - parent->dst.def->interval_start) / 984bf215546Sopenharmony_ci reg_elem_size(def), elems, instr, block); 985bf215546Sopenharmony_ci return; 986bf215546Sopenharmony_ci } 987bf215546Sopenharmony_ci } 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci struct ir3_register *dst; 990bf215546Sopenharmony_ci if (interval->can_rematerialize) 991bf215546Sopenharmony_ci dst = rematerialize(def, instr, block); 992bf215546Sopenharmony_ci else 993bf215546Sopenharmony_ci dst = reload(ctx, def, instr, block); 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci rewrite_src_interval(ctx, interval, dst, instr, block); 996bf215546Sopenharmony_ci} 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_cistatic void 999bf215546Sopenharmony_cireload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 1000bf215546Sopenharmony_ci struct ir3_register *src) 
1001bf215546Sopenharmony_ci{ 1002bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci if (interval->needs_reload) { 1005bf215546Sopenharmony_ci reload_def(ctx, src->def, instr, instr->block); 1006bf215546Sopenharmony_ci } 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci ra_spill_interval_root(interval)->cant_spill = false; 1009bf215546Sopenharmony_ci} 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_cistatic void 1012bf215546Sopenharmony_cirewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr, 1013bf215546Sopenharmony_ci struct ir3_register *src) 1014bf215546Sopenharmony_ci{ 1015bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[src->def->name]; 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci set_src_val(src, &interval->dst); 1018bf215546Sopenharmony_ci} 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_cistatic void 1021bf215546Sopenharmony_ciupdate_max_pressure(struct ra_spill_ctx *ctx) 1022bf215546Sopenharmony_ci{ 1023bf215546Sopenharmony_ci d("pressure:"); 1024bf215546Sopenharmony_ci d("\tfull: %u", ctx->cur_pressure.full); 1025bf215546Sopenharmony_ci d("\thalf: %u", ctx->cur_pressure.half); 1026bf215546Sopenharmony_ci d("\tshared: %u", ctx->cur_pressure.shared); 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci ctx->max_pressure.full = 1029bf215546Sopenharmony_ci MAX2(ctx->max_pressure.full, ctx->cur_pressure.full); 1030bf215546Sopenharmony_ci ctx->max_pressure.half = 1031bf215546Sopenharmony_ci MAX2(ctx->max_pressure.half, ctx->cur_pressure.half); 1032bf215546Sopenharmony_ci ctx->max_pressure.shared = 1033bf215546Sopenharmony_ci MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared); 1034bf215546Sopenharmony_ci} 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_cistatic void 1037bf215546Sopenharmony_cihandle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 
1038bf215546Sopenharmony_ci{ 1039bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 1040bf215546Sopenharmony_ci init_dst(ctx, dst); 1041bf215546Sopenharmony_ci } 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci if (ctx->spilling) { 1044bf215546Sopenharmony_ci ra_foreach_src (src, instr) 1045bf215546Sopenharmony_ci insert_src(ctx, src); 1046bf215546Sopenharmony_ci } 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci /* Handle tied and early-kill destinations. If a destination is tied to a 1049bf215546Sopenharmony_ci * source and that source is live-through, then we need to allocate a new 1050bf215546Sopenharmony_ci * register for the destination which is live-through itself and cannot 1051bf215546Sopenharmony_ci * overlap the sources. Similarly early-kill destinations cannot overlap 1052bf215546Sopenharmony_ci * sources. 1053bf215546Sopenharmony_ci */ 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 1056bf215546Sopenharmony_ci struct ir3_register *tied_src = dst->tied; 1057bf215546Sopenharmony_ci if ((tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL)) || 1058bf215546Sopenharmony_ci (dst->flags & IR3_REG_EARLY_CLOBBER)) 1059bf215546Sopenharmony_ci insert_dst(ctx, dst); 1060bf215546Sopenharmony_ci } 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci if (ctx->spilling) 1063bf215546Sopenharmony_ci limit(ctx, instr); 1064bf215546Sopenharmony_ci else 1065bf215546Sopenharmony_ci update_max_pressure(ctx); 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci if (ctx->spilling) { 1068bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1069bf215546Sopenharmony_ci reload_src(ctx, instr, src); 1070bf215546Sopenharmony_ci update_src_next_use(ctx, src); 1071bf215546Sopenharmony_ci } 1072bf215546Sopenharmony_ci } 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1075bf215546Sopenharmony_ci if (src->flags & IR3_REG_FIRST_KILL) 1076bf215546Sopenharmony_ci remove_src_early(ctx, instr, 
src); 1077bf215546Sopenharmony_ci } 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 1080bf215546Sopenharmony_ci insert_dst(ctx, dst); 1081bf215546Sopenharmony_ci } 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci if (ctx->spilling) 1084bf215546Sopenharmony_ci limit(ctx, instr); 1085bf215546Sopenharmony_ci else 1086bf215546Sopenharmony_ci update_max_pressure(ctx); 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci /* We have to remove sources before rewriting them so that we can lookup the 1089bf215546Sopenharmony_ci * interval to remove before the source itself is changed. 1090bf215546Sopenharmony_ci */ 1091bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1092bf215546Sopenharmony_ci if (src->flags & IR3_REG_FIRST_KILL) 1093bf215546Sopenharmony_ci remove_src(ctx, instr, src); 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci if (ctx->spilling) { 1097bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1098bf215546Sopenharmony_ci rewrite_src(ctx, instr, src); 1099bf215546Sopenharmony_ci } 1100bf215546Sopenharmony_ci } 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 1103bf215546Sopenharmony_ci finish_dst(ctx, dst); 1104bf215546Sopenharmony_ci } 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->dsts_count; i++) { 1107bf215546Sopenharmony_ci if (ra_reg_is_dst(instr->dsts[i]) && 1108bf215546Sopenharmony_ci (instr->dsts[i]->flags & IR3_REG_UNUSED)) 1109bf215546Sopenharmony_ci remove_dst(ctx, instr->dsts[i]); 1110bf215546Sopenharmony_ci } 1111bf215546Sopenharmony_ci} 1112bf215546Sopenharmony_ci 1113bf215546Sopenharmony_cistatic struct ra_spill_interval * 1114bf215546Sopenharmony_cicreate_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def) 1115bf215546Sopenharmony_ci{ 1116bf215546Sopenharmony_ci unsigned name = ctx->intervals_count++; 1117bf215546Sopenharmony_ci unsigned offset = 
ctx->live->interval_offset; 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci /* This is kinda hacky, but we need to create a fake SSA def here that is 1120bf215546Sopenharmony_ci * only used as part of the pcopy accounting. See below. 1121bf215546Sopenharmony_ci */ 1122bf215546Sopenharmony_ci struct ir3_register *reg = rzalloc(ctx, struct ir3_register); 1123bf215546Sopenharmony_ci *reg = *def; 1124bf215546Sopenharmony_ci reg->name = name; 1125bf215546Sopenharmony_ci reg->interval_start = offset; 1126bf215546Sopenharmony_ci reg->interval_end = offset + reg_size(def); 1127bf215546Sopenharmony_ci reg->merge_set = NULL; 1128bf215546Sopenharmony_ci 1129bf215546Sopenharmony_ci ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *, 1130bf215546Sopenharmony_ci ctx->intervals_count); 1131bf215546Sopenharmony_ci struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval); 1132bf215546Sopenharmony_ci ra_spill_interval_init(interval, reg); 1133bf215546Sopenharmony_ci ctx->intervals[name] = interval; 1134bf215546Sopenharmony_ci ctx->live->interval_offset += reg_size(def); 1135bf215546Sopenharmony_ci return interval; 1136bf215546Sopenharmony_ci} 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci/* In the sequence of copies generated (see below), would this source be killed? 
1139bf215546Sopenharmony_ci */ 1140bf215546Sopenharmony_cistatic bool 1141bf215546Sopenharmony_ciis_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n) 1142bf215546Sopenharmony_ci{ 1143bf215546Sopenharmony_ci struct ir3_register *src = pcopy->srcs[src_n]; 1144bf215546Sopenharmony_ci if (!(src->flags & IR3_REG_KILL)) 1145bf215546Sopenharmony_ci return false; 1146bf215546Sopenharmony_ci for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) { 1147bf215546Sopenharmony_ci if (pcopy->srcs[j]->def == src->def) 1148bf215546Sopenharmony_ci return false; 1149bf215546Sopenharmony_ci } 1150bf215546Sopenharmony_ci return true; 1151bf215546Sopenharmony_ci} 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci/* Parallel copies are different from normal instructions. The sources together 1154bf215546Sopenharmony_ci * may be larger than the entire register file, so we cannot just reload every 1155bf215546Sopenharmony_ci * source like normal, and indeed that probably wouldn't be a great idea. 1156bf215546Sopenharmony_ci * Instead we essentially need to lower the parallel copy to "copies," just like 1157bf215546Sopenharmony_ci * in the normal CSSA construction, although we implement the copies by 1158bf215546Sopenharmony_ci * reloading and then possibly spilling values. We essentially just shuffle 1159bf215546Sopenharmony_ci * around the sources until each source either (a) is live or (b) has the same 1160bf215546Sopenharmony_ci * spill slot as its corresponding destination. 
We do this by decomposing the 1161bf215546Sopenharmony_ci * copy into a series of copies, so: 1162bf215546Sopenharmony_ci * 1163bf215546Sopenharmony_ci * a, b, c = d, e, f 1164bf215546Sopenharmony_ci * 1165bf215546Sopenharmony_ci * becomes: 1166bf215546Sopenharmony_ci * 1167bf215546Sopenharmony_ci * d' = d 1168bf215546Sopenharmony_ci * e' = e 1169bf215546Sopenharmony_ci * f' = f 1170bf215546Sopenharmony_ci * a = d' 1171bf215546Sopenharmony_ci * b = e' 1172bf215546Sopenharmony_ci * c = f' 1173bf215546Sopenharmony_ci * 1174bf215546Sopenharmony_ci * the temporary SSA values d', e', and f' never actually show up in the result. 1175bf215546Sopenharmony_ci * They are only used for our internal accounting. They may, however, have their 1176bf215546Sopenharmony_ci * own spill slot created for them. Similarly, we don't actually emit any copy 1177bf215546Sopenharmony_ci * instructions, although we emit the spills/reloads that *would've* been 1178bf215546Sopenharmony_ci * required if those copies were there. 1179bf215546Sopenharmony_ci * 1180bf215546Sopenharmony_ci * TODO: in order to reduce the number of temporaries and therefore spill slots, 1181bf215546Sopenharmony_ci * we could instead do a more complicated analysis that considers the location 1182bf215546Sopenharmony_ci * transfer graph. 1183bf215546Sopenharmony_ci * 1184bf215546Sopenharmony_ci * In addition, we actually remove the parallel copy and rewrite all its uses 1185bf215546Sopenharmony_ci * (in the phi nodes) rather than rewrite its sources at the end. 
Recreating it 1186bf215546Sopenharmony_ci * later turns out to be easier than keeping it up-to-date throughout this pass, 1187bf215546Sopenharmony_ci * since we may have to remove entries for phi sources that are spilled and add 1188bf215546Sopenharmony_ci * entries for live-outs that are spilled and reloaded, which can happen here 1189bf215546Sopenharmony_ci * and then possibly be undone or done again when processing live-ins of the 1190bf215546Sopenharmony_ci * successor block. 1191bf215546Sopenharmony_ci */ 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_cistatic void 1194bf215546Sopenharmony_cihandle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy) 1195bf215546Sopenharmony_ci{ 1196bf215546Sopenharmony_ci foreach_dst (dst, pcopy) { 1197bf215546Sopenharmony_ci struct ra_spill_interval *dst_interval = ctx->intervals[dst->name]; 1198bf215546Sopenharmony_ci ra_spill_interval_init(dst_interval, dst); 1199bf215546Sopenharmony_ci } 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci foreach_src_n (src, i, pcopy) { 1202bf215546Sopenharmony_ci d("processing src %u", i); 1203bf215546Sopenharmony_ci struct ir3_register *dst = pcopy->dsts[i]; 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci /* Skip the intermediate copy for cases where the source is merged with 1206bf215546Sopenharmony_ci * the destination. Crucially this means that we also don't reload/spill 1207bf215546Sopenharmony_ci * it if it's been spilled, because it shares the same spill slot. 
1208bf215546Sopenharmony_ci */ 1209bf215546Sopenharmony_ci if (src->def && src->def->merge_set && 1210bf215546Sopenharmony_ci src->def->merge_set == dst->merge_set && 1211bf215546Sopenharmony_ci src->def->merge_set_offset == dst->merge_set_offset) { 1212bf215546Sopenharmony_ci struct ra_spill_interval *src_interval = ctx->intervals[src->def->name]; 1213bf215546Sopenharmony_ci struct ra_spill_interval *dst_interval = ctx->intervals[dst->name]; 1214bf215546Sopenharmony_ci if (src_interval->interval.inserted) { 1215bf215546Sopenharmony_ci update_src_next_use(ctx, src); 1216bf215546Sopenharmony_ci if (is_last_pcopy_src(pcopy, i)) 1217bf215546Sopenharmony_ci ra_spill_ctx_remove(ctx, src_interval); 1218bf215546Sopenharmony_ci dst_interval->cant_spill = true; 1219bf215546Sopenharmony_ci ra_spill_ctx_insert(ctx, dst_interval); 1220bf215546Sopenharmony_ci limit(ctx, pcopy); 1221bf215546Sopenharmony_ci dst_interval->cant_spill = false; 1222bf215546Sopenharmony_ci dst_interval->dst = src_interval->dst; 1223bf215546Sopenharmony_ci } 1224bf215546Sopenharmony_ci } else if (src->def) { 1225bf215546Sopenharmony_ci struct ra_spill_interval *temp_interval = 1226bf215546Sopenharmony_ci create_temp_interval(ctx, dst); 1227bf215546Sopenharmony_ci struct ir3_register *temp = temp_interval->interval.reg; 1228bf215546Sopenharmony_ci temp_interval->next_use_distance = src->next_use; 1229bf215546Sopenharmony_ci 1230bf215546Sopenharmony_ci insert_src(ctx, src); 1231bf215546Sopenharmony_ci limit(ctx, pcopy); 1232bf215546Sopenharmony_ci reload_src(ctx, pcopy, src); 1233bf215546Sopenharmony_ci update_src_next_use(ctx, src); 1234bf215546Sopenharmony_ci if (is_last_pcopy_src(pcopy, i)) 1235bf215546Sopenharmony_ci remove_src(ctx, pcopy, src); 1236bf215546Sopenharmony_ci struct ra_spill_interval *src_interval = 1237bf215546Sopenharmony_ci ctx->intervals[src->def->name]; 1238bf215546Sopenharmony_ci temp_interval->dst = src_interval->dst; 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_ci 
temp_interval->cant_spill = true; 1241bf215546Sopenharmony_ci ra_spill_ctx_insert(ctx, temp_interval); 1242bf215546Sopenharmony_ci limit(ctx, pcopy); 1243bf215546Sopenharmony_ci temp_interval->cant_spill = false; 1244bf215546Sopenharmony_ci 1245bf215546Sopenharmony_ci src->flags = temp->flags; 1246bf215546Sopenharmony_ci src->def = temp; 1247bf215546Sopenharmony_ci } 1248bf215546Sopenharmony_ci } 1249bf215546Sopenharmony_ci 1250bf215546Sopenharmony_ci d("done with pcopy srcs"); 1251bf215546Sopenharmony_ci 1252bf215546Sopenharmony_ci foreach_src_n (src, i, pcopy) { 1253bf215546Sopenharmony_ci struct ir3_register *dst = pcopy->dsts[i]; 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci if (src->def && src->def->merge_set && 1256bf215546Sopenharmony_ci src->def->merge_set == dst->merge_set && 1257bf215546Sopenharmony_ci src->def->merge_set_offset == dst->merge_set_offset) 1258bf215546Sopenharmony_ci continue; 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_ci struct ra_spill_interval *dst_interval = ctx->intervals[dst->name]; 1261bf215546Sopenharmony_ci 1262bf215546Sopenharmony_ci if (!src->def) { 1263bf215546Sopenharmony_ci dst_interval->cant_spill = true; 1264bf215546Sopenharmony_ci ra_spill_ctx_insert(ctx, dst_interval); 1265bf215546Sopenharmony_ci limit(ctx, pcopy); 1266bf215546Sopenharmony_ci dst_interval->cant_spill = false; 1267bf215546Sopenharmony_ci 1268bf215546Sopenharmony_ci assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED)); 1269bf215546Sopenharmony_ci if (src->flags & IR3_REG_CONST) { 1270bf215546Sopenharmony_ci dst_interval->dst.flags = src->flags; 1271bf215546Sopenharmony_ci dst_interval->dst.const_num = src->num; 1272bf215546Sopenharmony_ci } else { 1273bf215546Sopenharmony_ci dst_interval->dst.flags = src->flags; 1274bf215546Sopenharmony_ci dst_interval->dst.uimm = src->uim_val; 1275bf215546Sopenharmony_ci } 1276bf215546Sopenharmony_ci } else { 1277bf215546Sopenharmony_ci struct ra_spill_interval *temp_interval = 
ctx->intervals[src->def->name]; 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ci insert_src(ctx, src); 1280bf215546Sopenharmony_ci limit(ctx, pcopy); 1281bf215546Sopenharmony_ci reload_src(ctx, pcopy, src); 1282bf215546Sopenharmony_ci remove_src(ctx, pcopy, src); 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_ci dst_interval->dst = temp_interval->dst; 1285bf215546Sopenharmony_ci ra_spill_ctx_insert(ctx, dst_interval); 1286bf215546Sopenharmony_ci } 1287bf215546Sopenharmony_ci } 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci pcopy->flags |= IR3_INSTR_UNUSED; 1290bf215546Sopenharmony_ci} 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_cistatic void 1293bf215546Sopenharmony_cihandle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 1294bf215546Sopenharmony_ci{ 1295bf215546Sopenharmony_ci init_dst(ctx, instr->dsts[0]); 1296bf215546Sopenharmony_ci insert_dst(ctx, instr->dsts[0]); 1297bf215546Sopenharmony_ci finish_dst(ctx, instr->dsts[0]); 1298bf215546Sopenharmony_ci} 1299bf215546Sopenharmony_ci 1300bf215546Sopenharmony_cistatic void 1301bf215546Sopenharmony_ciremove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr) 1302bf215546Sopenharmony_ci{ 1303bf215546Sopenharmony_ci if (instr->opc == OPC_META_TEX_PREFETCH) { 1304bf215546Sopenharmony_ci ra_foreach_src (src, instr) 1305bf215546Sopenharmony_ci remove_src(ctx, instr, src); 1306bf215546Sopenharmony_ci } 1307bf215546Sopenharmony_ci if (instr->dsts[0]->flags & IR3_REG_UNUSED) 1308bf215546Sopenharmony_ci remove_dst(ctx, instr->dsts[0]); 1309bf215546Sopenharmony_ci} 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_cistatic void 1312bf215546Sopenharmony_cihandle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block, 1313bf215546Sopenharmony_ci struct ir3_register *def) 1314bf215546Sopenharmony_ci{ 1315bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[def->name]; 1316bf215546Sopenharmony_ci ra_spill_interval_init(interval, def); 
1317bf215546Sopenharmony_ci if (ctx->spilling) { 1318bf215546Sopenharmony_ci interval->next_use_distance = 1319bf215546Sopenharmony_ci ctx->blocks[block->index].next_use_start[def->name]; 1320bf215546Sopenharmony_ci } 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_ci ra_spill_ctx_insert(ctx, interval); 1323bf215546Sopenharmony_ci} 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_cistatic bool 1326bf215546Sopenharmony_ciis_live_in_phi(struct ir3_register *def, struct ir3_block *block) 1327bf215546Sopenharmony_ci{ 1328bf215546Sopenharmony_ci return def->instr->opc == OPC_META_PHI && def->instr->block == block; 1329bf215546Sopenharmony_ci} 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_cistatic bool 1332bf215546Sopenharmony_ciis_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def, 1333bf215546Sopenharmony_ci struct ir3_block *block, unsigned pred_idx) 1334bf215546Sopenharmony_ci{ 1335bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[pred_idx]; 1336bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1337bf215546Sopenharmony_ci if (is_live_in_phi(def, block)) { 1338bf215546Sopenharmony_ci def = def->instr->srcs[pred_idx]->def; 1339bf215546Sopenharmony_ci if (!def) 1340bf215546Sopenharmony_ci return false; 1341bf215546Sopenharmony_ci } 1342bf215546Sopenharmony_ci 1343bf215546Sopenharmony_ci return _mesa_hash_table_search(state->remap, def); 1344bf215546Sopenharmony_ci} 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_cistatic bool 1347bf215546Sopenharmony_ciis_live_in_undef(struct ir3_register *def, 1348bf215546Sopenharmony_ci struct ir3_block *block, unsigned pred_idx) 1349bf215546Sopenharmony_ci{ 1350bf215546Sopenharmony_ci if (!is_live_in_phi(def, block)) 1351bf215546Sopenharmony_ci return false; 1352bf215546Sopenharmony_ci 1353bf215546Sopenharmony_ci return !def->instr->srcs[pred_idx]->def; 1354bf215546Sopenharmony_ci} 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_cistatic 
struct reg_or_immed * 1357bf215546Sopenharmony_ciread_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def, 1358bf215546Sopenharmony_ci struct ir3_block *block, unsigned pred_idx) 1359bf215546Sopenharmony_ci{ 1360bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[pred_idx]; 1361bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_ci if (is_live_in_phi(def, block)) { 1364bf215546Sopenharmony_ci def = def->instr->srcs[pred_idx]->def; 1365bf215546Sopenharmony_ci if (!def) 1366bf215546Sopenharmony_ci return NULL; 1367bf215546Sopenharmony_ci } 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search(state->remap, def); 1370bf215546Sopenharmony_ci if (entry) 1371bf215546Sopenharmony_ci return entry->data; 1372bf215546Sopenharmony_ci else 1373bf215546Sopenharmony_ci return NULL; 1374bf215546Sopenharmony_ci} 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_cistatic bool 1377bf215546Sopenharmony_ciis_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def, 1378bf215546Sopenharmony_ci struct ir3_block *block) 1379bf215546Sopenharmony_ci{ 1380bf215546Sopenharmony_ci for (unsigned i = 0; i < block->predecessors_count; i++) { 1381bf215546Sopenharmony_ci if (!is_live_in_pred(ctx, def, block, i)) 1382bf215546Sopenharmony_ci return false; 1383bf215546Sopenharmony_ci } 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci return true; 1386bf215546Sopenharmony_ci} 1387bf215546Sopenharmony_ci 1388bf215546Sopenharmony_cistatic void 1389bf215546Sopenharmony_cispill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def, 1390bf215546Sopenharmony_ci struct ir3_block *block) 1391bf215546Sopenharmony_ci{ 1392bf215546Sopenharmony_ci for (unsigned i = 0; i < block->predecessors_count; i++) { 1393bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[i]; 1394bf215546Sopenharmony_ci struct 
ra_spill_block_state *state = &ctx->blocks[pred->index]; 1395bf215546Sopenharmony_ci 1396bf215546Sopenharmony_ci if (!state->visited) 1397bf215546Sopenharmony_ci continue; 1398bf215546Sopenharmony_ci 1399bf215546Sopenharmony_ci struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i); 1400bf215546Sopenharmony_ci if (pred_def) { 1401bf215546Sopenharmony_ci spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred); 1402bf215546Sopenharmony_ci } 1403bf215546Sopenharmony_ci } 1404bf215546Sopenharmony_ci} 1405bf215546Sopenharmony_ci 1406bf215546Sopenharmony_cistatic void 1407bf215546Sopenharmony_cispill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block) 1408bf215546Sopenharmony_ci{ 1409bf215546Sopenharmony_ci bool all_preds_visited = true; 1410bf215546Sopenharmony_ci for (unsigned i = 0; i < block->predecessors_count; i++) { 1411bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[i]; 1412bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1413bf215546Sopenharmony_ci if (!state->visited) { 1414bf215546Sopenharmony_ci all_preds_visited = false; 1415bf215546Sopenharmony_ci break; 1416bf215546Sopenharmony_ci } 1417bf215546Sopenharmony_ci } 1418bf215546Sopenharmony_ci 1419bf215546Sopenharmony_ci /* Note: in the paper they explicitly spill live-through values first, but we 1420bf215546Sopenharmony_ci * should be doing that automatically by virtue of picking the largest 1421bf215546Sopenharmony_ci * distance due to the extra distance added to edges out of loops. 1422bf215546Sopenharmony_ci * 1423bf215546Sopenharmony_ci * TODO: Keep track of pressure in each block and preemptively spill 1424bf215546Sopenharmony_ci * live-through values as described in the paper to avoid spilling them 1425bf215546Sopenharmony_ci * inside the loop. 
1426bf215546Sopenharmony_ci */ 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci if (ctx->cur_pressure.half > ctx->limit_pressure.half) { 1429bf215546Sopenharmony_ci rb_tree_foreach_safe (struct ra_spill_interval, interval, 1430bf215546Sopenharmony_ci &ctx->half_live_intervals, half_node) { 1431bf215546Sopenharmony_ci if (all_preds_visited && 1432bf215546Sopenharmony_ci is_live_in_all_preds(ctx, interval->interval.reg, block)) 1433bf215546Sopenharmony_ci continue; 1434bf215546Sopenharmony_ci if (interval->interval.reg->merge_set || 1435bf215546Sopenharmony_ci !interval->can_rematerialize) 1436bf215546Sopenharmony_ci spill_live_in(ctx, interval->interval.reg, block); 1437bf215546Sopenharmony_ci ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 1438bf215546Sopenharmony_ci if (ctx->cur_pressure.half <= ctx->limit_pressure.half) 1439bf215546Sopenharmony_ci break; 1440bf215546Sopenharmony_ci } 1441bf215546Sopenharmony_ci } 1442bf215546Sopenharmony_ci 1443bf215546Sopenharmony_ci if (ctx->cur_pressure.full > ctx->limit_pressure.full) { 1444bf215546Sopenharmony_ci rb_tree_foreach_safe (struct ra_spill_interval, interval, 1445bf215546Sopenharmony_ci &ctx->full_live_intervals, node) { 1446bf215546Sopenharmony_ci if (all_preds_visited && 1447bf215546Sopenharmony_ci is_live_in_all_preds(ctx, interval->interval.reg, block)) 1448bf215546Sopenharmony_ci continue; 1449bf215546Sopenharmony_ci spill_live_in(ctx, interval->interval.reg, block); 1450bf215546Sopenharmony_ci ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 1451bf215546Sopenharmony_ci if (ctx->cur_pressure.full <= ctx->limit_pressure.full) 1452bf215546Sopenharmony_ci break; 1453bf215546Sopenharmony_ci } 1454bf215546Sopenharmony_ci } 1455bf215546Sopenharmony_ci} 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_cistatic void 1458bf215546Sopenharmony_cilive_in_rewrite(struct ra_spill_ctx *ctx, 1459bf215546Sopenharmony_ci struct ra_spill_interval *interval, 1460bf215546Sopenharmony_ci 
struct reg_or_immed *new_val, 1461bf215546Sopenharmony_ci struct ir3_block *block, unsigned pred_idx) 1462bf215546Sopenharmony_ci{ 1463bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[pred_idx]; 1464bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1465bf215546Sopenharmony_ci struct ir3_register *def = interval->interval.reg; 1466bf215546Sopenharmony_ci if (is_live_in_phi(def, block)) { 1467bf215546Sopenharmony_ci def = def->instr->srcs[pred_idx]->def; 1468bf215546Sopenharmony_ci } 1469bf215546Sopenharmony_ci 1470bf215546Sopenharmony_ci if (def) 1471bf215546Sopenharmony_ci _mesa_hash_table_insert(state->remap, def, new_val); 1472bf215546Sopenharmony_ci 1473bf215546Sopenharmony_ci rb_tree_foreach (struct ra_spill_interval, child, 1474bf215546Sopenharmony_ci &interval->interval.children, interval.node) { 1475bf215546Sopenharmony_ci assert(new_val->flags & IR3_REG_SSA); 1476bf215546Sopenharmony_ci struct ir3_register *child_def = 1477bf215546Sopenharmony_ci extract(new_val->def, 1478bf215546Sopenharmony_ci (child->interval.reg->interval_start - def->interval_start) / 1479bf215546Sopenharmony_ci reg_elem_size(def), reg_elems(child->interval.reg), 1480bf215546Sopenharmony_ci NULL, pred); 1481bf215546Sopenharmony_ci struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed); 1482bf215546Sopenharmony_ci child_val->def = child_def; 1483bf215546Sopenharmony_ci child_val->flags = child_def->flags; 1484bf215546Sopenharmony_ci live_in_rewrite(ctx, child, child_val, block, pred_idx); 1485bf215546Sopenharmony_ci } 1486bf215546Sopenharmony_ci} 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_cistatic void 1489bf215546Sopenharmony_cireload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def, 1490bf215546Sopenharmony_ci struct ir3_block *block) 1491bf215546Sopenharmony_ci{ 1492bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[def->name]; 1493bf215546Sopenharmony_ci for (unsigned i 
= 0; i < block->predecessors_count; i++) { 1494bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[i]; 1495bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1496bf215546Sopenharmony_ci if (!state->visited) 1497bf215546Sopenharmony_ci continue; 1498bf215546Sopenharmony_ci 1499bf215546Sopenharmony_ci if (is_live_in_undef(def, block, i)) 1500bf215546Sopenharmony_ci continue; 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci struct reg_or_immed *new_val = read_live_in(ctx, def, block, i); 1503bf215546Sopenharmony_ci 1504bf215546Sopenharmony_ci if (!new_val) { 1505bf215546Sopenharmony_ci new_val = ralloc(ctx, struct reg_or_immed); 1506bf215546Sopenharmony_ci if (interval->can_rematerialize) 1507bf215546Sopenharmony_ci new_val->def = rematerialize(def, NULL, pred); 1508bf215546Sopenharmony_ci else 1509bf215546Sopenharmony_ci new_val->def = reload(ctx, def, NULL, pred); 1510bf215546Sopenharmony_ci new_val->flags = new_val->def->flags; 1511bf215546Sopenharmony_ci } 1512bf215546Sopenharmony_ci live_in_rewrite(ctx, interval, new_val, block, i); 1513bf215546Sopenharmony_ci } 1514bf215546Sopenharmony_ci} 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_cistatic void 1517bf215546Sopenharmony_cireload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block) 1518bf215546Sopenharmony_ci{ 1519bf215546Sopenharmony_ci rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals, 1520bf215546Sopenharmony_ci interval.node) { 1521bf215546Sopenharmony_ci reload_live_in(ctx, interval->interval.reg, block); 1522bf215546Sopenharmony_ci } 1523bf215546Sopenharmony_ci} 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_cistatic void 1526bf215546Sopenharmony_ciadd_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def, 1527bf215546Sopenharmony_ci struct ir3_block *block) 1528bf215546Sopenharmony_ci{ 1529bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[def->name]; 
1530bf215546Sopenharmony_ci if (!interval->interval.inserted) 1531bf215546Sopenharmony_ci return; 1532bf215546Sopenharmony_ci 1533bf215546Sopenharmony_ci bool needs_phi = false; 1534bf215546Sopenharmony_ci struct ir3_register *cur_def = NULL; 1535bf215546Sopenharmony_ci for (unsigned i = 0; i < block->predecessors_count; i++) { 1536bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[i]; 1537bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1538bf215546Sopenharmony_ci 1539bf215546Sopenharmony_ci if (!state->visited) { 1540bf215546Sopenharmony_ci needs_phi = true; 1541bf215546Sopenharmony_ci break; 1542bf215546Sopenharmony_ci } 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci struct hash_entry *entry = 1545bf215546Sopenharmony_ci _mesa_hash_table_search(state->remap, def); 1546bf215546Sopenharmony_ci assert(entry); 1547bf215546Sopenharmony_ci struct reg_or_immed *pred_val = entry->data; 1548bf215546Sopenharmony_ci if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) || 1549bf215546Sopenharmony_ci !pred_val->def || 1550bf215546Sopenharmony_ci (cur_def && cur_def != pred_val->def)) { 1551bf215546Sopenharmony_ci needs_phi = true; 1552bf215546Sopenharmony_ci break; 1553bf215546Sopenharmony_ci } 1554bf215546Sopenharmony_ci cur_def = pred_val->def; 1555bf215546Sopenharmony_ci } 1556bf215546Sopenharmony_ci 1557bf215546Sopenharmony_ci if (!needs_phi) { 1558bf215546Sopenharmony_ci interval->dst.def = cur_def; 1559bf215546Sopenharmony_ci interval->dst.flags = cur_def->flags; 1560bf215546Sopenharmony_ci return; 1561bf215546Sopenharmony_ci } 1562bf215546Sopenharmony_ci 1563bf215546Sopenharmony_ci struct ir3_instruction *phi = 1564bf215546Sopenharmony_ci ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count); 1565bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(phi); 1566bf215546Sopenharmony_ci dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 1567bf215546Sopenharmony_ci dst->size = 
def->size; 1568bf215546Sopenharmony_ci dst->wrmask = def->wrmask; 1569bf215546Sopenharmony_ci 1570bf215546Sopenharmony_ci dst->interval_start = def->interval_start; 1571bf215546Sopenharmony_ci dst->interval_end = def->interval_end; 1572bf215546Sopenharmony_ci dst->merge_set = def->merge_set; 1573bf215546Sopenharmony_ci dst->merge_set_offset = def->merge_set_offset; 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_ci for (unsigned i = 0; i < block->predecessors_count; i++) { 1576bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[i]; 1577bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1578bf215546Sopenharmony_ci struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags); 1579bf215546Sopenharmony_ci src->size = def->size; 1580bf215546Sopenharmony_ci src->wrmask = def->wrmask; 1581bf215546Sopenharmony_ci 1582bf215546Sopenharmony_ci if (state->visited) { 1583bf215546Sopenharmony_ci struct hash_entry *entry = 1584bf215546Sopenharmony_ci _mesa_hash_table_search(state->remap, def); 1585bf215546Sopenharmony_ci assert(entry); 1586bf215546Sopenharmony_ci struct reg_or_immed *new_val = entry->data; 1587bf215546Sopenharmony_ci set_src_val(src, new_val); 1588bf215546Sopenharmony_ci } else { 1589bf215546Sopenharmony_ci src->def = def; 1590bf215546Sopenharmony_ci } 1591bf215546Sopenharmony_ci } 1592bf215546Sopenharmony_ci 1593bf215546Sopenharmony_ci interval->dst.def = dst; 1594bf215546Sopenharmony_ci interval->dst.flags = dst->flags; 1595bf215546Sopenharmony_ci 1596bf215546Sopenharmony_ci ir3_instr_move_before_block(phi, block); 1597bf215546Sopenharmony_ci} 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci/* When spilling a block with a single predecessors, the pred may have other 1600bf215546Sopenharmony_ci * successors so we can't choose what's live in and we can't spill/restore 1601bf215546Sopenharmony_ci * anything. Just make the inserted intervals exactly match the predecessor. 
If 1602bf215546Sopenharmony_ci * it wasn't live in the predecessor then it must've already been spilled. Also, 1603bf215546Sopenharmony_ci * there are no phi nodes and no live-ins. 1604bf215546Sopenharmony_ci */ 1605bf215546Sopenharmony_cistatic void 1606bf215546Sopenharmony_cispill_single_pred_live_in(struct ra_spill_ctx *ctx, 1607bf215546Sopenharmony_ci struct ir3_block *block) 1608bf215546Sopenharmony_ci{ 1609bf215546Sopenharmony_ci unsigned name; 1610bf215546Sopenharmony_ci BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], 1611bf215546Sopenharmony_ci ctx->live->definitions_count) { 1612bf215546Sopenharmony_ci struct ir3_register *reg = ctx->live->definitions[name]; 1613bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[reg->name]; 1614bf215546Sopenharmony_ci struct reg_or_immed *val = read_live_in(ctx, reg, block, 0); 1615bf215546Sopenharmony_ci if (val) 1616bf215546Sopenharmony_ci interval->dst = *val; 1617bf215546Sopenharmony_ci else 1618bf215546Sopenharmony_ci ra_spill_ctx_remove(ctx, interval); 1619bf215546Sopenharmony_ci } 1620bf215546Sopenharmony_ci} 1621bf215546Sopenharmony_ci 1622bf215546Sopenharmony_cistatic void 1623bf215546Sopenharmony_cirewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi, 1624bf215546Sopenharmony_ci struct ir3_block *block) 1625bf215546Sopenharmony_ci{ 1626bf215546Sopenharmony_ci if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) { 1627bf215546Sopenharmony_ci phi->flags |= IR3_INSTR_UNUSED; 1628bf215546Sopenharmony_ci return; 1629bf215546Sopenharmony_ci } 1630bf215546Sopenharmony_ci 1631bf215546Sopenharmony_ci for (unsigned i = 0; i < block->predecessors_count; i++) { 1632bf215546Sopenharmony_ci struct ir3_block *pred = block->predecessors[i]; 1633bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[pred->index]; 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ci if (!state->visited) 1636bf215546Sopenharmony_ci continue; 1637bf215546Sopenharmony_ci 
1638bf215546Sopenharmony_ci struct ir3_register *src = phi->srcs[i]; 1639bf215546Sopenharmony_ci if (!src->def) 1640bf215546Sopenharmony_ci continue; 1641bf215546Sopenharmony_ci 1642bf215546Sopenharmony_ci struct hash_entry *entry = 1643bf215546Sopenharmony_ci _mesa_hash_table_search(state->remap, src->def); 1644bf215546Sopenharmony_ci assert(entry); 1645bf215546Sopenharmony_ci struct reg_or_immed *new_val = entry->data; 1646bf215546Sopenharmony_ci set_src_val(src, new_val); 1647bf215546Sopenharmony_ci } 1648bf215546Sopenharmony_ci} 1649bf215546Sopenharmony_ci 1650bf215546Sopenharmony_cistatic void 1651bf215546Sopenharmony_cispill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval, 1652bf215546Sopenharmony_ci struct ir3_block *block) 1653bf215546Sopenharmony_ci{ 1654bf215546Sopenharmony_ci struct ir3_register *def = interval->interval.reg; 1655bf215546Sopenharmony_ci 1656bf215546Sopenharmony_ci if (interval->interval.reg->merge_set || 1657bf215546Sopenharmony_ci !interval->can_rematerialize) 1658bf215546Sopenharmony_ci spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block); 1659bf215546Sopenharmony_ci ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval); 1660bf215546Sopenharmony_ci} 1661bf215546Sopenharmony_ci 1662bf215546Sopenharmony_cistatic void 1663bf215546Sopenharmony_cispill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1664bf215546Sopenharmony_ci{ 1665bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[block->index]; 1666bf215546Sopenharmony_ci rb_tree_foreach_safe (struct ra_spill_interval, interval, 1667bf215546Sopenharmony_ci &ctx->reg_ctx.intervals, interval.node) { 1668bf215546Sopenharmony_ci if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) { 1669bf215546Sopenharmony_ci spill_live_out(ctx, interval, block); 1670bf215546Sopenharmony_ci } 1671bf215546Sopenharmony_ci } 1672bf215546Sopenharmony_ci} 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_cistatic void 
1675bf215546Sopenharmony_cireload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def, 1676bf215546Sopenharmony_ci struct ir3_block *block) 1677bf215546Sopenharmony_ci{ 1678bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[def->name]; 1679bf215546Sopenharmony_ci ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval); 1680bf215546Sopenharmony_ci 1681bf215546Sopenharmony_ci reload_def(ctx, def, NULL, block); 1682bf215546Sopenharmony_ci} 1683bf215546Sopenharmony_ci 1684bf215546Sopenharmony_cistatic void 1685bf215546Sopenharmony_cireload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block) 1686bf215546Sopenharmony_ci{ 1687bf215546Sopenharmony_ci struct ra_spill_block_state *state = &ctx->blocks[block->index]; 1688bf215546Sopenharmony_ci unsigned name; 1689bf215546Sopenharmony_ci BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) { 1690bf215546Sopenharmony_ci struct ir3_register *reg = ctx->live->definitions[name]; 1691bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[name]; 1692bf215546Sopenharmony_ci if (!interval->interval.inserted) 1693bf215546Sopenharmony_ci reload_live_out(ctx, reg, block); 1694bf215546Sopenharmony_ci } 1695bf215546Sopenharmony_ci} 1696bf215546Sopenharmony_ci 1697bf215546Sopenharmony_cistatic void 1698bf215546Sopenharmony_ciupdate_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block) 1699bf215546Sopenharmony_ci{ 1700bf215546Sopenharmony_ci assert(!block->successors[1]); 1701bf215546Sopenharmony_ci struct ir3_block *succ = block->successors[0]; 1702bf215546Sopenharmony_ci unsigned pred_idx = ir3_block_get_pred_index(succ, block); 1703bf215546Sopenharmony_ci 1704bf215546Sopenharmony_ci foreach_instr (instr, &succ->instr_list) { 1705bf215546Sopenharmony_ci if (instr->opc != OPC_META_PHI) 1706bf215546Sopenharmony_ci break; 1707bf215546Sopenharmony_ci 1708bf215546Sopenharmony_ci struct ir3_register *def = instr->srcs[pred_idx]->def; 
1709bf215546Sopenharmony_ci if (!def) 1710bf215546Sopenharmony_ci continue; 1711bf215546Sopenharmony_ci 1712bf215546Sopenharmony_ci struct ra_spill_interval *interval = ctx->intervals[def->name]; 1713bf215546Sopenharmony_ci if (!interval->interval.inserted) 1714bf215546Sopenharmony_ci continue; 1715bf215546Sopenharmony_ci set_src_val(instr->srcs[pred_idx], &interval->dst); 1716bf215546Sopenharmony_ci } 1717bf215546Sopenharmony_ci}

/* Mark the value tracked by "interval", and recursively by all of its child
 * intervals, as live-out of predecessor number "pred_idx" of "block".
 *
 * If the interval's register is a live-in phi of "block" (as reported by
 * is_live_in_phi()), the value that is live at the end of the predecessor is
 * the phi source for that predecessor rather than the phi destination, so
 * that source's definition is recorded instead.
 */
static void
record_pred_live_out(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }
   BITSET_SET(state->live_out, def->name);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_pred_live_out(ctx, child, block, pred_idx);
   }
}

/* For every predecessor of "block" that has not been visited yet, allocate a
 * zeroed live_out bitset (one bit per definition in ctx->live) and fill it
 * from the intervals currently held in ctx->reg_ctx.intervals.
 *
 * Predecessors that were already visited are left untouched.
 */
static void
record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (state->visited)
         continue;

      state->live_out = rzalloc_array(ctx, BITSET_WORD,
                                      BITSET_WORDS(ctx->live->definitions_count));

      rb_tree_foreach (struct ra_spill_interval, interval,
                       &ctx->reg_ctx.intervals, interval.node) {
         record_pred_live_out(ctx, interval, block, i);
      }
   }
}

/* Save what "interval" (and, recursively, each of its children) is currently
 * mapped to into state->remap, keyed by the interval's register.
 *
 * An entry is skipped when the mapping is flagged IR3_REG_SSA but carries a
 * NULL def. The stored value is a copy allocated out of ctx, so it stays
 * valid after the interval itself is reused.
 */
static void
record_live_out(struct ra_spill_ctx *ctx,
                struct ra_spill_block_state *state,
                struct ra_spill_interval *interval)
{
   if (!(interval->dst.flags & IR3_REG_SSA) ||
       interval->dst.def) {
      struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed);
      *val = interval->dst;
      _mesa_hash_table_insert(state->remap, interval->interval.reg, val);
   }
   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_live_out(ctx, state, child);
   }
}

/* Create the remap table of "block" and populate it with the current mapping
 * of every interval in ctx->reg_ctx.intervals (see record_live_out()).
 */
static void
record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   state->remap = _mesa_pointer_hash_table_create(ctx);

   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      record_live_out(ctx, state, interval);
   }
}

/* Process a single block. This is shared between the pressure calculation
 * (ctx->spilling == false) and the actual spilling (ctx->spilling == true).
 *
 * The steps are, in order:
 *
 * 1. Reset the current pressure and the interval trees.
 * 2. Register the block's live-ins and the leading phi/input/tex-prefetch
 *    instructions.
 * 3. When spilling, reconcile the live-in state with the predecessors;
 *    otherwise just update the maximum pressure.
 * 4. Walk all of the instructions.
 * 5. When spilling and the first successor was already visited, reconcile the
 *    live-out state with what that successor expects.
 * 6. When spilling, record the live-out mapping and mark the block visited.
 */
static void
handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
   rb_tree_init(&ctx->reg_ctx.intervals);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      handle_live_in(ctx, block, reg);
   }

   /* Phis, inputs and tex prefetches are only looked for at the very start
    * of the block; stop at the first instruction of any other kind.
    */
   foreach_instr (instr, &block->instr_list) {
      if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH)
         break;
      handle_input_phi(ctx, instr);
   }

   if (ctx->spilling) {
      if (block->predecessors_count == 1) {
         spill_single_pred_live_in(ctx, block);
      } else {
         spill_live_ins(ctx, block);
         reload_live_ins(ctx, block);
         record_pred_live_outs(ctx, block);
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc != OPC_META_PHI)
               break;
            rewrite_phi(ctx, instr, block);
         }
         BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                             ctx->live->definitions_count) {
            struct ir3_register *reg = ctx->live->definitions[name];
            add_live_in_phi(ctx, reg, block);
         }
      }
   } else {
      update_max_pressure(ctx);
   }

   foreach_instr (instr, &block->instr_list) {
      di(instr, "processing");

      if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
          instr->opc == OPC_META_TEX_PREFETCH)
         remove_input_phi(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
         handle_pcopy(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_MOV &&
               instr->dsts[0] == ctx->base_reg)
         /* skip */;
      else
         handle_instr(ctx, instr);
   }

   /* A successor that has already been visited (presumably the target of a
    * loop back-edge) has fixed its live-in state, so this block has to
    * match it on the way out. Such a block is expected to have only that one
    * successor.
    */
   if (ctx->spilling && block->successors[0]) {
      struct ra_spill_block_state *state =
         &ctx->blocks[block->successors[0]->index];
      if (state->visited) {
         assert(!block->successors[1]);

         spill_live_outs(ctx, block);
         reload_live_outs(ctx, block);
         update_live_out_phis(ctx, block);
      }
   }

   if (ctx->spilling) {
      record_live_outs(ctx, block);
      ctx->blocks[block->index].visited = true;
   }
}

/* Try to simplify a phi whose sources, ignoring self-references, all point to
 * the same definition.
 *
 * On success the common definition is stored in phi->data (NULL if every
 * source referred to the phi itself), the phi is flagged IR3_INSTR_UNUSED and
 * true is returned. If any source is undef or two sources disagree, the phi
 * is left alone and false is returned.
 */
static bool
simplify_phi_node(struct ir3_instruction *phi)
{
   struct ir3_register *def = NULL;
   foreach_src (src, phi) {
      /* Ignore phi sources which point to the phi itself. */
      if (src->def == phi->dsts[0])
         continue;
      /* If it's undef or it doesn't match the previous sources, bail */
      if (!src->def || (def && def != src->def))
         return false;
      def = src->def;
   }

   phi->data = def;
   phi->flags |= IR3_INSTR_UNUSED;
   return true;
}

/* Return the definition that "def" should be replaced with: if "def" is
 * produced by a phi that has a non-NULL phi->data, that is returned,
 * otherwise "def" itself is returned.
 */
static struct ir3_register *
simplify_phi_def(struct ir3_register *def)
{
   if (def->instr->opc == OPC_META_PHI) {
      struct ir3_instruction *phi = def->instr;

      /* Note: this function is always called at least once after visiting the
       * phi, so either there has been a simplified phi in the meantime, in
       * which case we will set progress=true and visit the definition again, or
       * phi->data already has the most up-to-date value. Therefore we don't
       * have to recursively check phi->data.
       */
      if (phi->data)
         return phi->data;
   }

   return def;
}

/* Rewrite every source of "instr" that has a definition through
 * simplify_phi_def(). Sources without a definition are skipped.
 */
static void
simplify_phi_srcs(struct ir3_instruction *instr)
{
   foreach_src (src, instr) {
      if (src->def)
         src->def = simplify_phi_def(src->def);
   }
}

/* We insert phi nodes for all live-ins of loops in case we need to split the
 * live range. This pass cleans that up for the case where the live range didn't
 * actually need to be split.
 *
 * phi->data of every leading phi is cleared first, then the IR is walked
 * repeatedly until no further phi can be simplified. Simplified phis are only
 * flagged IR3_INSTR_UNUSED here; nothing is removed from the instruction
 * lists by this function.
 */
static void
simplify_phi_nodes(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         instr->data = NULL;
      }
   }

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
               continue;

            simplify_phi_srcs(instr);
         }

         /* Visit phi nodes in the successors to make sure that phi sources are
          * always visited at least once after visiting the definition they
          * point to. See note in simplify_phi_def() for why this is necessary.
          */
         for (unsigned i = 0; i < 2; i++) {
            struct ir3_block *succ = block->successors[i];
            if (!succ)
               continue;
            foreach_instr (instr, &succ->instr_list) {
               if (instr->opc != OPC_META_PHI)
                  break;
               if (instr->flags & IR3_INSTR_UNUSED) {
                  /* Already simplified: just keep its replacement up to
                   * date.
                   */
                  if (instr->data)
                     instr->data = simplify_phi_def(instr->data);
               } else {
                  simplify_phi_srcs(instr);
                  progress |= simplify_phi_node(instr);
               }
            }
         }
      }
   } while (progress);
}

/* Clear IR3_INSTR_UNUSED on every instruction, so that the flag can be used
 * afterwards to mark the instructions which cleanup_dead() should remove.
 */
static void
unmark_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->flags &= ~IR3_INSTR_UNUSED;
      }
   }
}

/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
 * which ones are dead along the way, so there's nothing to compute here.
 *
 * Every instruction flagged IR3_INSTR_UNUSED is unlinked from its block's
 * instruction list.
 */
static void
cleanup_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->flags & IR3_INSTR_UNUSED)
            list_delinit(&instr->node);
      }
   }
}

/* Deal with merge sets after spilling. Spilling generally leaves the merge sets
 * in a mess, and even if we properly cleaned up after ourselves, we would want
 * to recompute the merge sets afterward anyway. That's because
 * spilling/reloading can "break up" phi webs and split/collect webs so that
 * allocating them to the same register no longer gives any benefit. For
 * example, imagine we have this:
 *
 * if (...) {
 *    foo = ...
 * } else {
 *    bar = ...
 * }
 * baz = phi(foo, bar)
 *
 * and we spill "baz":
 *
 * if (...) {
 *    foo = ...
 *    spill(foo)
 * } else {
 *    bar = ...
 *    spill(bar)
 * }
 * baz = reload()
 *
 * now foo, bar, and baz don't have to be allocated to the same register. How
 * exactly the merge sets change can be complicated, so it's easier just to
 * recompute them.
 *
 * However, there's a wrinkle in this: those same merge sets determine the
 * register pressure, due to multiple values inhabiting the same register! And
 * we assume that this sharing happens when spilling. Therefore we need a
 * three-step procedure:
 *
 * 1. Drop the original merge sets.
 * 2. Calculate which values *must* be merged, being careful to only use the
 *    interval information which isn't trashed by spilling, and forcibly merge
 *    them.
 * 3. Let ir3_merge_regs() finish the job, including recalculating the
 *    intervals.
 */

static void
fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
{
   /* Step 1: forget every existing merge set. */
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         ra_foreach_dst (dst, instr) {
            dst->merge_set = NULL;
            dst->merge_set_offset = 0;
         }
      }
   }

   /* Step 2: for each split/collect, force-merge the destination with every
    * source that isn't killed and whose interval overlaps the destination's
    * interval, at the offset given by the distance between the two interval
    * starts.
    */
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_SPLIT &&
             instr->opc != OPC_META_COLLECT)
            continue;

         struct ir3_register *dst = instr->dsts[0];
         ra_foreach_src (src, instr) {
            if (!(src->flags & IR3_REG_KILL) &&
                src->def->interval_start < dst->interval_end &&
                dst->interval_start < src->def->interval_end) {
               ir3_force_merge(dst, src->def,
                               src->def->interval_start - dst->interval_start);
            }
         }
      }
   }

   /* Step 3: let the regular merging pass do the rest. */
   ir3_merge_regs(live, ir);
}

/* Calculate the maximum register pressure of the shader variant "v", given
 * its liveness information "live", and store it in "*max_pressure".
 *
 * A temporary context is allocated and freed again before returning.
 * ctx->spilling is not set here, so handle_block() only tracks pressure. The
 * current pressure is asserted to have dropped back to zero after the last
 * block.
 */
void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                  struct ir3_pressure *max_pressure)
{
   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, live);

   foreach_block (block, &v->ir->block_list) {
      handle_block(ctx, block);
   }

   assert(ctx->cur_pressure.full == 0);
   assert(ctx->cur_pressure.half == 0);
   assert(ctx->cur_pressure.shared == 0);

   *max_pressure = ctx->max_pressure;
   ralloc_free(ctx);
}

/* Run the spilling pass on "ir" using "*limit_pressure" as the pressure
 * limit.
 *
 * "*live" is the liveness information on entry. It is freed and replaced by
 * freshly calculated liveness information, allocated under the same ralloc
 * parent as the old one, so the caller must not keep using the old pointer.
 * Spill slots are handed out starting at v->pvtmem_size, and v->pvtmem_size
 * is updated to the final value of ctx->spill_slot once the pass is done.
 *
 * Always returns true.
 */
bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   /* IR3_INSTR_UNUSED is used below to mark what cleanup_dead() removes, so
    * start from a clean slate.
    */
   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}