1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "ir3_compiler.h" 25bf215546Sopenharmony_ci#include "ir3_ra.h" 26bf215546Sopenharmony_ci#include "ralloc.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* This pass "merges" compatible phi-web SSA values. First, we insert a bunch 29bf215546Sopenharmony_ci * of parallelcopy's to trivially turn the program into CSSA form. Then we 30bf215546Sopenharmony_ci * try to "merge" SSA def's into "merge sets" which could be allocated to a 31bf215546Sopenharmony_ci * single register in order to eliminate copies. First we merge phi nodes, 32bf215546Sopenharmony_ci * which should always succeed because of the parallelcopy's we inserted, and 33bf215546Sopenharmony_ci * then we try to coalesce the copies we introduced. 34bf215546Sopenharmony_ci * 35bf215546Sopenharmony_ci * The merged registers are used for three purposes: 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * 1. We always use the same pvtmem slot for spilling all SSA defs in each 38bf215546Sopenharmony_ci * merge set. This prevents us from having to insert memory-to-memory copies 39bf215546Sopenharmony_ci * in the spiller and makes sure we don't insert unecessary copies. 40bf215546Sopenharmony_ci * 2. When two values are live at the same time, part of the same merge 41bf215546Sopenharmony_ci * set, and they overlap each other in the merge set, they always occupy 42bf215546Sopenharmony_ci * overlapping physical registers in RA. This reduces register pressure and 43bf215546Sopenharmony_ci * copies in several important scenarios: 44bf215546Sopenharmony_ci * - When sources of a collect are used later by something else, we don't 45bf215546Sopenharmony_ci * have to introduce copies. 46bf215546Sopenharmony_ci * - We can handle sequences of extracts that "explode" a vector into its 47bf215546Sopenharmony_ci * components without any additional copying. 48bf215546Sopenharmony_ci * 3. We use the merge sets for affinities in register allocation: That is, we 49bf215546Sopenharmony_ci * try to allocate all the definitions in the same merge set to the 50bf215546Sopenharmony_ci * same/compatible registers. This helps us e.g. allocate sources of a collect 51bf215546Sopenharmony_ci * to contiguous registers without too much special code in RA. 52bf215546Sopenharmony_ci * 53bf215546Sopenharmony_ci * In a "normal" register allocator, or when spilling, we'd just merge 54bf215546Sopenharmony_ci * registers in the same merge set to the same register, but with SSA-based 55bf215546Sopenharmony_ci * register allocation we may have to split the live interval. 56bf215546Sopenharmony_ci * 57bf215546Sopenharmony_ci * The implementation is based on "Revisiting Out-of-SSA Translation for 58bf215546Sopenharmony_ci * Correctness, CodeQuality, and Efficiency," and is broadly similar to the 59bf215546Sopenharmony_ci * implementation in nir_from_ssa, with the twist that we also try to coalesce 60bf215546Sopenharmony_ci * META_SPLIT and META_COLLECT. This makes this pass more complicated but 61bf215546Sopenharmony_ci * prevents us from needing to handle these specially in RA and the spiller, 62bf215546Sopenharmony_ci * which are already complicated enough. This also forces us to implement that 63bf215546Sopenharmony_ci * value-comparison optimization they explain, as without it we wouldn't be 64bf215546Sopenharmony_ci * able to coalesce META_SPLIT even in the simplest of cases. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci/* In order to dynamically reconstruct the dominance forest, we need the 68bf215546Sopenharmony_ci * instructions ordered by a preorder traversal of the dominance tree: 69bf215546Sopenharmony_ci */ 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_cistatic unsigned 72bf215546Sopenharmony_ciindex_instrs(struct ir3_block *block, unsigned index) 73bf215546Sopenharmony_ci{ 74bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) 75bf215546Sopenharmony_ci instr->ip = index++; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci for (unsigned i = 0; i < block->dom_children_count; i++) 78bf215546Sopenharmony_ci index = index_instrs(block->dom_children[i], index); 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci return index; 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci/* Definitions within a merge set are ordered by instr->ip as set above: */ 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cistatic bool 86bf215546Sopenharmony_cidef_after(struct ir3_register *a, struct ir3_register *b) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci return a->instr->ip > b->instr->ip; 89bf215546Sopenharmony_ci} 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_cistatic bool 92bf215546Sopenharmony_cidef_dominates(struct ir3_register *a, struct ir3_register *b) 93bf215546Sopenharmony_ci{ 94bf215546Sopenharmony_ci if (def_after(a, b)) { 95bf215546Sopenharmony_ci return false; 96bf215546Sopenharmony_ci } else if (a->instr->block == b->instr->block) { 97bf215546Sopenharmony_ci return def_after(b, a); 98bf215546Sopenharmony_ci } else { 99bf215546Sopenharmony_ci return ir3_block_dominates(a->instr->block, b->instr->block); 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci} 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci/* This represents a region inside a register. The offset is relative to the 104bf215546Sopenharmony_ci * start of the register, and offset + size <= size(reg). 105bf215546Sopenharmony_ci */ 106bf215546Sopenharmony_cistruct def_value { 107bf215546Sopenharmony_ci struct ir3_register *reg; 108bf215546Sopenharmony_ci unsigned offset, size; 109bf215546Sopenharmony_ci}; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci/* Chase any copies to get the source of a region inside a register. This is 112bf215546Sopenharmony_ci * Value(a) in the paper. 113bf215546Sopenharmony_ci */ 114bf215546Sopenharmony_cistatic struct def_value 115bf215546Sopenharmony_cichase_copies(struct def_value value) 116bf215546Sopenharmony_ci{ 117bf215546Sopenharmony_ci while (true) { 118bf215546Sopenharmony_ci struct ir3_instruction *instr = value.reg->instr; 119bf215546Sopenharmony_ci if (instr->opc == OPC_META_SPLIT) { 120bf215546Sopenharmony_ci value.offset += instr->split.off * reg_elem_size(value.reg); 121bf215546Sopenharmony_ci value.reg = instr->srcs[0]->def; 122bf215546Sopenharmony_ci } else if (instr->opc == OPC_META_COLLECT) { 123bf215546Sopenharmony_ci if (value.offset % reg_elem_size(value.reg) != 0 || 124bf215546Sopenharmony_ci value.size > reg_elem_size(value.reg) || 125bf215546Sopenharmony_ci value.offset + value.size > reg_size(value.reg)) 126bf215546Sopenharmony_ci break; 127bf215546Sopenharmony_ci struct ir3_register *src = 128bf215546Sopenharmony_ci instr->srcs[value.offset / reg_elem_size(value.reg)]; 129bf215546Sopenharmony_ci if (!src->def) 130bf215546Sopenharmony_ci break; 131bf215546Sopenharmony_ci value.offset = 0; 132bf215546Sopenharmony_ci value.reg = src->def; 133bf215546Sopenharmony_ci } else { 134bf215546Sopenharmony_ci /* TODO: parallelcopy */ 135bf215546Sopenharmony_ci break; 136bf215546Sopenharmony_ci } 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci return value; 140bf215546Sopenharmony_ci} 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci/* This represents an entry in the merge set, and consists of a register + 143bf215546Sopenharmony_ci * offset from the merge set base. 144bf215546Sopenharmony_ci */ 145bf215546Sopenharmony_cistruct merge_def { 146bf215546Sopenharmony_ci struct ir3_register *reg; 147bf215546Sopenharmony_ci unsigned offset; 148bf215546Sopenharmony_ci}; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_cistatic bool 151bf215546Sopenharmony_cican_skip_interference(const struct merge_def *a, const struct merge_def *b) 152bf215546Sopenharmony_ci{ 153bf215546Sopenharmony_ci unsigned a_start = a->offset; 154bf215546Sopenharmony_ci unsigned b_start = b->offset; 155bf215546Sopenharmony_ci unsigned a_end = a_start + reg_size(a->reg); 156bf215546Sopenharmony_ci unsigned b_end = b_start + reg_size(b->reg); 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci /* Registers that don't overlap never interfere */ 159bf215546Sopenharmony_ci if (a_end <= b_start || b_end <= a_start) 160bf215546Sopenharmony_ci return true; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci /* Disallow skipping interference unless one definition contains the 163bf215546Sopenharmony_ci * other. This restriction is important for register allocation, because 164bf215546Sopenharmony_ci * it means that at any given point in the program, the live values in a 165bf215546Sopenharmony_ci * given merge set will form a tree. If they didn't, then one live value 166bf215546Sopenharmony_ci * would partially overlap another, and they would have overlapping live 167bf215546Sopenharmony_ci * ranges because they're live at the same point. This simplifies register 168bf215546Sopenharmony_ci * allocation and spilling. 169bf215546Sopenharmony_ci */ 170bf215546Sopenharmony_ci if (!((a_start <= b_start && a_end >= b_end) || 171bf215546Sopenharmony_ci (b_start <= a_start && b_end >= a_end))) 172bf215546Sopenharmony_ci return false; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* For each register, chase the intersection of a and b to find the 175bf215546Sopenharmony_ci * ultimate source. 176bf215546Sopenharmony_ci */ 177bf215546Sopenharmony_ci unsigned start = MAX2(a_start, b_start); 178bf215546Sopenharmony_ci unsigned end = MIN2(a_end, b_end); 179bf215546Sopenharmony_ci struct def_value a_value = chase_copies((struct def_value){ 180bf215546Sopenharmony_ci .reg = a->reg, 181bf215546Sopenharmony_ci .offset = start - a_start, 182bf215546Sopenharmony_ci .size = end - start, 183bf215546Sopenharmony_ci }); 184bf215546Sopenharmony_ci struct def_value b_value = chase_copies((struct def_value){ 185bf215546Sopenharmony_ci .reg = b->reg, 186bf215546Sopenharmony_ci .offset = start - b_start, 187bf215546Sopenharmony_ci .size = end - start, 188bf215546Sopenharmony_ci }); 189bf215546Sopenharmony_ci return a_value.reg == b_value.reg && a_value.offset == b_value.offset; 190bf215546Sopenharmony_ci} 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_cistatic struct ir3_merge_set * 193bf215546Sopenharmony_ciget_merge_set(struct ir3_register *def) 194bf215546Sopenharmony_ci{ 195bf215546Sopenharmony_ci if (def->merge_set) 196bf215546Sopenharmony_ci return def->merge_set; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci struct ir3_merge_set *set = ralloc(def, struct ir3_merge_set); 199bf215546Sopenharmony_ci set->preferred_reg = ~0; 200bf215546Sopenharmony_ci set->interval_start = ~0; 201bf215546Sopenharmony_ci set->spill_slot = ~0; 202bf215546Sopenharmony_ci set->size = reg_size(def); 203bf215546Sopenharmony_ci set->alignment = (def->flags & IR3_REG_HALF) ? 1 : 2; 204bf215546Sopenharmony_ci set->regs_count = 1; 205bf215546Sopenharmony_ci set->regs = ralloc(set, struct ir3_register *); 206bf215546Sopenharmony_ci set->regs[0] = def; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci return set; 209bf215546Sopenharmony_ci} 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci/* Merges b into a */ 212bf215546Sopenharmony_cistatic struct ir3_merge_set * 213bf215546Sopenharmony_cimerge_merge_sets(struct ir3_merge_set *a, struct ir3_merge_set *b, int b_offset) 214bf215546Sopenharmony_ci{ 215bf215546Sopenharmony_ci if (b_offset < 0) 216bf215546Sopenharmony_ci return merge_merge_sets(b, a, -b_offset); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci struct ir3_register **new_regs = 219bf215546Sopenharmony_ci rzalloc_array(a, struct ir3_register *, a->regs_count + b->regs_count); 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci unsigned a_index = 0, b_index = 0, new_index = 0; 222bf215546Sopenharmony_ci for (; a_index < a->regs_count || b_index < b->regs_count; new_index++) { 223bf215546Sopenharmony_ci if (b_index < b->regs_count && 224bf215546Sopenharmony_ci (a_index == a->regs_count || 225bf215546Sopenharmony_ci def_after(a->regs[a_index], b->regs[b_index]))) { 226bf215546Sopenharmony_ci new_regs[new_index] = b->regs[b_index++]; 227bf215546Sopenharmony_ci new_regs[new_index]->merge_set_offset += b_offset; 228bf215546Sopenharmony_ci } else { 229bf215546Sopenharmony_ci new_regs[new_index] = a->regs[a_index++]; 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci new_regs[new_index]->merge_set = a; 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci assert(new_index == a->regs_count + b->regs_count); 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci /* Technically this should be the lcm, but because alignment is only 1 or 237bf215546Sopenharmony_ci * 2 so far this should be ok. 238bf215546Sopenharmony_ci */ 239bf215546Sopenharmony_ci a->alignment = MAX2(a->alignment, b->alignment); 240bf215546Sopenharmony_ci a->regs_count += b->regs_count; 241bf215546Sopenharmony_ci ralloc_free(a->regs); 242bf215546Sopenharmony_ci a->regs = new_regs; 243bf215546Sopenharmony_ci a->size = MAX2(a->size, b->size + b_offset); 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci return a; 246bf215546Sopenharmony_ci} 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_cistatic bool 249bf215546Sopenharmony_cimerge_sets_interfere(struct ir3_liveness *live, struct ir3_merge_set *a, 250bf215546Sopenharmony_ci struct ir3_merge_set *b, int b_offset) 251bf215546Sopenharmony_ci{ 252bf215546Sopenharmony_ci if (b_offset < 0) 253bf215546Sopenharmony_ci return merge_sets_interfere(live, b, a, -b_offset); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci struct merge_def dom[a->regs_count + b->regs_count]; 256bf215546Sopenharmony_ci unsigned a_index = 0, b_index = 0; 257bf215546Sopenharmony_ci int dom_index = -1; 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci /* Reject trying to merge the sets if the alignment doesn't work out */ 260bf215546Sopenharmony_ci if (b_offset % a->alignment != 0) 261bf215546Sopenharmony_ci return true; 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci while (a_index < a->regs_count || b_index < b->regs_count) { 264bf215546Sopenharmony_ci struct merge_def current; 265bf215546Sopenharmony_ci if (a_index == a->regs_count) { 266bf215546Sopenharmony_ci current.reg = b->regs[b_index]; 267bf215546Sopenharmony_ci current.offset = current.reg->merge_set_offset + b_offset; 268bf215546Sopenharmony_ci b_index++; 269bf215546Sopenharmony_ci } else if (b_index == b->regs_count) { 270bf215546Sopenharmony_ci current.reg = a->regs[a_index]; 271bf215546Sopenharmony_ci current.offset = current.reg->merge_set_offset; 272bf215546Sopenharmony_ci a_index++; 273bf215546Sopenharmony_ci } else { 274bf215546Sopenharmony_ci if (def_after(b->regs[b_index], a->regs[a_index])) { 275bf215546Sopenharmony_ci current.reg = a->regs[a_index]; 276bf215546Sopenharmony_ci current.offset = current.reg->merge_set_offset; 277bf215546Sopenharmony_ci a_index++; 278bf215546Sopenharmony_ci } else { 279bf215546Sopenharmony_ci current.reg = b->regs[b_index]; 280bf215546Sopenharmony_ci current.offset = current.reg->merge_set_offset + b_offset; 281bf215546Sopenharmony_ci b_index++; 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci while (dom_index >= 0 && 286bf215546Sopenharmony_ci !def_dominates(dom[dom_index].reg, current.reg)) { 287bf215546Sopenharmony_ci dom_index--; 288bf215546Sopenharmony_ci } 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci /* TODO: in the original paper, just dom[dom_index] needs to be 291bf215546Sopenharmony_ci * checked for interference. We implement the value-chasing extension 292bf215546Sopenharmony_ci * as well as support for sub-registers, which complicates this 293bf215546Sopenharmony_ci * significantly because it's no longer the case that if a dominates b 294bf215546Sopenharmony_ci * dominates c and a and b don't interfere then we only need to check 295bf215546Sopenharmony_ci * interference between b and c to be sure a and c don't interfere -- 296bf215546Sopenharmony_ci * this means we may have to check for interference against values 297bf215546Sopenharmony_ci * higher in the stack then dom[dom_index]. In the paper there's a 298bf215546Sopenharmony_ci * description of a way to do less interference tests with the 299bf215546Sopenharmony_ci * value-chasing extension, but we'd have to come up with something 300bf215546Sopenharmony_ci * ourselves for handling the similar problems that come up with 301bf215546Sopenharmony_ci * allowing values to contain subregisters. For now we just test 302bf215546Sopenharmony_ci * everything in the stack. 303bf215546Sopenharmony_ci */ 304bf215546Sopenharmony_ci for (int i = 0; i <= dom_index; i++) { 305bf215546Sopenharmony_ci if (can_skip_interference(¤t, &dom[i])) 306bf215546Sopenharmony_ci continue; 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci /* Ok, now we actually have to check interference. Since we know 309bf215546Sopenharmony_ci * that dom[i] dominates current, this boils down to checking 310bf215546Sopenharmony_ci * whether dom[i] is live after current. 311bf215546Sopenharmony_ci */ 312bf215546Sopenharmony_ci if (ir3_def_live_after(live, dom[i].reg, current.reg->instr)) 313bf215546Sopenharmony_ci return true; 314bf215546Sopenharmony_ci } 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci dom[++dom_index] = current; 317bf215546Sopenharmony_ci } 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci return false; 320bf215546Sopenharmony_ci} 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_cistatic void 323bf215546Sopenharmony_citry_merge_defs(struct ir3_liveness *live, struct ir3_register *a, 324bf215546Sopenharmony_ci struct ir3_register *b, unsigned b_offset) 325bf215546Sopenharmony_ci{ 326bf215546Sopenharmony_ci struct ir3_merge_set *a_set = get_merge_set(a); 327bf215546Sopenharmony_ci struct ir3_merge_set *b_set = get_merge_set(b); 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci if (a_set == b_set) { 330bf215546Sopenharmony_ci /* Note: Even in this case we may not always successfully be able to 331bf215546Sopenharmony_ci * coalesce this copy, if the offsets don't line up. But in any 332bf215546Sopenharmony_ci * case, we can't do anything. 333bf215546Sopenharmony_ci */ 334bf215546Sopenharmony_ci return; 335bf215546Sopenharmony_ci } 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci int b_set_offset = a->merge_set_offset + b_offset - b->merge_set_offset; 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci if (!merge_sets_interfere(live, a_set, b_set, b_set_offset)) 340bf215546Sopenharmony_ci merge_merge_sets(a_set, b_set, b_set_offset); 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_civoid 344bf215546Sopenharmony_ciir3_force_merge(struct ir3_register *a, struct ir3_register *b, int b_offset) 345bf215546Sopenharmony_ci{ 346bf215546Sopenharmony_ci struct ir3_merge_set *a_set = get_merge_set(a); 347bf215546Sopenharmony_ci struct ir3_merge_set *b_set = get_merge_set(b); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci if (a_set == b_set) 350bf215546Sopenharmony_ci return; 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci int b_set_offset = a->merge_set_offset + b_offset - b->merge_set_offset; 353bf215546Sopenharmony_ci merge_merge_sets(a_set, b_set, b_set_offset); 354bf215546Sopenharmony_ci} 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_cistatic void 357bf215546Sopenharmony_cicoalesce_phi(struct ir3_liveness *live, struct ir3_instruction *phi) 358bf215546Sopenharmony_ci{ 359bf215546Sopenharmony_ci for (unsigned i = 0; i < phi->srcs_count; i++) { 360bf215546Sopenharmony_ci if (phi->srcs[i]->def) 361bf215546Sopenharmony_ci try_merge_defs(live, phi->dsts[0], phi->srcs[i]->def, 0); 362bf215546Sopenharmony_ci } 363bf215546Sopenharmony_ci} 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_cistatic void 366bf215546Sopenharmony_ciaggressive_coalesce_parallel_copy(struct ir3_liveness *live, 367bf215546Sopenharmony_ci struct ir3_instruction *pcopy) 368bf215546Sopenharmony_ci{ 369bf215546Sopenharmony_ci for (unsigned i = 0; i < pcopy->dsts_count; i++) { 370bf215546Sopenharmony_ci if (!(pcopy->srcs[i]->flags & IR3_REG_SSA)) 371bf215546Sopenharmony_ci continue; 372bf215546Sopenharmony_ci try_merge_defs(live, pcopy->dsts[i], pcopy->srcs[i]->def, 0); 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci} 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_cistatic void 377bf215546Sopenharmony_ciaggressive_coalesce_split(struct ir3_liveness *live, 378bf215546Sopenharmony_ci struct ir3_instruction *split) 379bf215546Sopenharmony_ci{ 380bf215546Sopenharmony_ci try_merge_defs(live, split->srcs[0]->def, split->dsts[0], 381bf215546Sopenharmony_ci split->split.off * reg_elem_size(split->dsts[0])); 382bf215546Sopenharmony_ci} 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_cistatic void 385bf215546Sopenharmony_ciaggressive_coalesce_collect(struct ir3_liveness *live, 386bf215546Sopenharmony_ci struct ir3_instruction *collect) 387bf215546Sopenharmony_ci{ 388bf215546Sopenharmony_ci for (unsigned i = 0, offset = 0; i < collect->srcs_count; 389bf215546Sopenharmony_ci offset += reg_elem_size(collect->srcs[i]), i++) { 390bf215546Sopenharmony_ci if (!(collect->srcs[i]->flags & IR3_REG_SSA)) 391bf215546Sopenharmony_ci continue; 392bf215546Sopenharmony_ci try_merge_defs(live, collect->dsts[0], collect->srcs[i]->def, offset); 393bf215546Sopenharmony_ci } 394bf215546Sopenharmony_ci} 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_cistatic void 397bf215546Sopenharmony_cicreate_parallel_copy(struct ir3_block *block) 398bf215546Sopenharmony_ci{ 399bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 400bf215546Sopenharmony_ci if (!block->successors[i]) 401bf215546Sopenharmony_ci continue; 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci struct ir3_block *succ = block->successors[i]; 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci unsigned pred_idx = ir3_block_get_pred_index(succ, block); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci unsigned phi_count = 0; 408bf215546Sopenharmony_ci foreach_instr (phi, &succ->instr_list) { 409bf215546Sopenharmony_ci if (phi->opc != OPC_META_PHI) 410bf215546Sopenharmony_ci break; 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci /* Avoid undef */ 413bf215546Sopenharmony_ci if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) && 414bf215546Sopenharmony_ci !phi->srcs[pred_idx]->def) 415bf215546Sopenharmony_ci continue; 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci /* We don't support critical edges. If we were to support them, 418bf215546Sopenharmony_ci * we'd need to insert parallel copies after the phi node to solve 419bf215546Sopenharmony_ci * the lost-copy problem. 420bf215546Sopenharmony_ci */ 421bf215546Sopenharmony_ci assert(i == 0 && !block->successors[1]); 422bf215546Sopenharmony_ci phi_count++; 423bf215546Sopenharmony_ci } 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci if (phi_count == 0) 426bf215546Sopenharmony_ci continue; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci struct ir3_register *src[phi_count]; 429bf215546Sopenharmony_ci unsigned j = 0; 430bf215546Sopenharmony_ci foreach_instr (phi, &succ->instr_list) { 431bf215546Sopenharmony_ci if (phi->opc != OPC_META_PHI) 432bf215546Sopenharmony_ci break; 433bf215546Sopenharmony_ci if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) && 434bf215546Sopenharmony_ci !phi->srcs[pred_idx]->def) 435bf215546Sopenharmony_ci continue; 436bf215546Sopenharmony_ci src[j++] = phi->srcs[pred_idx]; 437bf215546Sopenharmony_ci } 438bf215546Sopenharmony_ci assert(j == phi_count); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci struct ir3_instruction *pcopy = 441bf215546Sopenharmony_ci ir3_instr_create(block, OPC_META_PARALLEL_COPY, phi_count, phi_count); 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci for (j = 0; j < phi_count; j++) { 444bf215546Sopenharmony_ci struct ir3_register *reg = __ssa_dst(pcopy); 445bf215546Sopenharmony_ci reg->flags |= src[j]->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 446bf215546Sopenharmony_ci reg->size = src[j]->size; 447bf215546Sopenharmony_ci reg->wrmask = src[j]->wrmask; 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci for (j = 0; j < phi_count; j++) { 451bf215546Sopenharmony_ci pcopy->srcs[pcopy->srcs_count++] = 452bf215546Sopenharmony_ci ir3_reg_clone(block->shader, src[j]); 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci j = 0; 456bf215546Sopenharmony_ci foreach_instr (phi, &succ->instr_list) { 457bf215546Sopenharmony_ci if (phi->opc != OPC_META_PHI) 458bf215546Sopenharmony_ci break; 459bf215546Sopenharmony_ci if ((phi->srcs[pred_idx]->flags & IR3_REG_SSA) && 460bf215546Sopenharmony_ci !phi->srcs[pred_idx]->def) 461bf215546Sopenharmony_ci continue; 462bf215546Sopenharmony_ci phi->srcs[pred_idx]->def = pcopy->dsts[j]; 463bf215546Sopenharmony_ci phi->srcs[pred_idx]->flags = pcopy->dsts[j]->flags; 464bf215546Sopenharmony_ci j++; 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci assert(j == phi_count); 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci} 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_civoid 471bf215546Sopenharmony_ciir3_create_parallel_copies(struct ir3 *ir) 472bf215546Sopenharmony_ci{ 473bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 474bf215546Sopenharmony_ci create_parallel_copy(block); 475bf215546Sopenharmony_ci } 476bf215546Sopenharmony_ci} 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_cistatic void 479bf215546Sopenharmony_ciindex_merge_sets(struct ir3_liveness *live, struct ir3 *ir) 480bf215546Sopenharmony_ci{ 481bf215546Sopenharmony_ci unsigned offset = 0; 482bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 483bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 484bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->dsts_count; i++) { 485bf215546Sopenharmony_ci struct ir3_register *dst = instr->dsts[i]; 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci unsigned dst_offset; 488bf215546Sopenharmony_ci struct ir3_merge_set *merge_set = dst->merge_set; 489bf215546Sopenharmony_ci unsigned size = reg_size(dst); 490bf215546Sopenharmony_ci if (merge_set) { 491bf215546Sopenharmony_ci if (merge_set->interval_start == ~0) { 492bf215546Sopenharmony_ci merge_set->interval_start = offset; 493bf215546Sopenharmony_ci offset += merge_set->size; 494bf215546Sopenharmony_ci } 495bf215546Sopenharmony_ci dst_offset = merge_set->interval_start + dst->merge_set_offset; 496bf215546Sopenharmony_ci } else { 497bf215546Sopenharmony_ci dst_offset = offset; 498bf215546Sopenharmony_ci offset += size; 499bf215546Sopenharmony_ci } 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci dst->interval_start = dst_offset; 502bf215546Sopenharmony_ci dst->interval_end = dst_offset + size; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci } 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci live->interval_offset = offset; 508bf215546Sopenharmony_ci} 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci#define RESET "\x1b[0m" 511bf215546Sopenharmony_ci#define BLUE "\x1b[0;34m" 512bf215546Sopenharmony_ci#define SYN_SSA(x) BLUE x RESET 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_cistatic void 515bf215546Sopenharmony_cidump_merge_sets(struct ir3 *ir) 516bf215546Sopenharmony_ci{ 517bf215546Sopenharmony_ci d("merge sets:"); 518bf215546Sopenharmony_ci struct set *merge_sets = _mesa_pointer_set_create(NULL); 519bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 520bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 521bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->dsts_count; i++) { 522bf215546Sopenharmony_ci struct ir3_register *dst = instr->dsts[i]; 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci struct ir3_merge_set *merge_set = dst->merge_set; 525bf215546Sopenharmony_ci if (!merge_set || _mesa_set_search(merge_sets, merge_set)) 526bf215546Sopenharmony_ci continue; 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci d("merge set, size %u, align %u:", merge_set->size, 529bf215546Sopenharmony_ci merge_set->alignment); 530bf215546Sopenharmony_ci for (unsigned j = 0; j < merge_set->regs_count; j++) { 531bf215546Sopenharmony_ci struct ir3_register *reg = merge_set->regs[j]; 532bf215546Sopenharmony_ci d("\t" SYN_SSA("ssa_%u") ":%u, offset %u", 533bf215546Sopenharmony_ci reg->instr->serialno, reg->name, reg->merge_set_offset); 534bf215546Sopenharmony_ci } 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci _mesa_set_add(merge_sets, merge_set); 537bf215546Sopenharmony_ci } 538bf215546Sopenharmony_ci } 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci ralloc_free(merge_sets); 542bf215546Sopenharmony_ci} 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_civoid 545bf215546Sopenharmony_ciir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir) 546bf215546Sopenharmony_ci{ 547bf215546Sopenharmony_ci index_instrs(ir3_start_block(ir), 0); 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci /* First pass: coalesce phis, which must be together. */ 550bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 551bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 552bf215546Sopenharmony_ci if (instr->opc != OPC_META_PHI) 553bf215546Sopenharmony_ci break; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci coalesce_phi(live, instr); 556bf215546Sopenharmony_ci } 557bf215546Sopenharmony_ci } 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci /* Second pass: aggressively coalesce parallelcopy, split, collect */ 560bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 561bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 562bf215546Sopenharmony_ci switch (instr->opc) { 563bf215546Sopenharmony_ci case OPC_META_SPLIT: 564bf215546Sopenharmony_ci aggressive_coalesce_split(live, instr); 565bf215546Sopenharmony_ci break; 566bf215546Sopenharmony_ci case OPC_META_COLLECT: 567bf215546Sopenharmony_ci aggressive_coalesce_collect(live, instr); 568bf215546Sopenharmony_ci break; 569bf215546Sopenharmony_ci case OPC_META_PARALLEL_COPY: 570bf215546Sopenharmony_ci aggressive_coalesce_parallel_copy(live, instr); 571bf215546Sopenharmony_ci break; 572bf215546Sopenharmony_ci default: 573bf215546Sopenharmony_ci break; 574bf215546Sopenharmony_ci } 575bf215546Sopenharmony_ci } 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci index_merge_sets(live, ir); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci if (ir3_shader_debug & IR3_DBG_RAMSGS) 581bf215546Sopenharmony_ci dump_merge_sets(ir); 582bf215546Sopenharmony_ci} 583