1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (c) 2017 Lima Project 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 13bf215546Sopenharmony_ci * of the Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "gpir.h" 26bf215546Sopenharmony_ci#include "util/u_dynarray.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* Per-register information */ 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_cistruct reg_info { 31bf215546Sopenharmony_ci BITSET_WORD *conflicts; 32bf215546Sopenharmony_ci struct util_dynarray conflict_list; 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci unsigned num_conflicts; 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci int assigned_color; 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci bool visited; 39bf215546Sopenharmony_ci}; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistruct regalloc_ctx { 42bf215546Sopenharmony_ci unsigned bitset_words; 43bf215546Sopenharmony_ci struct reg_info *registers; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci /* Reusable scratch liveness array */ 46bf215546Sopenharmony_ci BITSET_WORD *live; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci unsigned *worklist; 49bf215546Sopenharmony_ci unsigned worklist_start, worklist_end; 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci unsigned *stack; 52bf215546Sopenharmony_ci unsigned stack_size; 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci gpir_compiler *comp; 55bf215546Sopenharmony_ci void *mem_ctx; 56bf215546Sopenharmony_ci}; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci/* Liveness analysis */ 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cistatic void propagate_liveness_node(gpir_node *node, BITSET_WORD *live, 61bf215546Sopenharmony_ci gpir_compiler *comp) 62bf215546Sopenharmony_ci{ 63bf215546Sopenharmony_ci /* KILL */ 64bf215546Sopenharmony_ci if (node->type == gpir_node_type_store) { 65bf215546Sopenharmony_ci if (node->op == gpir_op_store_reg) { 66bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 67bf215546Sopenharmony_ci BITSET_CLEAR(live, store->reg->index); 68bf215546Sopenharmony_ci } 69bf215546Sopenharmony_ci } 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci /* GEN */ 72bf215546Sopenharmony_ci if (node->type == gpir_node_type_load) { 73bf215546Sopenharmony_ci if (node->op == gpir_op_load_reg) { 74bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 75bf215546Sopenharmony_ci BITSET_SET(live, load->reg->index); 76bf215546Sopenharmony_ci } 77bf215546Sopenharmony_ci } 78bf215546Sopenharmony_ci} 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_cistatic bool propagate_liveness_block(gpir_block *block, struct regalloc_ctx *ctx) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 83bf215546Sopenharmony_ci if (block->successors[i]) { 84bf215546Sopenharmony_ci for (unsigned j = 0; j < ctx->bitset_words; j++) 85bf215546Sopenharmony_ci block->live_out[j] |= block->successors[i]->live_in[j]; 86bf215546Sopenharmony_ci } 87bf215546Sopenharmony_ci } 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci memcpy(ctx->live, block->live_out, ctx->bitset_words * sizeof(BITSET_WORD)); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci list_for_each_entry_rev(gpir_node, node, &block->node_list, list) { 92bf215546Sopenharmony_ci propagate_liveness_node(node, ctx->live, block->comp); 93bf215546Sopenharmony_ci } 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci bool changed = false; 96bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->bitset_words; i++) { 97bf215546Sopenharmony_ci changed |= (block->live_in[i] != ctx->live[i]); 98bf215546Sopenharmony_ci block->live_in[i] = ctx->live[i]; 99bf215546Sopenharmony_ci } 100bf215546Sopenharmony_ci return changed; 101bf215546Sopenharmony_ci} 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_cistatic void calc_def_block(gpir_block *block) 104bf215546Sopenharmony_ci{ 105bf215546Sopenharmony_ci list_for_each_entry(gpir_node, node, &block->node_list, list) { 106bf215546Sopenharmony_ci if (node->op == gpir_op_store_reg) { 107bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 108bf215546Sopenharmony_ci BITSET_SET(block->def_out, store->reg->index); 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci} 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_cistatic void calc_liveness(struct regalloc_ctx *ctx) 114bf215546Sopenharmony_ci{ 115bf215546Sopenharmony_ci bool changed = true; 116bf215546Sopenharmony_ci while (changed) { 117bf215546Sopenharmony_ci changed = false; 118bf215546Sopenharmony_ci list_for_each_entry_rev(gpir_block, block, &ctx->comp->block_list, list) { 119bf215546Sopenharmony_ci changed |= propagate_liveness_block(block, ctx); 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) { 124bf215546Sopenharmony_ci calc_def_block(block); 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci changed = true; 128bf215546Sopenharmony_ci while (changed) { 129bf215546Sopenharmony_ci changed = false; 130bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) { 131bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 132bf215546Sopenharmony_ci gpir_block *succ = block->successors[i]; 133bf215546Sopenharmony_ci if (!succ) 134bf215546Sopenharmony_ci continue; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci for (unsigned j = 0; j < ctx->bitset_words; j++) { 137bf215546Sopenharmony_ci BITSET_WORD new = block->def_out[j] & ~succ->def_out[j]; 138bf215546Sopenharmony_ci changed |= (new != 0); 139bf215546Sopenharmony_ci succ->def_out[j] |= block->def_out[j]; 140bf215546Sopenharmony_ci } 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci } 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci} 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci/* Interference calculation */ 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_cistatic void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j) 149bf215546Sopenharmony_ci{ 150bf215546Sopenharmony_ci if (i == j) 151bf215546Sopenharmony_ci return; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci struct reg_info *a = &ctx->registers[i]; 154bf215546Sopenharmony_ci struct reg_info *b = &ctx->registers[j]; 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci if (BITSET_TEST(a->conflicts, j)) 157bf215546Sopenharmony_ci return; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci BITSET_SET(a->conflicts, j); 160bf215546Sopenharmony_ci BITSET_SET(b->conflicts, i); 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci a->num_conflicts++; 163bf215546Sopenharmony_ci b->num_conflicts++; 164bf215546Sopenharmony_ci util_dynarray_append(&a->conflict_list, unsigned, j); 165bf215546Sopenharmony_ci util_dynarray_append(&b->conflict_list, unsigned, i); 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci/* Make the register or node "i" interfere with all the other live registers 169bf215546Sopenharmony_ci * and nodes. 170bf215546Sopenharmony_ci */ 171bf215546Sopenharmony_cistatic void add_all_interferences(struct regalloc_ctx *ctx, 172bf215546Sopenharmony_ci unsigned i, 173bf215546Sopenharmony_ci BITSET_WORD *live_regs) 174bf215546Sopenharmony_ci{ 175bf215546Sopenharmony_ci int live_reg; 176bf215546Sopenharmony_ci BITSET_FOREACH_SET(live_reg, ctx->live, ctx->comp->cur_reg) { 177bf215546Sopenharmony_ci add_interference(ctx, i, live_reg); 178bf215546Sopenharmony_ci } 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci} 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_cistatic void print_liveness(struct regalloc_ctx *ctx, 183bf215546Sopenharmony_ci BITSET_WORD *live_reg) 184bf215546Sopenharmony_ci{ 185bf215546Sopenharmony_ci if (!(lima_debug & LIMA_DEBUG_GP)) 186bf215546Sopenharmony_ci return; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci int live_idx; 189bf215546Sopenharmony_ci BITSET_FOREACH_SET(live_idx, live_reg, ctx->comp->cur_reg) { 190bf215546Sopenharmony_ci printf("reg%d ", live_idx); 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci printf("\n"); 193bf215546Sopenharmony_ci} 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_cistatic void calc_interference(struct regalloc_ctx *ctx) 196bf215546Sopenharmony_ci{ 197bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) { 198bf215546Sopenharmony_ci /* Initialize liveness at the end of the block, but exclude values that 199bf215546Sopenharmony_ci * definitely aren't defined by the end. This helps out with 200bf215546Sopenharmony_ci * partially-defined registers, like: 201bf215546Sopenharmony_ci * 202bf215546Sopenharmony_ci * if (condition) { 203bf215546Sopenharmony_ci * foo = ...; 204bf215546Sopenharmony_ci * } 205bf215546Sopenharmony_ci * if (condition) { 206bf215546Sopenharmony_ci * ... = foo; 207bf215546Sopenharmony_ci * } 208bf215546Sopenharmony_ci * 209bf215546Sopenharmony_ci * If we naively propagated liveness backwards, foo would be live from 210bf215546Sopenharmony_ci * the beginning of the program, but if we're not inside a loop then 211bf215546Sopenharmony_ci * its value is undefined before the first if and we don't have to 212bf215546Sopenharmony_ci * consider it live. Mask out registers like foo here. 213bf215546Sopenharmony_ci */ 214bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->bitset_words; i++) { 215bf215546Sopenharmony_ci ctx->live[i] = block->live_out[i] & block->def_out[i]; 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci list_for_each_entry_rev(gpir_node, node, &block->node_list, list) { 219bf215546Sopenharmony_ci gpir_debug("processing node %d\n", node->index); 220bf215546Sopenharmony_ci print_liveness(ctx, ctx->live); 221bf215546Sopenharmony_ci if (node->op == gpir_op_store_reg) { 222bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 223bf215546Sopenharmony_ci add_all_interferences(ctx, store->reg->index, ctx->live); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci /* KILL */ 226bf215546Sopenharmony_ci BITSET_CLEAR(ctx->live, store->reg->index); 227bf215546Sopenharmony_ci } else if (node->op == gpir_op_load_reg) { 228bf215546Sopenharmony_ci /* GEN */ 229bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 230bf215546Sopenharmony_ci BITSET_SET(ctx->live, load->reg->index); 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci} 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci/* Register allocation */ 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_cistatic bool can_simplify(struct regalloc_ctx *ctx, unsigned i) 239bf215546Sopenharmony_ci{ 240bf215546Sopenharmony_ci struct reg_info *info = &ctx->registers[i]; 241bf215546Sopenharmony_ci return info->num_conflicts < GPIR_PHYSICAL_REG_NUM; 242bf215546Sopenharmony_ci} 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_cistatic void push_stack(struct regalloc_ctx *ctx, unsigned i) 245bf215546Sopenharmony_ci{ 246bf215546Sopenharmony_ci ctx->stack[ctx->stack_size++] = i; 247bf215546Sopenharmony_ci gpir_debug("pushing reg%u\n", i); 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci struct reg_info *info = &ctx->registers[i]; 250bf215546Sopenharmony_ci assert(info->visited); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci util_dynarray_foreach(&info->conflict_list, unsigned, conflict) { 253bf215546Sopenharmony_ci struct reg_info *conflict_info = &ctx->registers[*conflict]; 254bf215546Sopenharmony_ci assert(conflict_info->num_conflicts > 0); 255bf215546Sopenharmony_ci conflict_info->num_conflicts--; 256bf215546Sopenharmony_ci if (!ctx->registers[*conflict].visited && can_simplify(ctx, *conflict)) { 257bf215546Sopenharmony_ci ctx->worklist[ctx->worklist_end++] = *conflict; 258bf215546Sopenharmony_ci ctx->registers[*conflict].visited = true; 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci} 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_cistatic bool do_regalloc(struct regalloc_ctx *ctx) 264bf215546Sopenharmony_ci{ 265bf215546Sopenharmony_ci ctx->worklist_start = 0; 266bf215546Sopenharmony_ci ctx->worklist_end = 0; 267bf215546Sopenharmony_ci ctx->stack_size = 0; 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci /* Step 1: find the initially simplifiable registers */ 270bf215546Sopenharmony_ci for (int i = 0; i < ctx->comp->cur_reg; i++) { 271bf215546Sopenharmony_ci if (can_simplify(ctx, i)) { 272bf215546Sopenharmony_ci ctx->worklist[ctx->worklist_end++] = i; 273bf215546Sopenharmony_ci ctx->registers[i].visited = true; 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci } 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci while (true) { 278bf215546Sopenharmony_ci /* Step 2: push onto the stack whatever we can */ 279bf215546Sopenharmony_ci while (ctx->worklist_start != ctx->worklist_end) { 280bf215546Sopenharmony_ci push_stack(ctx, ctx->worklist[ctx->worklist_start++]); 281bf215546Sopenharmony_ci } 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci if (ctx->stack_size < ctx->comp->cur_reg) { 284bf215546Sopenharmony_ci /* If there are still unsimplifiable nodes left, we need to 285bf215546Sopenharmony_ci * optimistically push a node onto the stack. Choose the one with 286bf215546Sopenharmony_ci * the smallest number of current neighbors, since that's the most 287bf215546Sopenharmony_ci * likely to succeed. 288bf215546Sopenharmony_ci */ 289bf215546Sopenharmony_ci unsigned min_conflicts = UINT_MAX; 290bf215546Sopenharmony_ci unsigned best_reg = 0; 291bf215546Sopenharmony_ci for (int reg = 0; reg < ctx->comp->cur_reg; reg++) { 292bf215546Sopenharmony_ci struct reg_info *info = &ctx->registers[reg]; 293bf215546Sopenharmony_ci if (info->visited) 294bf215546Sopenharmony_ci continue; 295bf215546Sopenharmony_ci if (info->num_conflicts < min_conflicts) { 296bf215546Sopenharmony_ci best_reg = reg; 297bf215546Sopenharmony_ci min_conflicts = info->num_conflicts; 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci } 300bf215546Sopenharmony_ci gpir_debug("optimistic triggered\n"); 301bf215546Sopenharmony_ci ctx->registers[best_reg].visited = true; 302bf215546Sopenharmony_ci push_stack(ctx, best_reg); 303bf215546Sopenharmony_ci } else { 304bf215546Sopenharmony_ci break; 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci } 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci /* Step 4: pop off the stack and assign colors */ 309bf215546Sopenharmony_ci for (int i = ctx->comp->cur_reg - 1; i >= 0; i--) { 310bf215546Sopenharmony_ci unsigned idx = ctx->stack[i]; 311bf215546Sopenharmony_ci struct reg_info *reg = &ctx->registers[idx]; 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci bool found = false; 314bf215546Sopenharmony_ci unsigned start = i % GPIR_PHYSICAL_REG_NUM; 315bf215546Sopenharmony_ci for (unsigned j = 0; j < GPIR_PHYSICAL_REG_NUM; j++) { 316bf215546Sopenharmony_ci unsigned candidate = (j + start) % GPIR_PHYSICAL_REG_NUM; 317bf215546Sopenharmony_ci bool available = true; 318bf215546Sopenharmony_ci util_dynarray_foreach(®->conflict_list, unsigned, conflict_idx) { 319bf215546Sopenharmony_ci struct reg_info *conflict = &ctx->registers[*conflict_idx]; 320bf215546Sopenharmony_ci if (conflict->assigned_color >= 0 && 321bf215546Sopenharmony_ci conflict->assigned_color == (int) candidate) { 322bf215546Sopenharmony_ci available = false; 323bf215546Sopenharmony_ci break; 324bf215546Sopenharmony_ci } 325bf215546Sopenharmony_ci } 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci if (available) { 328bf215546Sopenharmony_ci reg->assigned_color = candidate; 329bf215546Sopenharmony_ci found = true; 330bf215546Sopenharmony_ci break; 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci /* TODO: spilling */ 335bf215546Sopenharmony_ci if (!found) { 336bf215546Sopenharmony_ci gpir_error("Failed to allocate registers\n"); 337bf215546Sopenharmony_ci return false; 338bf215546Sopenharmony_ci } 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci return true; 342bf215546Sopenharmony_ci} 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_cistatic void assign_regs(struct regalloc_ctx *ctx) 345bf215546Sopenharmony_ci{ 346bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) { 347bf215546Sopenharmony_ci list_for_each_entry(gpir_node, node, &block->node_list, list) { 348bf215546Sopenharmony_ci if (node->op == gpir_op_load_reg) { 349bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 350bf215546Sopenharmony_ci unsigned color = ctx->registers[load->reg->index].assigned_color; 351bf215546Sopenharmony_ci load->index = color / 4; 352bf215546Sopenharmony_ci load->component = color % 4; 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci if (node->op == gpir_op_store_reg) { 356bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 357bf215546Sopenharmony_ci unsigned color = ctx->registers[store->reg->index].assigned_color; 358bf215546Sopenharmony_ci store->index = color / 4; 359bf215546Sopenharmony_ci store->component = color % 4; 360bf215546Sopenharmony_ci node->value_reg = color; 361bf215546Sopenharmony_ci } 362bf215546Sopenharmony_ci } 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci block->live_out_phys = 0; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci int reg_idx; 367bf215546Sopenharmony_ci BITSET_FOREACH_SET(reg_idx, block->live_out, ctx->comp->cur_reg) { 368bf215546Sopenharmony_ci if (BITSET_TEST(block->def_out, reg_idx)) { 369bf215546Sopenharmony_ci block->live_out_phys |= (1ull << ctx->registers[reg_idx].assigned_color); 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci } 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci} 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci/* Value register allocation */ 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci/* Define a special token for when the register is occupied by a preallocated 378bf215546Sopenharmony_ci * physical register (i.e. load_reg/store_reg). Normally entries in the "live" 379bf215546Sopenharmony_ci * array points to the definition of the value, but there may be multiple 380bf215546Sopenharmony_ci * definitions in this case, and they will certainly come from other basic 381bf215546Sopenharmony_ci * blocks, so it doesn't make sense to do that here. 382bf215546Sopenharmony_ci */ 383bf215546Sopenharmony_cistatic gpir_node __physreg_live; 384bf215546Sopenharmony_ci#define PHYSREG_LIVE (&__physreg_live) 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_cistruct value_regalloc_ctx { 387bf215546Sopenharmony_ci gpir_node *last_written[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM]; 388bf215546Sopenharmony_ci gpir_node *complex1_last_written[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM]; 389bf215546Sopenharmony_ci gpir_node *live[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM]; 390bf215546Sopenharmony_ci gpir_node *last_complex1; 391bf215546Sopenharmony_ci unsigned alloc_start; 392bf215546Sopenharmony_ci}; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_cistatic unsigned find_free_value_reg(struct value_regalloc_ctx *ctx) 395bf215546Sopenharmony_ci{ 396bf215546Sopenharmony_ci /* Implement round-robin allocation */ 397bf215546Sopenharmony_ci unsigned reg_offset = ctx->alloc_start++; 398bf215546Sopenharmony_ci if (ctx->alloc_start == GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM) 399bf215546Sopenharmony_ci ctx->alloc_start = 0; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci unsigned reg = UINT_MAX; 402bf215546Sopenharmony_ci for (unsigned reg_base = 0; 403bf215546Sopenharmony_ci reg_base < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM; 404bf215546Sopenharmony_ci reg_base++) { 405bf215546Sopenharmony_ci unsigned cur_reg = (reg_base + reg_offset) % (GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM); 406bf215546Sopenharmony_ci if (!ctx->live[cur_reg]) { 407bf215546Sopenharmony_ci reg = cur_reg; 408bf215546Sopenharmony_ci break; 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci return reg; 413bf215546Sopenharmony_ci} 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_cistatic void add_fake_dep(gpir_node *node, gpir_node *src, 416bf215546Sopenharmony_ci struct value_regalloc_ctx *ctx) 417bf215546Sopenharmony_ci{ 418bf215546Sopenharmony_ci assert(src->value_reg >= 0); 419bf215546Sopenharmony_ci if (ctx->last_written[src->value_reg] && 420bf215546Sopenharmony_ci ctx->last_written[src->value_reg] != node) { 421bf215546Sopenharmony_ci gpir_node_add_dep(ctx->last_written[src->value_reg], node, 422bf215546Sopenharmony_ci GPIR_DEP_WRITE_AFTER_READ); 423bf215546Sopenharmony_ci } 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci /* For a sequence of schedule_first nodes right before a complex1 426bf215546Sopenharmony_ci * node, add any extra fake dependencies necessary so that the 427bf215546Sopenharmony_ci * schedule_first nodes can be scheduled right after the complex1 is 428bf215546Sopenharmony_ci * scheduled. We have to save the last_written before complex1 happens to 429bf215546Sopenharmony_ci * avoid adding dependencies to children of the complex1 node which would 430bf215546Sopenharmony_ci * create a cycle. 431bf215546Sopenharmony_ci */ 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci if (gpir_op_infos[node->op].schedule_first && 434bf215546Sopenharmony_ci ctx->last_complex1 && 435bf215546Sopenharmony_ci ctx->complex1_last_written[src->value_reg]) { 436bf215546Sopenharmony_ci gpir_node_add_dep(ctx->complex1_last_written[src->value_reg], 437bf215546Sopenharmony_ci ctx->last_complex1, 438bf215546Sopenharmony_ci GPIR_DEP_WRITE_AFTER_READ); 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci} 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_cistatic bool handle_value_read(gpir_node *node, gpir_node *src, 443bf215546Sopenharmony_ci struct value_regalloc_ctx *ctx) 444bf215546Sopenharmony_ci{ 445bf215546Sopenharmony_ci /* If already allocated, don't allocate it */ 446bf215546Sopenharmony_ci if (src->value_reg < 0) { 447bf215546Sopenharmony_ci unsigned reg = find_free_value_reg(ctx); 448bf215546Sopenharmony_ci if (reg == UINT_MAX) 449bf215546Sopenharmony_ci return false; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci src->value_reg = reg; 452bf215546Sopenharmony_ci ctx->live[reg] = src; 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci /* Add any fake dependencies. Note: this is the actual result of value 456bf215546Sopenharmony_ci * register allocation. We throw away node->value_reg afterwards, since 457bf215546Sopenharmony_ci * it's really the fake dependencies which constrain the post-RA scheduler 458bf215546Sopenharmony_ci * enough to make sure it never needs to spill to temporaries. 459bf215546Sopenharmony_ci */ 460bf215546Sopenharmony_ci add_fake_dep(node, src, ctx); 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci return true; 463bf215546Sopenharmony_ci} 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_cistatic bool handle_reg_read(gpir_load_node *load, 466bf215546Sopenharmony_ci struct value_regalloc_ctx *ctx) 467bf215546Sopenharmony_ci{ 468bf215546Sopenharmony_ci unsigned idx = load->index * 4 + load->component; 469bf215546Sopenharmony_ci if (!ctx->live[idx]) { 470bf215546Sopenharmony_ci ctx->live[idx] = PHYSREG_LIVE; 471bf215546Sopenharmony_ci } else if (ctx->live[idx] != PHYSREG_LIVE) { 472bf215546Sopenharmony_ci /* This slot is occupied by some other value register, so we need to 473bf215546Sopenharmony_ci * evict it. This effectively splits the live range of the value 474bf215546Sopenharmony_ci * register. NB: since we create fake dependencies on the fly, and the 475bf215546Sopenharmony_ci * fake dependencies are the only output of this pass, we don't actually 476bf215546Sopenharmony_ci * have to record where the split happened or that this value was 477bf215546Sopenharmony_ci * assigned to two different registers. Any actual live range splitting 478bf215546Sopenharmony_ci * happens in the post-RA scheduler, which moves the value to and from 479bf215546Sopenharmony_ci * the register file. This will just cause some reads of the value 480bf215546Sopenharmony_ci * register to have different fake dependencies. 481bf215546Sopenharmony_ci */ 482bf215546Sopenharmony_ci unsigned new_reg = find_free_value_reg(ctx); 483bf215546Sopenharmony_ci if (new_reg == UINT_MAX) 484bf215546Sopenharmony_ci return false; 485bf215546Sopenharmony_ci ctx->live[new_reg] = ctx->live[idx]; 486bf215546Sopenharmony_ci ctx->live[new_reg]->value_reg = new_reg; 487bf215546Sopenharmony_ci ctx->live[idx] = PHYSREG_LIVE; 488bf215546Sopenharmony_ci } 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci if (ctx->last_written[idx]) { 491bf215546Sopenharmony_ci gpir_node_add_dep(ctx->last_written[idx], &load->node, 492bf215546Sopenharmony_ci GPIR_DEP_WRITE_AFTER_READ); 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci return true; 496bf215546Sopenharmony_ci} 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_cistatic void handle_reg_write(gpir_store_node *store, 499bf215546Sopenharmony_ci struct value_regalloc_ctx *ctx) 500bf215546Sopenharmony_ci{ 501bf215546Sopenharmony_ci unsigned idx = store->index * 4 + store->component; 502bf215546Sopenharmony_ci store->node.value_reg = idx; 503bf215546Sopenharmony_ci ctx->last_written[idx] = &store->node; 504bf215546Sopenharmony_ci ctx->live[idx] = NULL; 505bf215546Sopenharmony_ci} 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_cistatic void handle_value_write(gpir_node *node, 508bf215546Sopenharmony_ci struct value_regalloc_ctx *ctx) 509bf215546Sopenharmony_ci{ 510bf215546Sopenharmony_ci /* TODO: why does an uninitialized node->value_reg 511bf215546Sopenharmony_ci * sometimes end up here? */ 512bf215546Sopenharmony_ci if (node->value_reg < 0) 513bf215546Sopenharmony_ci return; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci ctx->last_written[node->value_reg] = node; 516bf215546Sopenharmony_ci ctx->live[node->value_reg] = NULL; 517bf215546Sopenharmony_ci} 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_cistatic bool regalloc_value_regs(gpir_block *block) 520bf215546Sopenharmony_ci{ 521bf215546Sopenharmony_ci struct value_regalloc_ctx ctx = { { 0 } }; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci list_for_each_entry(gpir_node, node, &block->node_list, list) { 524bf215546Sopenharmony_ci node->value_reg = -1; 525bf215546Sopenharmony_ci } 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci list_for_each_entry_rev(gpir_node, node, &block->node_list, list) { 528bf215546Sopenharmony_ci if (node->op == gpir_op_complex1) { 529bf215546Sopenharmony_ci ctx.last_complex1 = node; 530bf215546Sopenharmony_ci memcpy(ctx.complex1_last_written, ctx.last_written, 531bf215546Sopenharmony_ci sizeof(ctx.complex1_last_written)); 532bf215546Sopenharmony_ci } 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci if (node->type != gpir_node_type_store && 535bf215546Sopenharmony_ci node->type != gpir_node_type_branch) { 536bf215546Sopenharmony_ci handle_value_write(node, &ctx); 537bf215546Sopenharmony_ci } else if (node->op == gpir_op_store_reg) { 538bf215546Sopenharmony_ci handle_reg_write(gpir_node_to_store(node), &ctx); 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci if (node->type == gpir_node_type_store) { 542bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 543bf215546Sopenharmony_ci if (!handle_value_read(&store->node, store->child, &ctx)) 544bf215546Sopenharmony_ci return false; 545bf215546Sopenharmony_ci } else if (node->type == gpir_node_type_alu) { 546bf215546Sopenharmony_ci gpir_alu_node *alu = gpir_node_to_alu(node); 547bf215546Sopenharmony_ci for (int i = 0; i < alu->num_child; i++) { 548bf215546Sopenharmony_ci if (!handle_value_read(&alu->node, alu->children[i], &ctx)) 549bf215546Sopenharmony_ci return false; 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci } else if (node->type == gpir_node_type_branch) { 552bf215546Sopenharmony_ci /* At the end of a block the top 11 values are always free, so 553bf215546Sopenharmony_ci * branches should always succeed. 554bf215546Sopenharmony_ci */ 555bf215546Sopenharmony_ci gpir_branch_node *branch = gpir_node_to_branch(node); 556bf215546Sopenharmony_ci ASSERTED bool result = handle_value_read(&branch->node, 557bf215546Sopenharmony_ci branch->cond, &ctx); 558bf215546Sopenharmony_ci assert(result); 559bf215546Sopenharmony_ci } else if (node->op == gpir_op_load_reg) { 560bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 561bf215546Sopenharmony_ci if (!handle_reg_read(load, &ctx)) 562bf215546Sopenharmony_ci return false; 563bf215546Sopenharmony_ci } 564bf215546Sopenharmony_ci } 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci return true; 567bf215546Sopenharmony_ci} 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_cistatic void regalloc_print_result(gpir_compiler *comp) 570bf215546Sopenharmony_ci{ 571bf215546Sopenharmony_ci if (!(lima_debug & LIMA_DEBUG_GP)) 572bf215546Sopenharmony_ci return; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci int index = 0; 575bf215546Sopenharmony_ci printf("======== regalloc ========\n"); 576bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &comp->block_list, list) { 577bf215546Sopenharmony_ci list_for_each_entry(gpir_node, node, &block->node_list, list) { 578bf215546Sopenharmony_ci printf("%03d: %d/%d %s ", index++, node->index, node->value_reg, 579bf215546Sopenharmony_ci gpir_op_infos[node->op].name); 580bf215546Sopenharmony_ci gpir_node_foreach_pred(node, dep) { 581bf215546Sopenharmony_ci gpir_node *pred = dep->pred; 582bf215546Sopenharmony_ci printf(" %d/%d", pred->index, pred->value_reg); 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci if (node->op == gpir_op_load_reg) { 585bf215546Sopenharmony_ci gpir_load_node *load = gpir_node_to_load(node); 586bf215546Sopenharmony_ci printf(" -/%d", 4 * load->index + load->component); 587bf215546Sopenharmony_ci printf(" (%d)", load->reg->index); 588bf215546Sopenharmony_ci } else if (node->op == gpir_op_store_reg) { 589bf215546Sopenharmony_ci gpir_store_node *store = gpir_node_to_store(node); 590bf215546Sopenharmony_ci printf(" (%d)", store->reg->index); 591bf215546Sopenharmony_ci } 592bf215546Sopenharmony_ci printf("\n"); 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci printf("----------------------------\n"); 595bf215546Sopenharmony_ci } 596bf215546Sopenharmony_ci} 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_cibool gpir_regalloc_prog(gpir_compiler *comp) 599bf215546Sopenharmony_ci{ 600bf215546Sopenharmony_ci struct regalloc_ctx ctx; 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci ctx.mem_ctx = ralloc_context(NULL); 603bf215546Sopenharmony_ci ctx.bitset_words = BITSET_WORDS(comp->cur_reg); 604bf215546Sopenharmony_ci ctx.live = ralloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words); 605bf215546Sopenharmony_ci ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, comp->cur_reg); 606bf215546Sopenharmony_ci ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, comp->cur_reg); 607bf215546Sopenharmony_ci ctx.comp = comp; 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, comp->cur_reg); 610bf215546Sopenharmony_ci for (int i = 0; i < comp->cur_reg; i++) { 611bf215546Sopenharmony_ci ctx.registers[i].conflicts = rzalloc_array(ctx.mem_ctx, BITSET_WORD, 612bf215546Sopenharmony_ci ctx.bitset_words); 613bf215546Sopenharmony_ci util_dynarray_init(&ctx.registers[i].conflict_list, ctx.mem_ctx); 614bf215546Sopenharmony_ci } 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &comp->block_list, list) { 617bf215546Sopenharmony_ci block->live_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words); 618bf215546Sopenharmony_ci block->live_in = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words); 619bf215546Sopenharmony_ci block->def_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words); 620bf215546Sopenharmony_ci } 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci calc_liveness(&ctx); 623bf215546Sopenharmony_ci calc_interference(&ctx); 624bf215546Sopenharmony_ci if (!do_regalloc(&ctx)) { 625bf215546Sopenharmony_ci ralloc_free(ctx.mem_ctx); 626bf215546Sopenharmony_ci return false; 627bf215546Sopenharmony_ci } 628bf215546Sopenharmony_ci assign_regs(&ctx); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci list_for_each_entry(gpir_block, block, &comp->block_list, list) { 631bf215546Sopenharmony_ci if (!regalloc_value_regs(block)) 632bf215546Sopenharmony_ci return false; 633bf215546Sopenharmony_ci } 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci regalloc_print_result(comp); 636bf215546Sopenharmony_ci ralloc_free(ctx.mem_ctx); 637bf215546Sopenharmony_ci return true; 638bf215546Sopenharmony_ci} 639