1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (c) 2017 Lima Project 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 13bf215546Sopenharmony_ci * of the Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "util/ralloc.h" 26bf215546Sopenharmony_ci#include "util/register_allocate.h" 27bf215546Sopenharmony_ci#include "util/u_debug.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "ppir.h" 30bf215546Sopenharmony_ci#include "lima_context.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#define PPIR_REG_COUNT (6 * 4) 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_cienum ppir_ra_reg_class { 35bf215546Sopenharmony_ci ppir_ra_reg_class_vec1, 36bf215546Sopenharmony_ci ppir_ra_reg_class_vec2, 37bf215546Sopenharmony_ci ppir_ra_reg_class_vec3, 38bf215546Sopenharmony_ci ppir_ra_reg_class_vec4, 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci /* 4 reg class for load/store instr regs: 41bf215546Sopenharmony_ci * load/store instr has no swizzle field, so the (virtual) register 42bf215546Sopenharmony_ci * must be allocated at the beginning of a (physical) register, 43bf215546Sopenharmony_ci */ 44bf215546Sopenharmony_ci ppir_ra_reg_class_head_vec1, 45bf215546Sopenharmony_ci ppir_ra_reg_class_head_vec2, 46bf215546Sopenharmony_ci ppir_ra_reg_class_head_vec3, 47bf215546Sopenharmony_ci ppir_ra_reg_class_head_vec4, 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci ppir_ra_reg_class_num, 50bf215546Sopenharmony_ci}; 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_cistruct ra_regs *ppir_regalloc_init(void *mem_ctx) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false); 55bf215546Sopenharmony_ci if (!ret) 56bf215546Sopenharmony_ci return NULL; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci /* Classes for contiguous 1-4 channel groups anywhere within a register. */ 59bf215546Sopenharmony_ci struct ra_class *classes[ppir_ra_reg_class_num]; 60bf215546Sopenharmony_ci for (int i = 0; i < ppir_ra_reg_class_head_vec1; i++) { 61bf215546Sopenharmony_ci classes[i] = ra_alloc_contig_reg_class(ret, i + 1); 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci for (int j = 0; j < PPIR_REG_COUNT; j += 4) { 64bf215546Sopenharmony_ci for (int swiz = 0; swiz < (4 - i); swiz++) 65bf215546Sopenharmony_ci ra_class_add_reg(classes[i], j + swiz); 66bf215546Sopenharmony_ci } 67bf215546Sopenharmony_ci } 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci /* Classes for contiguous 1-4 channels with a start channel of .x */ 70bf215546Sopenharmony_ci for (int i = ppir_ra_reg_class_head_vec1; i < ppir_ra_reg_class_num; i++) { 71bf215546Sopenharmony_ci classes[i] = ra_alloc_contig_reg_class(ret, i - ppir_ra_reg_class_head_vec1 + 1); 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci for (int j = 0; j < PPIR_REG_COUNT; j += 4) 74bf215546Sopenharmony_ci ra_class_add_reg(classes[i], j); 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci ra_set_finalize(ret, NULL); 78bf215546Sopenharmony_ci return ret; 79bf215546Sopenharmony_ci} 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_cistatic void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp) 82bf215546Sopenharmony_ci{ 83bf215546Sopenharmony_ci list_for_each_entry(ppir_block, block, &comp->block_list, list) { 84bf215546Sopenharmony_ci list_for_each_entry(ppir_node, node, &block->node_list, list) { 85bf215546Sopenharmony_ci if (!node->instr || node->op == ppir_op_const) 86bf215546Sopenharmony_ci continue; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci ppir_dest *dest = ppir_node_get_dest(node); 89bf215546Sopenharmony_ci if (dest) { 90bf215546Sopenharmony_ci ppir_reg *reg = NULL; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci if (dest->type == ppir_target_ssa) { 93bf215546Sopenharmony_ci reg = &dest->ssa; 94bf215546Sopenharmony_ci if (node->is_out) 95bf215546Sopenharmony_ci reg->out_reg = true; 96bf215546Sopenharmony_ci list_addtail(®->list, &comp->reg_list); 97bf215546Sopenharmony_ci comp->reg_num++; 98bf215546Sopenharmony_ci } 99bf215546Sopenharmony_ci } 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci } 102bf215546Sopenharmony_ci} 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_cistatic void ppir_regalloc_print_result(ppir_compiler *comp) 105bf215546Sopenharmony_ci{ 106bf215546Sopenharmony_ci printf("======ppir regalloc result======\n"); 107bf215546Sopenharmony_ci list_for_each_entry(ppir_block, block, &comp->block_list, list) { 108bf215546Sopenharmony_ci list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 109bf215546Sopenharmony_ci printf("%03d:", instr->index); 110bf215546Sopenharmony_ci for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { 111bf215546Sopenharmony_ci ppir_node *node = instr->slots[i]; 112bf215546Sopenharmony_ci if (!node) 113bf215546Sopenharmony_ci continue; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci printf(" (%d|", node->index); 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci ppir_dest *dest = ppir_node_get_dest(node); 118bf215546Sopenharmony_ci if (dest) 119bf215546Sopenharmony_ci printf("%d", ppir_target_get_dest_reg_index(dest)); 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci printf("|"); 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci for (int i = 0; i < ppir_node_get_src_num(node); i++) { 124bf215546Sopenharmony_ci if (i) 125bf215546Sopenharmony_ci printf(" "); 126bf215546Sopenharmony_ci printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node, i))); 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci printf(")"); 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci printf("\n"); 132bf215546Sopenharmony_ci } 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci printf("--------------------------\n"); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci printf("======ppir output regs======\n"); 137bf215546Sopenharmony_ci for (int i = 0; i < ppir_output_num; i++) { 138bf215546Sopenharmony_ci if (comp->out_type_to_reg[i] != -1) 139bf215546Sopenharmony_ci printf("%s: $%d\n", ppir_output_type_to_str(i), 140bf215546Sopenharmony_ci (int)comp->out_type_to_reg[i]); 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci printf("--------------------------\n"); 143bf215546Sopenharmony_ci} 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_cistatic bool create_new_instr_after(ppir_block *block, ppir_instr *ref, 146bf215546Sopenharmony_ci ppir_node *node) 147bf215546Sopenharmony_ci{ 148bf215546Sopenharmony_ci ppir_instr *newinstr = ppir_instr_create(block); 149bf215546Sopenharmony_ci if (unlikely(!newinstr)) 150bf215546Sopenharmony_ci return false; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci list_del(&newinstr->list); 153bf215546Sopenharmony_ci list_add(&newinstr->list, &ref->list); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci if (!ppir_instr_insert_node(newinstr, node)) 156bf215546Sopenharmony_ci return false; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) { 159bf215546Sopenharmony_ci instr->seq++; 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci newinstr->seq = ref->seq+1; 162bf215546Sopenharmony_ci newinstr->scheduled = true; 163bf215546Sopenharmony_ci return true; 164bf215546Sopenharmony_ci} 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_cistatic bool create_new_instr_before(ppir_block *block, ppir_instr *ref, 167bf215546Sopenharmony_ci ppir_node *node) 168bf215546Sopenharmony_ci{ 169bf215546Sopenharmony_ci ppir_instr *newinstr = ppir_instr_create(block); 170bf215546Sopenharmony_ci if (unlikely(!newinstr)) 171bf215546Sopenharmony_ci return false; 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci list_del(&newinstr->list); 174bf215546Sopenharmony_ci list_addtail(&newinstr->list, &ref->list); 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci if (!ppir_instr_insert_node(newinstr, node)) 177bf215546Sopenharmony_ci return false; 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) { 180bf215546Sopenharmony_ci instr->seq++; 181bf215546Sopenharmony_ci } 182bf215546Sopenharmony_ci newinstr->seq = ref->seq-1; 183bf215546Sopenharmony_ci newinstr->scheduled = true; 184bf215546Sopenharmony_ci return true; 185bf215546Sopenharmony_ci} 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_cistatic bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block, 188bf215546Sopenharmony_ci ppir_node *node, ppir_src *src, 189bf215546Sopenharmony_ci ppir_node **fill_node) 190bf215546Sopenharmony_ci{ 191bf215546Sopenharmony_ci /* nodes might have multiple references to the same value. 192bf215546Sopenharmony_ci * avoid creating unnecessary loads for the same fill by 193bf215546Sopenharmony_ci * saving the node resulting from the temporary load */ 194bf215546Sopenharmony_ci if (*fill_node) 195bf215546Sopenharmony_ci goto update_src; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci int num_components = src->reg->num_components; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci /* alloc new node to load value */ 200bf215546Sopenharmony_ci ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0); 201bf215546Sopenharmony_ci if (!load_node) 202bf215546Sopenharmony_ci return false; 203bf215546Sopenharmony_ci list_addtail(&load_node->list, &node->list); 204bf215546Sopenharmony_ci comp->num_fills++; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci ppir_load_node *load = ppir_node_to_load(load_node); 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci load->index = -comp->prog->state.stack_size; /* index sizes are negative */ 209bf215546Sopenharmony_ci load->num_components = num_components; 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci ppir_dest *ld_dest = &load->dest; 212bf215546Sopenharmony_ci ld_dest->type = ppir_target_pipeline; 213bf215546Sopenharmony_ci ld_dest->pipeline = ppir_pipeline_reg_uniform; 214bf215546Sopenharmony_ci ld_dest->write_mask = u_bit_consecutive(0, num_components); 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci /* If the uniform slot is empty, we can insert the load_temp 217bf215546Sopenharmony_ci * there and use it directly. Exceptionally, if the node is in the 218bf215546Sopenharmony_ci * varying or texld slot, this doesn't work. */ 219bf215546Sopenharmony_ci if (!node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] && 220bf215546Sopenharmony_ci node->instr_pos != PPIR_INSTR_SLOT_VARYING && 221bf215546Sopenharmony_ci node->instr_pos != PPIR_INSTR_SLOT_TEXLD) { 222bf215546Sopenharmony_ci ppir_node_target_assign(src, load_node); 223bf215546Sopenharmony_ci *fill_node = load_node; 224bf215546Sopenharmony_ci return ppir_instr_insert_node(node->instr, load_node); 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* Uniform slot was taken, so fall back to a new instruction with a mov */ 228bf215546Sopenharmony_ci if (!create_new_instr_before(block, node->instr, load_node)) 229bf215546Sopenharmony_ci return false; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci /* Create move node */ 232bf215546Sopenharmony_ci ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0); 233bf215546Sopenharmony_ci if (unlikely(!move_node)) 234bf215546Sopenharmony_ci return false; 235bf215546Sopenharmony_ci list_addtail(&move_node->list, &node->list); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci ppir_alu_node *move_alu = ppir_node_to_alu(move_node); 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci move_alu->num_src = 1; 240bf215546Sopenharmony_ci move_alu->src->type = ppir_target_pipeline; 241bf215546Sopenharmony_ci move_alu->src->pipeline = ppir_pipeline_reg_uniform; 242bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) 243bf215546Sopenharmony_ci move_alu->src->swizzle[i] = i; 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci ppir_dest *alu_dest = &move_alu->dest; 246bf215546Sopenharmony_ci alu_dest->type = ppir_target_ssa; 247bf215546Sopenharmony_ci alu_dest->ssa.num_components = num_components; 248bf215546Sopenharmony_ci alu_dest->ssa.spilled = true; 249bf215546Sopenharmony_ci alu_dest->write_mask = u_bit_consecutive(0, num_components); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci list_addtail(&alu_dest->ssa.list, &comp->reg_list); 252bf215546Sopenharmony_ci comp->reg_num++; 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci if (!ppir_instr_insert_node(load_node->instr, move_node)) 255bf215546Sopenharmony_ci return false; 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci /* insert the new node as predecessor */ 258bf215546Sopenharmony_ci ppir_node_foreach_pred_safe(node, dep) { 259bf215546Sopenharmony_ci ppir_node *pred = dep->pred; 260bf215546Sopenharmony_ci ppir_node_remove_dep(dep); 261bf215546Sopenharmony_ci ppir_node_add_dep(load_node, pred, ppir_dep_src); 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci ppir_node_add_dep(node, move_node, ppir_dep_src); 264bf215546Sopenharmony_ci ppir_node_add_dep(move_node, load_node, ppir_dep_src); 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci *fill_node = move_node; 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ciupdate_src: 269bf215546Sopenharmony_ci /* switch node src to use the fill node dest */ 270bf215546Sopenharmony_ci ppir_node_target_assign(src, *fill_node); 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci return true; 273bf215546Sopenharmony_ci} 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_cistatic bool ppir_update_spilled_dest_load(ppir_compiler *comp, ppir_block *block, 276bf215546Sopenharmony_ci ppir_node *node) 277bf215546Sopenharmony_ci{ 278bf215546Sopenharmony_ci ppir_dest *dest = ppir_node_get_dest(node); 279bf215546Sopenharmony_ci assert(dest != NULL); 280bf215546Sopenharmony_ci assert(dest->type == ppir_target_register); 281bf215546Sopenharmony_ci ppir_reg *reg = dest->reg; 282bf215546Sopenharmony_ci int num_components = reg->num_components; 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci /* alloc new node to load value */ 285bf215546Sopenharmony_ci ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0); 286bf215546Sopenharmony_ci if (!load_node) 287bf215546Sopenharmony_ci return NULL; 288bf215546Sopenharmony_ci list_addtail(&load_node->list, &node->list); 289bf215546Sopenharmony_ci comp->num_fills++; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci ppir_load_node *load = ppir_node_to_load(load_node); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci load->index = -comp->prog->state.stack_size; /* index sizes are negative */ 294bf215546Sopenharmony_ci load->num_components = num_components; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci load->dest.type = ppir_target_pipeline; 297bf215546Sopenharmony_ci load->dest.pipeline = ppir_pipeline_reg_uniform; 298bf215546Sopenharmony_ci load->dest.write_mask = u_bit_consecutive(0, num_components); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci /* New instruction is needed since we're updating a dest register 301bf215546Sopenharmony_ci * and we can't write to the uniform pipeline reg */ 302bf215546Sopenharmony_ci if (!create_new_instr_before(block, node->instr, load_node)) 303bf215546Sopenharmony_ci return false; 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci /* Create move node */ 306bf215546Sopenharmony_ci ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0); 307bf215546Sopenharmony_ci if (unlikely(!move_node)) 308bf215546Sopenharmony_ci return false; 309bf215546Sopenharmony_ci list_addtail(&move_node->list, &node->list); 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci ppir_alu_node *move_alu = ppir_node_to_alu(move_node); 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci move_alu->num_src = 1; 314bf215546Sopenharmony_ci move_alu->src->type = ppir_target_pipeline; 315bf215546Sopenharmony_ci move_alu->src->pipeline = ppir_pipeline_reg_uniform; 316bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) 317bf215546Sopenharmony_ci move_alu->src->swizzle[i] = i; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci move_alu->dest.type = ppir_target_register; 320bf215546Sopenharmony_ci move_alu->dest.reg = reg; 321bf215546Sopenharmony_ci move_alu->dest.write_mask = u_bit_consecutive(0, num_components); 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci if (!ppir_instr_insert_node(load_node->instr, move_node)) 324bf215546Sopenharmony_ci return false; 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci ppir_node_foreach_pred_safe(node, dep) { 327bf215546Sopenharmony_ci ppir_node *pred = dep->pred; 328bf215546Sopenharmony_ci ppir_node_remove_dep(dep); 329bf215546Sopenharmony_ci ppir_node_add_dep(load_node, pred, ppir_dep_src); 330bf215546Sopenharmony_ci } 331bf215546Sopenharmony_ci ppir_node_add_dep(node, move_node, ppir_dep_src); 332bf215546Sopenharmony_ci ppir_node_add_dep(move_node, load_node, ppir_dep_src); 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci return true; 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cistatic bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block, 338bf215546Sopenharmony_ci ppir_node *node) 339bf215546Sopenharmony_ci{ 340bf215546Sopenharmony_ci ppir_dest *dest = ppir_node_get_dest(node); 341bf215546Sopenharmony_ci assert(dest != NULL); 342bf215546Sopenharmony_ci ppir_reg *reg = ppir_dest_get_reg(dest); 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci /* alloc new node to store value */ 345bf215546Sopenharmony_ci ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0); 346bf215546Sopenharmony_ci if (!store_node) 347bf215546Sopenharmony_ci return false; 348bf215546Sopenharmony_ci list_addtail(&store_node->list, &node->list); 349bf215546Sopenharmony_ci comp->num_spills++; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci ppir_store_node *store = ppir_node_to_store(store_node); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci store->index = -comp->prog->state.stack_size; /* index sizes are negative */ 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci ppir_node_target_assign(&store->src, node); 356bf215546Sopenharmony_ci store->num_components = reg->num_components; 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci /* insert the new node as successor */ 359bf215546Sopenharmony_ci ppir_node_foreach_succ_safe(node, dep) { 360bf215546Sopenharmony_ci ppir_node *succ = dep->succ; 361bf215546Sopenharmony_ci ppir_node_remove_dep(dep); 362bf215546Sopenharmony_ci ppir_node_add_dep(succ, store_node, ppir_dep_src); 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci ppir_node_add_dep(store_node, node, ppir_dep_src); 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci /* If the store temp slot is empty, we can insert the store_temp 367bf215546Sopenharmony_ci * there and use it directly. Exceptionally, if the node is in the 368bf215546Sopenharmony_ci * combine slot, this doesn't work. */ 369bf215546Sopenharmony_ci if (!node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] && 370bf215546Sopenharmony_ci node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE) 371bf215546Sopenharmony_ci return ppir_instr_insert_node(node->instr, store_node); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci /* Not possible to merge store, so fall back to a new instruction */ 374bf215546Sopenharmony_ci return create_new_instr_after(block, node->instr, store_node); 375bf215546Sopenharmony_ci} 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_cistatic bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen) 378bf215546Sopenharmony_ci{ 379bf215546Sopenharmony_ci list_for_each_entry(ppir_block, block, &comp->block_list, list) { 380bf215546Sopenharmony_ci list_for_each_entry(ppir_node, node, &block->node_list, list) { 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci ppir_dest *dest = ppir_node_get_dest(node); 383bf215546Sopenharmony_ci if (dest && ppir_dest_get_reg(dest) == chosen) { 384bf215546Sopenharmony_ci /* If dest is a register, it might be updating only some its 385bf215546Sopenharmony_ci * components, so need to load the existing value first */ 386bf215546Sopenharmony_ci if (dest->type == ppir_target_register) { 387bf215546Sopenharmony_ci if (!ppir_update_spilled_dest_load(comp, block, node)) 388bf215546Sopenharmony_ci return false; 389bf215546Sopenharmony_ci } 390bf215546Sopenharmony_ci if (!ppir_update_spilled_dest(comp, block, node)) 391bf215546Sopenharmony_ci return false; 392bf215546Sopenharmony_ci } 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci ppir_node *fill_node = NULL; 395bf215546Sopenharmony_ci /* nodes might have multiple references to the same value. 396bf215546Sopenharmony_ci * avoid creating unnecessary loads for the same fill by 397bf215546Sopenharmony_ci * saving the node resulting from the temporary load */ 398bf215546Sopenharmony_ci for (int i = 0; i < ppir_node_get_src_num(node); i++) { 399bf215546Sopenharmony_ci ppir_src *src = ppir_node_get_src(node, i); 400bf215546Sopenharmony_ci ppir_reg *reg = ppir_src_get_reg(src); 401bf215546Sopenharmony_ci if (reg == chosen) { 402bf215546Sopenharmony_ci if (!ppir_update_spilled_src(comp, block, node, src, &fill_node)) 403bf215546Sopenharmony_ci return false; 404bf215546Sopenharmony_ci } 405bf215546Sopenharmony_ci } 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci } 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci return true; 410bf215546Sopenharmony_ci} 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_cistatic ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp, 413bf215546Sopenharmony_ci struct ra_graph *g) 414bf215546Sopenharmony_ci{ 415bf215546Sopenharmony_ci float spill_costs[comp->reg_num]; 416bf215546Sopenharmony_ci /* experimentally determined, it seems to be worth scaling cost of 417bf215546Sopenharmony_ci * regs in instructions that have used uniform/store_temp slots, 418bf215546Sopenharmony_ci * but not too much as to offset the num_components base cost. */ 419bf215546Sopenharmony_ci const float slot_scale = 1.1f; 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci memset(spill_costs, 0, sizeof(spill_costs[0]) * comp->reg_num); 422bf215546Sopenharmony_ci list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { 423bf215546Sopenharmony_ci if (reg->spilled) { 424bf215546Sopenharmony_ci /* not considered for spilling */ 425bf215546Sopenharmony_ci spill_costs[reg->regalloc_index] = 0.0f; 426bf215546Sopenharmony_ci continue; 427bf215546Sopenharmony_ci } 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci /* It is beneficial to spill registers with higher component number, 430bf215546Sopenharmony_ci * so increase the cost of spilling registers with few components */ 431bf215546Sopenharmony_ci float spill_cost = 4.0f / (float)reg->num_components; 432bf215546Sopenharmony_ci spill_costs[reg->regalloc_index] = spill_cost; 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci list_for_each_entry(ppir_block, block, &comp->block_list, list) { 436bf215546Sopenharmony_ci list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 437bf215546Sopenharmony_ci if (instr->slots[PPIR_INSTR_SLOT_UNIFORM]) { 438bf215546Sopenharmony_ci for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { 439bf215546Sopenharmony_ci ppir_node *node = instr->slots[i]; 440bf215546Sopenharmony_ci if (!node) 441bf215546Sopenharmony_ci continue; 442bf215546Sopenharmony_ci for (int j = 0; j < ppir_node_get_src_num(node); j++) { 443bf215546Sopenharmony_ci ppir_src *src = ppir_node_get_src(node, j); 444bf215546Sopenharmony_ci if (!src) 445bf215546Sopenharmony_ci continue; 446bf215546Sopenharmony_ci ppir_reg *reg = ppir_src_get_reg(src); 447bf215546Sopenharmony_ci if (!reg) 448bf215546Sopenharmony_ci continue; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci spill_costs[reg->regalloc_index] *= slot_scale; 451bf215546Sopenharmony_ci } 452bf215546Sopenharmony_ci } 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci if (instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]) { 455bf215546Sopenharmony_ci for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { 456bf215546Sopenharmony_ci ppir_node *node = instr->slots[i]; 457bf215546Sopenharmony_ci if (!node) 458bf215546Sopenharmony_ci continue; 459bf215546Sopenharmony_ci ppir_dest *dest = ppir_node_get_dest(node); 460bf215546Sopenharmony_ci if (!dest) 461bf215546Sopenharmony_ci continue; 462bf215546Sopenharmony_ci ppir_reg *reg = ppir_dest_get_reg(dest); 463bf215546Sopenharmony_ci if (!reg) 464bf215546Sopenharmony_ci continue; 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci spill_costs[reg->regalloc_index] *= slot_scale; 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci } 470bf215546Sopenharmony_ci } 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci for (int i = 0; i < comp->reg_num; i++) 473bf215546Sopenharmony_ci ra_set_node_spill_cost(g, i, spill_costs[i]); 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci int r = ra_get_best_spill_node(g); 476bf215546Sopenharmony_ci if (r == -1) 477bf215546Sopenharmony_ci return NULL; 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci ppir_reg *chosen = NULL; 480bf215546Sopenharmony_ci int i = 0; 481bf215546Sopenharmony_ci list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { 482bf215546Sopenharmony_ci if (i++ == r) { 483bf215546Sopenharmony_ci chosen = reg; 484bf215546Sopenharmony_ci break; 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci } 487bf215546Sopenharmony_ci assert(chosen); 488bf215546Sopenharmony_ci chosen->spilled = true; 489bf215546Sopenharmony_ci chosen->is_head = true; /* store_temp unable to do swizzle */ 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci return chosen; 492bf215546Sopenharmony_ci} 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_cistatic void ppir_regalloc_reset_liveness_info(ppir_compiler *comp) 495bf215546Sopenharmony_ci{ 496bf215546Sopenharmony_ci int idx = 0; 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { 499bf215546Sopenharmony_ci reg->regalloc_index = idx++; 500bf215546Sopenharmony_ci } 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci list_for_each_entry(ppir_block, block, &comp->block_list, list) { 503bf215546Sopenharmony_ci list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci if (instr->live_mask) 506bf215546Sopenharmony_ci ralloc_free(instr->live_mask); 507bf215546Sopenharmony_ci instr->live_mask = rzalloc_array(comp, uint8_t, 508bf215546Sopenharmony_ci reg_mask_size(comp->reg_num)); 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci if (instr->live_set) 511bf215546Sopenharmony_ci ralloc_free(instr->live_set); 512bf215546Sopenharmony_ci instr->live_set = rzalloc_array(comp, BITSET_WORD, comp->reg_num); 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci if (instr->live_internal) 515bf215546Sopenharmony_ci ralloc_free(instr->live_internal); 516bf215546Sopenharmony_ci instr->live_internal = rzalloc_array(comp, BITSET_WORD, comp->reg_num); 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci } 519bf215546Sopenharmony_ci} 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_cistatic void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g, 522bf215546Sopenharmony_ci BITSET_WORD *liveness) 523bf215546Sopenharmony_ci{ 524bf215546Sopenharmony_ci int i, j; 525bf215546Sopenharmony_ci BITSET_FOREACH_SET(i, liveness, comp->reg_num) { 526bf215546Sopenharmony_ci BITSET_FOREACH_SET(j, liveness, comp->reg_num) { 527bf215546Sopenharmony_ci ra_add_node_interference(g, i, j); 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci BITSET_CLEAR(liveness, i); 530bf215546Sopenharmony_ci } 531bf215546Sopenharmony_ci} 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ciint lima_ppir_force_spilling = 0; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_cistatic bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled) 536bf215546Sopenharmony_ci{ 537bf215546Sopenharmony_ci ppir_regalloc_reset_liveness_info(comp); 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci struct ra_graph *g = ra_alloc_interference_graph( 540bf215546Sopenharmony_ci comp->ra, comp->reg_num); 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci int n = 0; 543bf215546Sopenharmony_ci list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { 544bf215546Sopenharmony_ci int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1); 545bf215546Sopenharmony_ci if (reg->is_head) 546bf215546Sopenharmony_ci c += 4; 547bf215546Sopenharmony_ci ra_set_node_class(g, n++, ra_get_class_from_index(comp->ra, c)); 548bf215546Sopenharmony_ci } 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci ppir_liveness_analysis(comp); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci list_for_each_entry(ppir_block, block, &comp->block_list, list) { 553bf215546Sopenharmony_ci list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 554bf215546Sopenharmony_ci int i; 555bf215546Sopenharmony_ci BITSET_FOREACH_SET(i, instr->live_internal, comp->reg_num) { 556bf215546Sopenharmony_ci BITSET_SET(instr->live_set, i); 557bf215546Sopenharmony_ci } 558bf215546Sopenharmony_ci ppir_all_interference(comp, g, instr->live_set); 559bf215546Sopenharmony_ci } 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci *spilled = false; 563bf215546Sopenharmony_ci bool ok = ra_allocate(g); 564bf215546Sopenharmony_ci if (!ok || (comp->force_spilling-- > 0)) { 565bf215546Sopenharmony_ci ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g); 566bf215546Sopenharmony_ci if (chosen) { 567bf215546Sopenharmony_ci /* stack_size will be used to assemble the frame reg in lima_draw. 568bf215546Sopenharmony_ci * It is also be used in the spilling code, as negative indices 569bf215546Sopenharmony_ci * starting from -1, to create stack addresses. */ 570bf215546Sopenharmony_ci comp->prog->state.stack_size++; 571bf215546Sopenharmony_ci if (!ppir_regalloc_spill_reg(comp, chosen)) 572bf215546Sopenharmony_ci goto err_out; 573bf215546Sopenharmony_ci /* Ask the outer loop to call back in. */ 574bf215546Sopenharmony_ci *spilled = true; 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci ppir_debug("spilled register %d/%d, num_components: %d\n", 577bf215546Sopenharmony_ci chosen->regalloc_index, comp->reg_num, 578bf215546Sopenharmony_ci chosen->num_components); 579bf215546Sopenharmony_ci goto err_out; 580bf215546Sopenharmony_ci } 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci ppir_error("regalloc fail\n"); 583bf215546Sopenharmony_ci goto err_out; 584bf215546Sopenharmony_ci } 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci n = 0; 587bf215546Sopenharmony_ci list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { 588bf215546Sopenharmony_ci reg->index = ra_get_node_reg(g, n++); 589bf215546Sopenharmony_ci if (reg->out_reg) { 590bf215546Sopenharmony_ci /* We need actual reg number, we don't have swizzle for output regs */ 591bf215546Sopenharmony_ci assert(!(reg->index & 0x3) && "ppir: output regs don't have swizzle"); 592bf215546Sopenharmony_ci comp->out_type_to_reg[reg->out_type] = reg->index / 4; 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci } 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci ralloc_free(g); 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci if (lima_debug & LIMA_DEBUG_PP) 599bf215546Sopenharmony_ci ppir_regalloc_print_result(comp); 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci return true; 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_cierr_out: 604bf215546Sopenharmony_ci ralloc_free(g); 605bf215546Sopenharmony_ci return false; 606bf215546Sopenharmony_ci} 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_cibool ppir_regalloc_prog(ppir_compiler *comp) 609bf215546Sopenharmony_ci{ 610bf215546Sopenharmony_ci bool spilled = false; 611bf215546Sopenharmony_ci comp->prog->state.stack_size = 0; 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci /* Set from an environment variable to force spilling 614bf215546Sopenharmony_ci * for debugging purposes, see lima_screen.c */ 615bf215546Sopenharmony_ci comp->force_spilling = lima_ppir_force_spilling; 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci ppir_regalloc_update_reglist_ssa(comp); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci /* No registers? Probably shader consists of discard instruction */ 620bf215546Sopenharmony_ci if (list_is_empty(&comp->reg_list)) { 621bf215546Sopenharmony_ci comp->prog->state.frag_color0_reg = 0; 622bf215546Sopenharmony_ci comp->prog->state.frag_color1_reg = -1; 623bf215546Sopenharmony_ci comp->prog->state.frag_depth_reg = -1; 624bf215546Sopenharmony_ci return true; 625bf215546Sopenharmony_ci } 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci /* this will most likely succeed in the first 628bf215546Sopenharmony_ci * try, except for very complicated shaders */ 629bf215546Sopenharmony_ci while (!ppir_regalloc_prog_try(comp, &spilled)) 630bf215546Sopenharmony_ci if (!spilled) 631bf215546Sopenharmony_ci return false; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci comp->prog->state.frag_color0_reg = 634bf215546Sopenharmony_ci comp->out_type_to_reg[ppir_output_color0]; 635bf215546Sopenharmony_ci comp->prog->state.frag_color1_reg = 636bf215546Sopenharmony_ci comp->out_type_to_reg[ppir_output_color1]; 637bf215546Sopenharmony_ci comp->prog->state.frag_depth_reg = 638bf215546Sopenharmony_ci comp->out_type_to_reg[ppir_output_depth]; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci return true; 641bf215546Sopenharmony_ci} 642