1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2012 Intel Corporation 3bf215546Sopenharmony_ci * Copyright © 2016 Broadcom 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22bf215546Sopenharmony_ci * IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#define MAX_INSTRUCTION (1 << 30) 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/ralloc.h" 28bf215546Sopenharmony_ci#include "util/register_allocate.h" 29bf215546Sopenharmony_ci#include "v3d_compiler.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci/* Keeps track of conditional / partial writes in a block */ 32bf215546Sopenharmony_cistruct partial_update_state { 33bf215546Sopenharmony_ci /* Instruction doing a conditional or partial write */ 34bf215546Sopenharmony_ci struct qinst *inst; 35bf215546Sopenharmony_ci /* Instruction that set the flags for the conditional write */ 36bf215546Sopenharmony_ci struct qinst *flags_inst; 37bf215546Sopenharmony_ci}; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cistatic int 40bf215546Sopenharmony_civir_reg_to_var(struct qreg reg) 41bf215546Sopenharmony_ci{ 42bf215546Sopenharmony_ci if (reg.file == QFILE_TEMP) 43bf215546Sopenharmony_ci return reg.index; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci return -1; 46bf215546Sopenharmony_ci} 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic void 49bf215546Sopenharmony_civir_setup_use(struct v3d_compile *c, struct qblock *block, int ip, 50bf215546Sopenharmony_ci struct partial_update_state *partial_update_ht, struct qinst *inst, 51bf215546Sopenharmony_ci struct qreg src, struct qinst *flags_inst) 52bf215546Sopenharmony_ci{ 53bf215546Sopenharmony_ci int var = vir_reg_to_var(src); 54bf215546Sopenharmony_ci if (var == -1) 55bf215546Sopenharmony_ci return; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci c->temp_start[var] = MIN2(c->temp_start[var], ip); 58bf215546Sopenharmony_ci c->temp_end[var] = MAX2(c->temp_end[var], ip); 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci /* The use[] bitset marks when the block makes 61bf215546Sopenharmony_ci * use of a variable without having completely 62bf215546Sopenharmony_ci * defined that variable within the block. 63bf215546Sopenharmony_ci */ 64bf215546Sopenharmony_ci if (!BITSET_TEST(block->def, var)) { 65bf215546Sopenharmony_ci /* If this use of var is conditional and the condition 66bf215546Sopenharmony_ci * and flags match those of a previous instruction 67bf215546Sopenharmony_ci * in the same block partially defining var then we 68bf215546Sopenharmony_ci * consider var completely defined within the block. 69bf215546Sopenharmony_ci */ 70bf215546Sopenharmony_ci if (BITSET_TEST(block->defout, var)) { 71bf215546Sopenharmony_ci struct partial_update_state *state = 72bf215546Sopenharmony_ci &partial_update_ht[var]; 73bf215546Sopenharmony_ci if (state->inst) { 74bf215546Sopenharmony_ci if (vir_get_cond(inst) == vir_get_cond(state->inst) && 75bf215546Sopenharmony_ci flags_inst == state->flags_inst) { 76bf215546Sopenharmony_ci return; 77bf215546Sopenharmony_ci } 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci BITSET_SET(block->use, var); 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci/* The def[] bitset marks when an initialization in a 86bf215546Sopenharmony_ci * block completely screens off previous updates of 87bf215546Sopenharmony_ci * that variable. 88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_cistatic void 90bf215546Sopenharmony_civir_setup_def(struct v3d_compile *c, struct qblock *block, int ip, 91bf215546Sopenharmony_ci struct partial_update_state *partial_update, struct qinst *inst, 92bf215546Sopenharmony_ci struct qinst *flags_inst) 93bf215546Sopenharmony_ci{ 94bf215546Sopenharmony_ci if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU) 95bf215546Sopenharmony_ci return; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci int var = vir_reg_to_var(inst->dst); 98bf215546Sopenharmony_ci if (var == -1) 99bf215546Sopenharmony_ci return; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci c->temp_start[var] = MIN2(c->temp_start[var], ip); 102bf215546Sopenharmony_ci c->temp_end[var] = MAX2(c->temp_end[var], ip); 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci /* Mark the block as having a (partial) def of the var. */ 105bf215546Sopenharmony_ci BITSET_SET(block->defout, var); 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci /* If we've already tracked this as a def that screens off previous 108bf215546Sopenharmony_ci * uses, or already used it within the block, there's nothing to do. 109bf215546Sopenharmony_ci */ 110bf215546Sopenharmony_ci if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) 111bf215546Sopenharmony_ci return; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci /* Easy, common case: unconditional full register update.*/ 114bf215546Sopenharmony_ci if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE && 115bf215546Sopenharmony_ci inst->qpu.flags.mc == V3D_QPU_COND_NONE) && 116bf215546Sopenharmony_ci inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE && 117bf215546Sopenharmony_ci inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) { 118bf215546Sopenharmony_ci BITSET_SET(block->def, var); 119bf215546Sopenharmony_ci return; 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci /* Keep track of conditional writes. 123bf215546Sopenharmony_ci * 124bf215546Sopenharmony_ci * Notice that the dst's live range for a conditional or partial writes 125bf215546Sopenharmony_ci * will get extended up the control flow to the top of the program until 126bf215546Sopenharmony_ci * we find a full write, making register allocation more difficult, so 127bf215546Sopenharmony_ci * we should try our best to keep track of these and figure out if a 128bf215546Sopenharmony_ci * combination of them actually writes the entire register so we can 129bf215546Sopenharmony_ci * stop that process early and reduce liveness. 130bf215546Sopenharmony_ci * 131bf215546Sopenharmony_ci * FIXME: Track partial updates via pack/unpack. 132bf215546Sopenharmony_ci */ 133bf215546Sopenharmony_ci struct partial_update_state *state = &partial_update[var]; 134bf215546Sopenharmony_ci if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 135bf215546Sopenharmony_ci inst->qpu.flags.mc != V3D_QPU_COND_NONE) { 136bf215546Sopenharmony_ci state->inst = inst; 137bf215546Sopenharmony_ci state->flags_inst = flags_inst; 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci/* Sets up the def/use arrays for when variables are used-before-defined or 142bf215546Sopenharmony_ci * defined-before-used in the block. 143bf215546Sopenharmony_ci * 144bf215546Sopenharmony_ci * Also initializes the temp_start/temp_end to cover just the instruction IPs 145bf215546Sopenharmony_ci * where the variable is used, which will be extended later in 146bf215546Sopenharmony_ci * vir_compute_start_end(). 147bf215546Sopenharmony_ci */ 148bf215546Sopenharmony_cistatic void 149bf215546Sopenharmony_civir_setup_def_use(struct v3d_compile *c) 150bf215546Sopenharmony_ci{ 151bf215546Sopenharmony_ci struct partial_update_state *partial_update = 152bf215546Sopenharmony_ci rzalloc_array(c, struct partial_update_state, c->num_temps); 153bf215546Sopenharmony_ci int ip = 0; 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci vir_for_each_block(block, c) { 156bf215546Sopenharmony_ci block->start_ip = ip; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci memset(partial_update, 0, 159bf215546Sopenharmony_ci sizeof(struct partial_update_state) * c->num_temps); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci struct qinst *flags_inst = NULL; 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci vir_for_each_inst(inst, block) { 164bf215546Sopenharmony_ci for (int i = 0; i < vir_get_nsrc(inst); i++) { 165bf215546Sopenharmony_ci vir_setup_use(c, block, ip, partial_update, 166bf215546Sopenharmony_ci inst, inst->src[i], flags_inst); 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci vir_setup_def(c, block, ip, partial_update, 170bf215546Sopenharmony_ci inst, flags_inst); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci if (inst->qpu.flags.apf != V3D_QPU_PF_NONE || 173bf215546Sopenharmony_ci inst->qpu.flags.mpf != V3D_QPU_PF_NONE) { 174bf215546Sopenharmony_ci flags_inst = inst; 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci if (inst->qpu.flags.auf != V3D_QPU_UF_NONE || 178bf215546Sopenharmony_ci inst->qpu.flags.muf != V3D_QPU_UF_NONE) { 179bf215546Sopenharmony_ci flags_inst = NULL; 180bf215546Sopenharmony_ci } 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci /* Payload registers: r0/1/2 contain W, centroid W, 183bf215546Sopenharmony_ci * and Z at program start. Register allocation will 184bf215546Sopenharmony_ci * force their nodes to R0/1/2. 185bf215546Sopenharmony_ci */ 186bf215546Sopenharmony_ci if (inst->src[0].file == QFILE_REG) { 187bf215546Sopenharmony_ci switch (inst->src[0].index) { 188bf215546Sopenharmony_ci case 0: 189bf215546Sopenharmony_ci case 1: 190bf215546Sopenharmony_ci case 2: 191bf215546Sopenharmony_ci c->temp_start[inst->dst.index] = 0; 192bf215546Sopenharmony_ci break; 193bf215546Sopenharmony_ci } 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci ip++; 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci block->end_ip = ip; 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci ralloc_free(partial_update); 202bf215546Sopenharmony_ci} 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_cistatic bool 205bf215546Sopenharmony_civir_live_variables_dataflow(struct v3d_compile *c, int bitset_words) 206bf215546Sopenharmony_ci{ 207bf215546Sopenharmony_ci bool cont = false; 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci vir_for_each_block_rev(block, c) { 210bf215546Sopenharmony_ci /* Update live_out: Any successor using the variable 211bf215546Sopenharmony_ci * on entrance needs us to have the variable live on 212bf215546Sopenharmony_ci * exit. 213bf215546Sopenharmony_ci */ 214bf215546Sopenharmony_ci vir_for_each_successor(succ, block) { 215bf215546Sopenharmony_ci for (int i = 0; i < bitset_words; i++) { 216bf215546Sopenharmony_ci BITSET_WORD new_live_out = (succ->live_in[i] & 217bf215546Sopenharmony_ci ~block->live_out[i]); 218bf215546Sopenharmony_ci if (new_live_out) { 219bf215546Sopenharmony_ci block->live_out[i] |= new_live_out; 220bf215546Sopenharmony_ci cont = true; 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci } 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci /* Update live_in */ 226bf215546Sopenharmony_ci for (int i = 0; i < bitset_words; i++) { 227bf215546Sopenharmony_ci BITSET_WORD new_live_in = (block->use[i] | 228bf215546Sopenharmony_ci (block->live_out[i] & 229bf215546Sopenharmony_ci ~block->def[i])); 230bf215546Sopenharmony_ci if (new_live_in & ~block->live_in[i]) { 231bf215546Sopenharmony_ci block->live_in[i] |= new_live_in; 232bf215546Sopenharmony_ci cont = true; 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci return cont; 238bf215546Sopenharmony_ci} 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_cistatic bool 241bf215546Sopenharmony_civir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words) 242bf215546Sopenharmony_ci{ 243bf215546Sopenharmony_ci bool cont = false; 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci vir_for_each_block_rev(block, c) { 246bf215546Sopenharmony_ci /* Propagate defin/defout down the successors to produce the 247bf215546Sopenharmony_ci * union of blocks with a reachable (partial) definition of 248bf215546Sopenharmony_ci * the var. 249bf215546Sopenharmony_ci * 250bf215546Sopenharmony_ci * This keeps a conditional first write to a reg from 251bf215546Sopenharmony_ci * extending its lifetime back to the start of the program. 252bf215546Sopenharmony_ci */ 253bf215546Sopenharmony_ci vir_for_each_successor(succ, block) { 254bf215546Sopenharmony_ci for (int i = 0; i < bitset_words; i++) { 255bf215546Sopenharmony_ci BITSET_WORD new_def = (block->defout[i] & 256bf215546Sopenharmony_ci ~succ->defin[i]); 257bf215546Sopenharmony_ci succ->defin[i] |= new_def; 258bf215546Sopenharmony_ci succ->defout[i] |= new_def; 259bf215546Sopenharmony_ci cont |= new_def; 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci return cont; 265bf215546Sopenharmony_ci} 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci/** 268bf215546Sopenharmony_ci * Extend the start/end ranges for each variable to account for the 269bf215546Sopenharmony_ci * new information calculated from control flow. 270bf215546Sopenharmony_ci */ 271bf215546Sopenharmony_cistatic void 272bf215546Sopenharmony_civir_compute_start_end(struct v3d_compile *c, int num_vars) 273bf215546Sopenharmony_ci{ 274bf215546Sopenharmony_ci vir_for_each_block(block, c) { 275bf215546Sopenharmony_ci for (int i = 0; i < num_vars; i++) { 276bf215546Sopenharmony_ci if (BITSET_TEST(block->live_in, i) && 277bf215546Sopenharmony_ci BITSET_TEST(block->defin, i)) { 278bf215546Sopenharmony_ci c->temp_start[i] = MIN2(c->temp_start[i], 279bf215546Sopenharmony_ci block->start_ip); 280bf215546Sopenharmony_ci c->temp_end[i] = MAX2(c->temp_end[i], 281bf215546Sopenharmony_ci block->start_ip); 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci if (BITSET_TEST(block->live_out, i) && 285bf215546Sopenharmony_ci BITSET_TEST(block->defout, i)) { 286bf215546Sopenharmony_ci c->temp_start[i] = MIN2(c->temp_start[i], 287bf215546Sopenharmony_ci block->end_ip); 288bf215546Sopenharmony_ci c->temp_end[i] = MAX2(c->temp_end[i], 289bf215546Sopenharmony_ci block->end_ip); 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci } 293bf215546Sopenharmony_ci} 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_civoid 296bf215546Sopenharmony_civir_calculate_live_intervals(struct v3d_compile *c) 297bf215546Sopenharmony_ci{ 298bf215546Sopenharmony_ci int bitset_words = BITSET_WORDS(c->num_temps); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci /* We may be called more than once if we've rearranged the program to 301bf215546Sopenharmony_ci * try to get register allocation to succeed. 302bf215546Sopenharmony_ci */ 303bf215546Sopenharmony_ci if (c->temp_start) { 304bf215546Sopenharmony_ci ralloc_free(c->temp_start); 305bf215546Sopenharmony_ci ralloc_free(c->temp_end); 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci vir_for_each_block(block, c) { 308bf215546Sopenharmony_ci ralloc_free(block->def); 309bf215546Sopenharmony_ci ralloc_free(block->use); 310bf215546Sopenharmony_ci ralloc_free(block->live_in); 311bf215546Sopenharmony_ci ralloc_free(block->live_out); 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci c->temp_start = rzalloc_array(c, int, c->num_temps); 316bf215546Sopenharmony_ci c->temp_end = rzalloc_array(c, int, c->num_temps); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci for (int i = 0; i < c->num_temps; i++) { 319bf215546Sopenharmony_ci c->temp_start[i] = MAX_INSTRUCTION; 320bf215546Sopenharmony_ci c->temp_end[i] = -1; 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci vir_for_each_block(block, c) { 324bf215546Sopenharmony_ci block->def = rzalloc_array(c, BITSET_WORD, bitset_words); 325bf215546Sopenharmony_ci block->defin = rzalloc_array(c, BITSET_WORD, bitset_words); 326bf215546Sopenharmony_ci block->defout = rzalloc_array(c, BITSET_WORD, bitset_words); 327bf215546Sopenharmony_ci block->use = rzalloc_array(c, BITSET_WORD, bitset_words); 328bf215546Sopenharmony_ci block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words); 329bf215546Sopenharmony_ci block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words); 330bf215546Sopenharmony_ci } 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci vir_setup_def_use(c); 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci while (vir_live_variables_dataflow(c, bitset_words)) 335bf215546Sopenharmony_ci ; 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci while (vir_live_variables_defin_defout_dataflow(c, bitset_words)) 338bf215546Sopenharmony_ci ; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci vir_compute_start_end(c, c->num_temps); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci c->live_intervals_valid = true; 343bf215546Sopenharmony_ci} 344