1/* 2 * Copyright © 2012 Intel Corporation 3 * Copyright © 2016 Broadcom 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25#define MAX_INSTRUCTION (1 << 30) 26 27#include "util/ralloc.h" 28#include "util/register_allocate.h" 29#include "v3d_compiler.h" 30 31/* Keeps track of conditional / partial writes in a block */ 32struct partial_update_state { 33 /* Instruction doing a conditional or partial write */ 34 struct qinst *inst; 35 /* Instruction that set the flags for the conditional write */ 36 struct qinst *flags_inst; 37}; 38 39static int 40vir_reg_to_var(struct qreg reg) 41{ 42 if (reg.file == QFILE_TEMP) 43 return reg.index; 44 45 return -1; 46} 47 48static void 49vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip, 50 struct partial_update_state *partial_update_ht, struct qinst *inst, 51 struct qreg src, struct qinst *flags_inst) 52{ 53 int var = vir_reg_to_var(src); 54 if (var == -1) 55 return; 56 57 c->temp_start[var] = MIN2(c->temp_start[var], ip); 58 c->temp_end[var] = MAX2(c->temp_end[var], ip); 59 60 /* The use[] bitset marks when the block makes 61 * use of a variable without having completely 62 * defined that variable within the block. 63 */ 64 if (!BITSET_TEST(block->def, var)) { 65 /* If this use of var is conditional and the condition 66 * and flags match those of a previous instruction 67 * in the same block partially defining var then we 68 * consider var completely defined within the block. 69 */ 70 if (BITSET_TEST(block->defout, var)) { 71 struct partial_update_state *state = 72 &partial_update_ht[var]; 73 if (state->inst) { 74 if (vir_get_cond(inst) == vir_get_cond(state->inst) && 75 flags_inst == state->flags_inst) { 76 return; 77 } 78 } 79 } 80 81 BITSET_SET(block->use, var); 82 } 83} 84 85/* The def[] bitset marks when an initialization in a 86 * block completely screens off previous updates of 87 * that variable. 88 */ 89static void 90vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip, 91 struct partial_update_state *partial_update, struct qinst *inst, 92 struct qinst *flags_inst) 93{ 94 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU) 95 return; 96 97 int var = vir_reg_to_var(inst->dst); 98 if (var == -1) 99 return; 100 101 c->temp_start[var] = MIN2(c->temp_start[var], ip); 102 c->temp_end[var] = MAX2(c->temp_end[var], ip); 103 104 /* Mark the block as having a (partial) def of the var. */ 105 BITSET_SET(block->defout, var); 106 107 /* If we've already tracked this as a def that screens off previous 108 * uses, or already used it within the block, there's nothing to do. 109 */ 110 if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) 111 return; 112 113 /* Easy, common case: unconditional full register update.*/ 114 if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE && 115 inst->qpu.flags.mc == V3D_QPU_COND_NONE) && 116 inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE && 117 inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) { 118 BITSET_SET(block->def, var); 119 return; 120 } 121 122 /* Keep track of conditional writes. 123 * 124 * Notice that the dst's live range for a conditional or partial writes 125 * will get extended up the control flow to the top of the program until 126 * we find a full write, making register allocation more difficult, so 127 * we should try our best to keep track of these and figure out if a 128 * combination of them actually writes the entire register so we can 129 * stop that process early and reduce liveness. 130 * 131 * FIXME: Track partial updates via pack/unpack. 132 */ 133 struct partial_update_state *state = &partial_update[var]; 134 if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 135 inst->qpu.flags.mc != V3D_QPU_COND_NONE) { 136 state->inst = inst; 137 state->flags_inst = flags_inst; 138 } 139} 140 141/* Sets up the def/use arrays for when variables are used-before-defined or 142 * defined-before-used in the block. 143 * 144 * Also initializes the temp_start/temp_end to cover just the instruction IPs 145 * where the variable is used, which will be extended later in 146 * vir_compute_start_end(). 147 */ 148static void 149vir_setup_def_use(struct v3d_compile *c) 150{ 151 struct partial_update_state *partial_update = 152 rzalloc_array(c, struct partial_update_state, c->num_temps); 153 int ip = 0; 154 155 vir_for_each_block(block, c) { 156 block->start_ip = ip; 157 158 memset(partial_update, 0, 159 sizeof(struct partial_update_state) * c->num_temps); 160 161 struct qinst *flags_inst = NULL; 162 163 vir_for_each_inst(inst, block) { 164 for (int i = 0; i < vir_get_nsrc(inst); i++) { 165 vir_setup_use(c, block, ip, partial_update, 166 inst, inst->src[i], flags_inst); 167 } 168 169 vir_setup_def(c, block, ip, partial_update, 170 inst, flags_inst); 171 172 if (inst->qpu.flags.apf != V3D_QPU_PF_NONE || 173 inst->qpu.flags.mpf != V3D_QPU_PF_NONE) { 174 flags_inst = inst; 175 } 176 177 if (inst->qpu.flags.auf != V3D_QPU_UF_NONE || 178 inst->qpu.flags.muf != V3D_QPU_UF_NONE) { 179 flags_inst = NULL; 180 } 181 182 /* Payload registers: r0/1/2 contain W, centroid W, 183 * and Z at program start. Register allocation will 184 * force their nodes to R0/1/2. 185 */ 186 if (inst->src[0].file == QFILE_REG) { 187 switch (inst->src[0].index) { 188 case 0: 189 case 1: 190 case 2: 191 c->temp_start[inst->dst.index] = 0; 192 break; 193 } 194 } 195 196 ip++; 197 } 198 block->end_ip = ip; 199 } 200 201 ralloc_free(partial_update); 202} 203 204static bool 205vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words) 206{ 207 bool cont = false; 208 209 vir_for_each_block_rev(block, c) { 210 /* Update live_out: Any successor using the variable 211 * on entrance needs us to have the variable live on 212 * exit. 213 */ 214 vir_for_each_successor(succ, block) { 215 for (int i = 0; i < bitset_words; i++) { 216 BITSET_WORD new_live_out = (succ->live_in[i] & 217 ~block->live_out[i]); 218 if (new_live_out) { 219 block->live_out[i] |= new_live_out; 220 cont = true; 221 } 222 } 223 } 224 225 /* Update live_in */ 226 for (int i = 0; i < bitset_words; i++) { 227 BITSET_WORD new_live_in = (block->use[i] | 228 (block->live_out[i] & 229 ~block->def[i])); 230 if (new_live_in & ~block->live_in[i]) { 231 block->live_in[i] |= new_live_in; 232 cont = true; 233 } 234 } 235 } 236 237 return cont; 238} 239 240static bool 241vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words) 242{ 243 bool cont = false; 244 245 vir_for_each_block_rev(block, c) { 246 /* Propagate defin/defout down the successors to produce the 247 * union of blocks with a reachable (partial) definition of 248 * the var. 249 * 250 * This keeps a conditional first write to a reg from 251 * extending its lifetime back to the start of the program. 252 */ 253 vir_for_each_successor(succ, block) { 254 for (int i = 0; i < bitset_words; i++) { 255 BITSET_WORD new_def = (block->defout[i] & 256 ~succ->defin[i]); 257 succ->defin[i] |= new_def; 258 succ->defout[i] |= new_def; 259 cont |= new_def; 260 } 261 } 262 } 263 264 return cont; 265} 266 267/** 268 * Extend the start/end ranges for each variable to account for the 269 * new information calculated from control flow. 270 */ 271static void 272vir_compute_start_end(struct v3d_compile *c, int num_vars) 273{ 274 vir_for_each_block(block, c) { 275 for (int i = 0; i < num_vars; i++) { 276 if (BITSET_TEST(block->live_in, i) && 277 BITSET_TEST(block->defin, i)) { 278 c->temp_start[i] = MIN2(c->temp_start[i], 279 block->start_ip); 280 c->temp_end[i] = MAX2(c->temp_end[i], 281 block->start_ip); 282 } 283 284 if (BITSET_TEST(block->live_out, i) && 285 BITSET_TEST(block->defout, i)) { 286 c->temp_start[i] = MIN2(c->temp_start[i], 287 block->end_ip); 288 c->temp_end[i] = MAX2(c->temp_end[i], 289 block->end_ip); 290 } 291 } 292 } 293} 294 295void 296vir_calculate_live_intervals(struct v3d_compile *c) 297{ 298 int bitset_words = BITSET_WORDS(c->num_temps); 299 300 /* We may be called more than once if we've rearranged the program to 301 * try to get register allocation to succeed. 302 */ 303 if (c->temp_start) { 304 ralloc_free(c->temp_start); 305 ralloc_free(c->temp_end); 306 307 vir_for_each_block(block, c) { 308 ralloc_free(block->def); 309 ralloc_free(block->use); 310 ralloc_free(block->live_in); 311 ralloc_free(block->live_out); 312 } 313 } 314 315 c->temp_start = rzalloc_array(c, int, c->num_temps); 316 c->temp_end = rzalloc_array(c, int, c->num_temps); 317 318 for (int i = 0; i < c->num_temps; i++) { 319 c->temp_start[i] = MAX_INSTRUCTION; 320 c->temp_end[i] = -1; 321 } 322 323 vir_for_each_block(block, c) { 324 block->def = rzalloc_array(c, BITSET_WORD, bitset_words); 325 block->defin = rzalloc_array(c, BITSET_WORD, bitset_words); 326 block->defout = rzalloc_array(c, BITSET_WORD, bitset_words); 327 block->use = rzalloc_array(c, BITSET_WORD, bitset_words); 328 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words); 329 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words); 330 } 331 332 vir_setup_def_use(c); 333 334 while (vir_live_variables_dataflow(c, bitset_words)) 335 ; 336 337 while (vir_live_variables_defin_defout_dataflow(c, bitset_words)) 338 ; 339 340 vir_compute_start_end(c, c->num_temps); 341 342 c->live_intervals_valid = true; 343} 344