/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* This pass lowers array accesses to SSA.
 *
 * After this pass, instructions writing arrays implicitly read the contents of
 * the array defined in instr->dsts[0]->def (possibly a phi node), perform the
 * operation, and store to instr->dsts[0].
 *
 * This makes arrays appear like "normal" SSA values, even if the false
 * dependencies mean that they always stay in CSSA form (i.e. able to be taken
 * out of SSA with no copies). While hopefully they shouldn't induce copies in
 * most cases, we can't make that guarantee while also splitting spilling from
 * RA and guaranteeing a certain number of registers are used, so we have to
 * insert the phi nodes to be able to know when copying should happen.
 *
 * The implementation is based on the idea in "Simple and Efficient
 * Construction of Static Single Assignment Form" of scanning backwards to find
 * the definition. However, since we're not doing this on-the-fly, we can
 * simplify things a little by doing a pre-pass to get the last definition of
 * each array in each block. Then we optimize trivial phis in a separate pass,
 * "on the fly", so that we don't have to rewrite (and keep track of) users.
 */
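
/* As a rough sketch of the result (illustrative pseudo-IR, not exact ir3
 * disassembly, and d0..d2/block0..block2 are hypothetical names), a
 * conditional write to an array might end up looking like:
 *
 *    block0:
 *       d0 = ...              ; whole-array definition
 *    block1:                  ; taken branch writes one element
 *       d1 (prev: d0) = ...   ; implicitly reads d0, redefines the array
 *    block2 (preds: block0, block1):
 *       d2 = phi(d0, d1)      ; merged whole-array value
 *       ... = d2[i]           ; reads name the definition reaching them
 *
 * The point is that every array access can name the single whole-array
 * definition that reaches it, so liveness and copy placement are explicit.
 */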
#include <stdlib.h>
#include "ir3.h"

struct array_state {
   struct ir3_register *live_in_definition;
   struct ir3_register *live_out_definition;
   bool constructed;
   bool optimized;
};

struct array_ctx {
   struct array_state *states;
   struct ir3 *ir;
   unsigned array_count;
};

/* The per-(block, array) state lives in a flat table indexed by
 * [block->index][array id].
 */
static struct array_state *
get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
   return &ctx->states[ctx->array_count * block->index + id];
}

static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
                                                 struct ir3_block *block,
                                                 struct ir3_array *arr);

static struct ir3_register *
read_value_end(struct array_ctx *ctx, struct ir3_block *block,
               struct ir3_array *arr)
{
   struct array_state *state = get_state(ctx, block, arr->id);
   if (state->live_out_definition)
      return state->live_out_definition;

   /* No definition in this block, so the live-out value is whatever is
    * live-in.
    */
   state->live_out_definition = read_value_beginning(ctx, block, arr);
   return state->live_out_definition;
}

/* Roughly equivalent to readVariableRecursive from the paper: */
static struct ir3_register *
read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
                     struct ir3_array *arr)
{
   struct array_state *state = get_state(ctx, block, arr->id);

   if (state->constructed)
      return state->live_in_definition;

   if (block->predecessors_count == 0) {
      /* The start block: the array is undefined on entry. */
      state->constructed = true;
      return NULL;
   }

   if (block->predecessors_count == 1) {
      state->live_in_definition =
         read_value_end(ctx, block->predecessors[0], arr);
      state->constructed = true;
      return state->live_in_definition;
   }

   unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);

   /* Move the new phi to the beginning of the block, where phis belong. */
   list_del(&phi->node);
   list_add(&phi->node, &block->instr_list);

   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= flags;
   dst->array.id = arr->id;
   dst->size = arr->length;

   /* Record the live-in definition before filling in the sources, so that a
    * read along a loop back-edge finds this phi instead of recursing forever.
    */
   state->live_in_definition = phi->dsts[0];
   state->constructed = true;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_register *src =
         read_value_end(ctx, block->predecessors[i], arr);
      struct ir3_register *src_reg;
      if (src) {
         src_reg = __ssa_src(phi, src->instr, flags);
      } else {
         /* The array is undefined along this edge. */
         src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
      }
      src_reg->array.id = arr->id;
      src_reg->size = arr->length;
   }
   return phi->dsts[0];
}
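
/* To see why trivial phis appear, consider (with hypothetical names) a loop
 * whose body never writes the array:
 *
 *    loop_header (preds: entry, loop_body):
 *       d1 = phi(d0, d1)   ; the back-edge source is the phi itself
 *
 * read_value_beginning() must insert the phi before visiting the back edge
 * in order to terminate the recursion, and only afterwards discovers that
 * every non-self source is d0. remove_trivial_phi() below detects this and
 * records d0 in phi->data; the now-dead phi is deleted in the final rewrite
 * loop of ir3_array_to_ssa().
 */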
static struct ir3_register *
remove_trivial_phi(struct ir3_instruction *phi)
{
   /* Break cycles */
   if (phi->data)
      return phi->data;

   phi->data = phi->dsts[0];

   struct ir3_register *unique_def = NULL;
   bool unique = true;
   for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
      struct ir3_register *src = phi->srcs[i];

      /* If there are any undef sources, then the remaining sources may not
       * dominate the phi node, even if they are all equal. So we need to
       * bail out in this case.
       *
       * This seems to be a bug in the original paper.
       */
      if (!src->def) {
         unique = false;
         break;
      }

      struct ir3_instruction *src_instr = src->def->instr;

      /* phi sources which point to the phi itself don't count for
       * figuring out if the phi is trivial
       */
      if (src_instr == phi)
         continue;

      if (src_instr->opc == OPC_META_PHI) {
         src->def = remove_trivial_phi(src->def->instr);
      }

      if (unique_def) {
         if (unique_def != src->def) {
            unique = false;
            break;
         }
      } else {
         unique_def = src->def;
      }
   }

   if (unique) {
      phi->data = unique_def;
      return unique_def;
   } else {
      return phi->dsts[0];
   }
}

/* Map a definition to its replacement, in case its phi turned out to be
 * trivial.
 */
static struct ir3_register *
lookup_value(struct ir3_register *reg)
{
   if (reg->instr->opc == OPC_META_PHI)
      return reg->instr->data;
   return reg;
}

static struct ir3_register *
lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
   struct array_state *state = get_state(ctx, block, id);
   if (state->live_in_definition)
      return lookup_value(state->live_in_definition);

   return NULL;
}
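
/* The pass itself runs in four phases:
 *
 * 1. A pre-pass records the last definition of each array in each block as
 *    the initial live-out definitions.
 * 2. Every array access that isn't already satisfied within its own block
 *    calls read_value_beginning(), inserting phi nodes along the way.
 * 3. remove_trivial_phi() marks phis that turned out to be redundant.
 * 4. A final rewrite deletes the trivial phis, redirects remaining sources
 *    past them via lookup_value()/lookup_live_in(), and tags all array
 *    accesses as SSA.
 */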
bool
ir3_array_to_ssa(struct ir3 *ir)
{
   struct array_ctx ctx = {};

   foreach_array (array, &ir->array_list) {
      ctx.array_count = MAX2(ctx.array_count, array->id + 1);
   }

   if (ctx.array_count == 0)
      return false;

   unsigned i = 0;
   foreach_block (block, &ir->block_list) {
      block->index = i++;
   }

   ctx.ir = ir;
   ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         foreach_dst (dst, instr) {
            if (dst->flags & IR3_REG_ARRAY) {
               struct array_state *state =
                  get_state(&ctx, block, dst->array.id);
               state->live_out_definition = dst;
            }
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI)
            continue;

         foreach_dst (reg, instr) {
            if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);

               /* Construct any phi nodes necessary to read this value */
               read_value_beginning(&ctx, block, arr);
            }
         }
         foreach_src (reg, instr) {
            if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);

               /* Construct any phi nodes necessary to read this value */
               read_value_beginning(&ctx, block, arr);
            }
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI)
            remove_trivial_phi(instr);
         else
            break;
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI) {
            if (!(instr->dsts[0]->flags & IR3_REG_ARRAY))
               continue;
            if (instr->data != instr->dsts[0]) {
               /* The phi was trivial; delete it. */
               list_del(&instr->node);
               continue;
            }
            /* Redirect sources past any deleted trivial phis. Undef sources
             * have no def to redirect.
             */
            for (unsigned i = 0; i < instr->srcs_count; i++) {
               if (instr->srcs[i]->def)
                  instr->srcs[i]->def = lookup_value(instr->srcs[i]->def);
            }
         } else {
            foreach_dst (reg, instr) {
               if ((reg->flags & IR3_REG_ARRAY)) {
                  if (!reg->tied) {
                     struct ir3_register *def =
                        lookup_live_in(&ctx, block, reg->array.id);
                     if (def)
                        ir3_reg_set_last_array(instr, reg, def);
                  }
                  reg->flags |= IR3_REG_SSA;
               }
            }
            foreach_src (reg, instr) {
               if ((reg->flags & IR3_REG_ARRAY)) {
                  /* It is assumed that before calling ir3_array_to_ssa(),
                   * reg->def was set to the previous writer of the array
                   * within the current block, or NULL if none.
                   */
                  if (!reg->def) {
                     reg->def = lookup_live_in(&ctx, block, reg->array.id);
                  }
                  reg->flags |= IR3_REG_SSA;
               }
            }
         }
      }
   }

   free(ctx.states);
   return true;
}
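
/* Usage sketch (hypothetical call site; where this pass actually runs in a
 * driver's pass pipeline is up to the caller):
 *
 *    struct ir3 *ir = ...;      // shader IR containing array accesses
 *    if (ir3_array_to_ssa(ir)) {
 *       // Arrays now carry SSA defs/uses, so SSA-based passes such as
 *       // spilling and register allocation can treat them uniformly.
 *    }
 *
 * The pass returns false (and does nothing) when the shader contains no
 * arrays at all.
 */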