1/* 2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Jonathan Marek <jonathan@marek.ca> 25 */ 26 27#include "ir2_private.h" 28 29/* if an instruction has side effects, we should never kill it */ 30static bool 31has_side_effects(struct ir2_instr *instr) 32{ 33 if (instr->type == IR2_CF) 34 return true; 35 else if (instr->type == IR2_FETCH) 36 return false; 37 38 switch (instr->alu.scalar_opc) { 39 case PRED_SETEs ... KILLONEs: 40 return true; 41 default: 42 break; 43 } 44 45 switch (instr->alu.vector_opc) { 46 case PRED_SETE_PUSHv ... KILLNEv: 47 return true; 48 default: 49 break; 50 } 51 52 return instr->alu.export >= 0; 53} 54 55/* mark an instruction as required, and all its sources recursively */ 56static void 57set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr) 58{ 59 struct ir2_reg *reg; 60 61 /* don't repeat work already done */ 62 if (instr->need_emit) 63 return; 64 65 instr->need_emit = true; 66 67 ir2_foreach_src (src, instr) { 68 switch (src->type) { 69 case IR2_SRC_SSA: 70 set_need_emit(ctx, &ctx->instr[src->num]); 71 break; 72 case IR2_SRC_REG: 73 /* slow .. */ 74 reg = get_reg_src(ctx, src); 75 ir2_foreach_instr (instr, ctx) { 76 if (!instr->is_ssa && instr->reg == reg) 77 set_need_emit(ctx, instr); 78 } 79 break; 80 default: 81 break; 82 } 83 } 84} 85 86/* get current bit mask of allocated components for a register */ 87static unsigned 88reg_mask(struct ir2_context *ctx, unsigned idx) 89{ 90 return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf; 91} 92 93static void 94reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c) 95{ 96 idx = idx * 4 + c; 97 ctx->reg_state[idx / 32] |= 1 << idx % 32; 98} 99 100static void 101reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c) 102{ 103 idx = idx * 4 + c; 104 ctx->reg_state[idx / 32] &= ~(1 << idx % 32); 105} 106 107void 108ra_count_refs(struct ir2_context *ctx) 109{ 110 struct ir2_reg *reg; 111 112 /* mark instructions as needed 113 * need to do this because "substitutions" pass makes many movs not needed 114 */ 115 ir2_foreach_instr (instr, ctx) { 116 if (has_side_effects(instr)) 117 set_need_emit(ctx, instr); 118 } 119 120 /* compute ref_counts */ 121 ir2_foreach_instr (instr, ctx) { 122 /* kill non-needed so they can be skipped */ 123 if (!instr->need_emit) { 124 instr->type = IR2_NONE; 125 continue; 126 } 127 128 ir2_foreach_src (src, instr) { 129 if (src->type == IR2_SRC_CONST) 130 continue; 131 132 reg = get_reg_src(ctx, src); 133 for (int i = 0; i < src_ncomp(instr); i++) 134 reg->comp[swiz_get(src->swizzle, i)].ref_count++; 135 } 136 } 137} 138 139void 140ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export, 141 uint8_t export_writemask) 142{ 143 /* for export, don't allocate anything but set component layout */ 144 if (export) { 145 for (int i = 0; i < 4; i++) 146 reg->comp[i].c = i; 147 return; 148 } 149 150 unsigned idx = force_idx; 151 152 /* TODO: allocate into the same register if theres room 153 * note: the blob doesn't do it, so verify that it is indeed better 154 * also, doing it would conflict with scalar mov insertion 155 */ 156 157 /* check if already allocated */ 158 for (int i = 0; i < reg->ncomp; i++) { 159 if (reg->comp[i].alloc) 160 return; 161 } 162 163 if (force_idx < 0) { 164 for (idx = 0; idx < 64; idx++) { 165 if (reg_mask(ctx, idx) == 0) 166 break; 167 } 168 } 169 assert(idx != 64); /* TODO ran out of register space.. */ 170 171 /* update max_reg value */ 172 ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx); 173 174 unsigned mask = reg_mask(ctx, idx); 175 176 for (int i = 0; i < reg->ncomp; i++) { 177 /* don't allocate never used values */ 178 if (reg->comp[i].ref_count == 0) { 179 reg->comp[i].c = 7; 180 continue; 181 } 182 183 /* TODO */ 184 unsigned c = 1 ? i : (ffs(~mask) - 1); 185 mask |= 1 << c; 186 reg->comp[i].c = c; 187 reg_setmask(ctx, idx, c); 188 reg->comp[i].alloc = true; 189 } 190 191 reg->idx = idx; 192 ctx->live_regs[reg->idx] = reg; 193} 194 195/* reduce srcs ref_count and free if needed */ 196void 197ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr) 198{ 199 struct ir2_reg *reg; 200 struct ir2_reg_component *comp; 201 202 ir2_foreach_src (src, instr) { 203 if (src->type == IR2_SRC_CONST) 204 continue; 205 206 reg = get_reg_src(ctx, src); 207 /* XXX use before write case */ 208 209 for (int i = 0; i < src_ncomp(instr); i++) { 210 comp = ®->comp[swiz_get(src->swizzle, i)]; 211 if (!--comp->ref_count && reg->block_idx_free < 0) { 212 reg_freemask(ctx, reg->idx, comp->c); 213 comp->alloc = false; 214 } 215 } 216 } 217} 218 219/* free any regs left for a block */ 220void 221ra_block_free(struct ir2_context *ctx, unsigned block) 222{ 223 ir2_foreach_live_reg (reg, ctx) { 224 if (reg->block_idx_free != block) 225 continue; 226 227 for (int i = 0; i < reg->ncomp; i++) { 228 if (!reg->comp[i].alloc) /* XXX should never be true? */ 229 continue; 230 231 reg_freemask(ctx, reg->idx, reg->comp[i].c); 232 reg->comp[i].alloc = false; 233 } 234 ctx->live_regs[reg->idx] = NULL; 235 } 236} 237