1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Jonathan Marek <jonathan@marek.ca> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "ir2_private.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci/* if an instruction has side effects, we should never kill it */ 30bf215546Sopenharmony_cistatic bool 31bf215546Sopenharmony_cihas_side_effects(struct ir2_instr *instr) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci if (instr->type == IR2_CF) 34bf215546Sopenharmony_ci return true; 35bf215546Sopenharmony_ci else if (instr->type == IR2_FETCH) 36bf215546Sopenharmony_ci return false; 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci switch (instr->alu.scalar_opc) { 39bf215546Sopenharmony_ci case PRED_SETEs ... KILLONEs: 40bf215546Sopenharmony_ci return true; 41bf215546Sopenharmony_ci default: 42bf215546Sopenharmony_ci break; 43bf215546Sopenharmony_ci } 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci switch (instr->alu.vector_opc) { 46bf215546Sopenharmony_ci case PRED_SETE_PUSHv ... KILLNEv: 47bf215546Sopenharmony_ci return true; 48bf215546Sopenharmony_ci default: 49bf215546Sopenharmony_ci break; 50bf215546Sopenharmony_ci } 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci return instr->alu.export >= 0; 53bf215546Sopenharmony_ci} 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci/* mark an instruction as required, and all its sources recursively */ 56bf215546Sopenharmony_cistatic void 57bf215546Sopenharmony_ciset_need_emit(struct ir2_context *ctx, struct ir2_instr *instr) 58bf215546Sopenharmony_ci{ 59bf215546Sopenharmony_ci struct ir2_reg *reg; 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci /* don't repeat work already done */ 62bf215546Sopenharmony_ci if (instr->need_emit) 63bf215546Sopenharmony_ci return; 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci instr->need_emit = true; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci ir2_foreach_src (src, instr) { 68bf215546Sopenharmony_ci switch (src->type) { 69bf215546Sopenharmony_ci case IR2_SRC_SSA: 70bf215546Sopenharmony_ci set_need_emit(ctx, &ctx->instr[src->num]); 71bf215546Sopenharmony_ci break; 72bf215546Sopenharmony_ci case IR2_SRC_REG: 73bf215546Sopenharmony_ci /* slow .. */ 74bf215546Sopenharmony_ci reg = get_reg_src(ctx, src); 75bf215546Sopenharmony_ci ir2_foreach_instr (instr, ctx) { 76bf215546Sopenharmony_ci if (!instr->is_ssa && instr->reg == reg) 77bf215546Sopenharmony_ci set_need_emit(ctx, instr); 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci break; 80bf215546Sopenharmony_ci default: 81bf215546Sopenharmony_ci break; 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci } 84bf215546Sopenharmony_ci} 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci/* get current bit mask of allocated components for a register */ 87bf215546Sopenharmony_cistatic unsigned 88bf215546Sopenharmony_cireg_mask(struct ir2_context *ctx, unsigned idx) 89bf215546Sopenharmony_ci{ 90bf215546Sopenharmony_ci return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf; 91bf215546Sopenharmony_ci} 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_cistatic void 94bf215546Sopenharmony_cireg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci idx = idx * 4 + c; 97bf215546Sopenharmony_ci ctx->reg_state[idx / 32] |= 1 << idx % 32; 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_cistatic void 101bf215546Sopenharmony_cireg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c) 102bf215546Sopenharmony_ci{ 103bf215546Sopenharmony_ci idx = idx * 4 + c; 104bf215546Sopenharmony_ci ctx->reg_state[idx / 32] &= ~(1 << idx % 32); 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_civoid 108bf215546Sopenharmony_cira_count_refs(struct ir2_context *ctx) 109bf215546Sopenharmony_ci{ 110bf215546Sopenharmony_ci struct ir2_reg *reg; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci /* mark instructions as needed 113bf215546Sopenharmony_ci * need to do this because "substitutions" pass makes many movs not needed 114bf215546Sopenharmony_ci */ 115bf215546Sopenharmony_ci ir2_foreach_instr (instr, ctx) { 116bf215546Sopenharmony_ci if (has_side_effects(instr)) 117bf215546Sopenharmony_ci set_need_emit(ctx, instr); 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci /* compute ref_counts */ 121bf215546Sopenharmony_ci ir2_foreach_instr (instr, ctx) { 122bf215546Sopenharmony_ci /* kill non-needed so they can be skipped */ 123bf215546Sopenharmony_ci if (!instr->need_emit) { 124bf215546Sopenharmony_ci instr->type = IR2_NONE; 125bf215546Sopenharmony_ci continue; 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci ir2_foreach_src (src, instr) { 129bf215546Sopenharmony_ci if (src->type == IR2_SRC_CONST) 130bf215546Sopenharmony_ci continue; 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci reg = get_reg_src(ctx, src); 133bf215546Sopenharmony_ci for (int i = 0; i < src_ncomp(instr); i++) 134bf215546Sopenharmony_ci reg->comp[swiz_get(src->swizzle, i)].ref_count++; 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci } 137bf215546Sopenharmony_ci} 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_civoid 140bf215546Sopenharmony_cira_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export, 141bf215546Sopenharmony_ci uint8_t export_writemask) 142bf215546Sopenharmony_ci{ 143bf215546Sopenharmony_ci /* for export, don't allocate anything but set component layout */ 144bf215546Sopenharmony_ci if (export) { 145bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) 146bf215546Sopenharmony_ci reg->comp[i].c = i; 147bf215546Sopenharmony_ci return; 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci unsigned idx = force_idx; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci /* TODO: allocate into the same register if theres room 153bf215546Sopenharmony_ci * note: the blob doesn't do it, so verify that it is indeed better 154bf215546Sopenharmony_ci * also, doing it would conflict with scalar mov insertion 155bf215546Sopenharmony_ci */ 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci /* check if already allocated */ 158bf215546Sopenharmony_ci for (int i = 0; i < reg->ncomp; i++) { 159bf215546Sopenharmony_ci if (reg->comp[i].alloc) 160bf215546Sopenharmony_ci return; 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci if (force_idx < 0) { 164bf215546Sopenharmony_ci for (idx = 0; idx < 64; idx++) { 165bf215546Sopenharmony_ci if (reg_mask(ctx, idx) == 0) 166bf215546Sopenharmony_ci break; 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci assert(idx != 64); /* TODO ran out of register space.. */ 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci /* update max_reg value */ 172bf215546Sopenharmony_ci ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci unsigned mask = reg_mask(ctx, idx); 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci for (int i = 0; i < reg->ncomp; i++) { 177bf215546Sopenharmony_ci /* don't allocate never used values */ 178bf215546Sopenharmony_ci if (reg->comp[i].ref_count == 0) { 179bf215546Sopenharmony_ci reg->comp[i].c = 7; 180bf215546Sopenharmony_ci continue; 181bf215546Sopenharmony_ci } 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci /* TODO */ 184bf215546Sopenharmony_ci unsigned c = 1 ? i : (ffs(~mask) - 1); 185bf215546Sopenharmony_ci mask |= 1 << c; 186bf215546Sopenharmony_ci reg->comp[i].c = c; 187bf215546Sopenharmony_ci reg_setmask(ctx, idx, c); 188bf215546Sopenharmony_ci reg->comp[i].alloc = true; 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci reg->idx = idx; 192bf215546Sopenharmony_ci ctx->live_regs[reg->idx] = reg; 193bf215546Sopenharmony_ci} 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci/* reduce srcs ref_count and free if needed */ 196bf215546Sopenharmony_civoid 197bf215546Sopenharmony_cira_src_free(struct ir2_context *ctx, struct ir2_instr *instr) 198bf215546Sopenharmony_ci{ 199bf215546Sopenharmony_ci struct ir2_reg *reg; 200bf215546Sopenharmony_ci struct ir2_reg_component *comp; 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci ir2_foreach_src (src, instr) { 203bf215546Sopenharmony_ci if (src->type == IR2_SRC_CONST) 204bf215546Sopenharmony_ci continue; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci reg = get_reg_src(ctx, src); 207bf215546Sopenharmony_ci /* XXX use before write case */ 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci for (int i = 0; i < src_ncomp(instr); i++) { 210bf215546Sopenharmony_ci comp = ®->comp[swiz_get(src->swizzle, i)]; 211bf215546Sopenharmony_ci if (!--comp->ref_count && reg->block_idx_free < 0) { 212bf215546Sopenharmony_ci reg_freemask(ctx, reg->idx, comp->c); 213bf215546Sopenharmony_ci comp->alloc = false; 214bf215546Sopenharmony_ci } 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci} 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci/* free any regs left for a block */ 220bf215546Sopenharmony_civoid 221bf215546Sopenharmony_cira_block_free(struct ir2_context *ctx, unsigned block) 222bf215546Sopenharmony_ci{ 223bf215546Sopenharmony_ci ir2_foreach_live_reg (reg, ctx) { 224bf215546Sopenharmony_ci if (reg->block_idx_free != block) 225bf215546Sopenharmony_ci continue; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci for (int i = 0; i < reg->ncomp; i++) { 228bf215546Sopenharmony_ci if (!reg->comp[i].alloc) /* XXX should never be true? */ 229bf215546Sopenharmony_ci continue; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci reg_freemask(ctx, reg->idx, reg->comp[i].c); 232bf215546Sopenharmony_ci reg->comp[i].alloc = false; 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci ctx->live_regs[reg->idx] = NULL; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci} 237