1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "ir3_ra.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci/* The spilling pass leaves out a few details required to successfully operate 27bf215546Sopenharmony_ci * ldp/stp: 28bf215546Sopenharmony_ci * 29bf215546Sopenharmony_ci * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores 30bf215546Sopenharmony_ci * that and just spills/restores entire values, including arrays and values 31bf215546Sopenharmony_ci * created for texture setup which can be more than 4 components. 32bf215546Sopenharmony_ci * 2. The immediate offset only has 13 bits and is signed, so if we spill a lot 33bf215546Sopenharmony_ci * or have very large arrays before spilling then we could run out. 34bf215546Sopenharmony_ci * 3. The spiller doesn't add barrier dependencies needed for post-RA 35bf215546Sopenharmony_ci * scheduling. 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * The first one, in particular, is much easier to handle after RA because 38bf215546Sopenharmony_ci * arrays and normal values can be treated the same way. Therefore this pass 39bf215546Sopenharmony_ci * runs after RA, and handles all three issues. This keeps the complexity out of 40bf215546Sopenharmony_ci * the spiller. 41bf215546Sopenharmony_ci */ 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_cistatic unsigned 44bf215546Sopenharmony_cicomponent_bytes(struct ir3_register *src) 45bf215546Sopenharmony_ci{ 46bf215546Sopenharmony_ci return (src->flags & IR3_REG_HALF) ? 2 : 4; 47bf215546Sopenharmony_ci} 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci/* Note: this won't work if the base register is anything other than 0! 50bf215546Sopenharmony_ci * Dynamic bases, which we'll need for "real" function call support, will 51bf215546Sopenharmony_ci * probably be a lot harder to handle and may require reserving another 52bf215546Sopenharmony_ci * register. 53bf215546Sopenharmony_ci */ 54bf215546Sopenharmony_cistatic void 55bf215546Sopenharmony_ciset_base_reg(struct ir3_instruction *mem, unsigned val) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1); 58bf215546Sopenharmony_ci ir3_dst_create(mov, mem->srcs[0]->num, mem->srcs[0]->flags); 59bf215546Sopenharmony_ci ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = val; 60bf215546Sopenharmony_ci mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci ir3_instr_move_before(mov, mem); 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_cistatic void 66bf215546Sopenharmony_cireset_base_reg(struct ir3_instruction *mem) 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci /* If the base register is killed, then we don't need to clobber it and it 69bf215546Sopenharmony_ci * may be reused as a destination so we can't always clobber it after the 70bf215546Sopenharmony_ci * instruction anyway. 71bf215546Sopenharmony_ci */ 72bf215546Sopenharmony_ci struct ir3_register *base = mem->srcs[0]; 73bf215546Sopenharmony_ci if (base->flags & IR3_REG_KILL) 74bf215546Sopenharmony_ci return; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1); 77bf215546Sopenharmony_ci ir3_dst_create(mov, base->num, base->flags); 78bf215546Sopenharmony_ci ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = 0; 79bf215546Sopenharmony_ci mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci ir3_instr_move_after(mov, mem); 82bf215546Sopenharmony_ci} 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci/* There are 13 bits, but 1 << 12 will be sign-extended into a negative offset 85bf215546Sopenharmony_ci * so it can't be used directly. Therefore only offsets under 1 << 12 can be 86bf215546Sopenharmony_ci * used without any adjustments. 87bf215546Sopenharmony_ci */ 88bf215546Sopenharmony_ci#define MAX_CAT6_SIZE (1u << 12) 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistatic void 91bf215546Sopenharmony_cihandle_oob_offset_spill(struct ir3_instruction *spill) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci unsigned components = spill->srcs[2]->uim_val; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci if (spill->cat6.dst_offset + components * component_bytes(spill->srcs[1]) < MAX_CAT6_SIZE) 96bf215546Sopenharmony_ci return; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci set_base_reg(spill, spill->cat6.dst_offset); 99bf215546Sopenharmony_ci reset_base_reg(spill); 100bf215546Sopenharmony_ci spill->cat6.dst_offset = 0; 101bf215546Sopenharmony_ci} 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_cistatic void 104bf215546Sopenharmony_cihandle_oob_offset_reload(struct ir3_instruction *reload) 105bf215546Sopenharmony_ci{ 106bf215546Sopenharmony_ci unsigned components = reload->srcs[2]->uim_val; 107bf215546Sopenharmony_ci unsigned offset = reload->srcs[1]->uim_val; 108bf215546Sopenharmony_ci if (offset + components * component_bytes(reload->dsts[0]) < MAX_CAT6_SIZE) 109bf215546Sopenharmony_ci return; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci set_base_reg(reload, offset); 112bf215546Sopenharmony_ci reset_base_reg(reload); 113bf215546Sopenharmony_ci reload->srcs[1]->uim_val = 0; 114bf215546Sopenharmony_ci} 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_cistatic void 117bf215546Sopenharmony_cisplit_spill(struct ir3_instruction *spill) 118bf215546Sopenharmony_ci{ 119bf215546Sopenharmony_ci unsigned orig_components = spill->srcs[2]->uim_val; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci /* We don't handle splitting dependencies. */ 122bf215546Sopenharmony_ci assert(spill->deps_count == 0); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci if (orig_components <= 4) { 125bf215546Sopenharmony_ci if (spill->srcs[1]->flags & IR3_REG_ARRAY) { 126bf215546Sopenharmony_ci spill->srcs[1]->wrmask = MASK(orig_components); 127bf215546Sopenharmony_ci spill->srcs[1]->num = spill->srcs[1]->array.base; 128bf215546Sopenharmony_ci spill->srcs[1]->flags &= ~IR3_REG_ARRAY; 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci return; 131bf215546Sopenharmony_ci } 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci for (unsigned comp = 0; comp < orig_components; comp += 4) { 134bf215546Sopenharmony_ci unsigned components = MIN2(orig_components - comp, 4); 135bf215546Sopenharmony_ci struct ir3_instruction *clone = ir3_instr_clone(spill); 136bf215546Sopenharmony_ci ir3_instr_move_before(clone, spill); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci clone->srcs[1]->wrmask = MASK(components); 139bf215546Sopenharmony_ci if (clone->srcs[1]->flags & IR3_REG_ARRAY) { 140bf215546Sopenharmony_ci clone->srcs[1]->num = clone->srcs[1]->array.base + comp; 141bf215546Sopenharmony_ci clone->srcs[1]->flags &= ~IR3_REG_ARRAY; 142bf215546Sopenharmony_ci } 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci clone->srcs[2]->uim_val = components; 145bf215546Sopenharmony_ci clone->cat6.dst_offset += comp * component_bytes(spill->srcs[1]); 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci list_delinit(&spill->node); 149bf215546Sopenharmony_ci} 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_cistatic void 152bf215546Sopenharmony_cisplit_reload(struct ir3_instruction *reload) 153bf215546Sopenharmony_ci{ 154bf215546Sopenharmony_ci unsigned orig_components = reload->srcs[2]->uim_val; 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci assert(reload->deps_count == 0); 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci if (orig_components <= 4) { 159bf215546Sopenharmony_ci if (reload->dsts[0]->flags & IR3_REG_ARRAY) { 160bf215546Sopenharmony_ci reload->dsts[0]->wrmask = MASK(orig_components); 161bf215546Sopenharmony_ci reload->dsts[0]->num = reload->dsts[0]->array.base; 162bf215546Sopenharmony_ci reload->dsts[0]->flags &= ~IR3_REG_ARRAY; 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci return; 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci for (unsigned comp = 0; comp < orig_components; comp += 4) { 168bf215546Sopenharmony_ci unsigned components = MIN2(orig_components - comp, 4); 169bf215546Sopenharmony_ci struct ir3_instruction *clone = ir3_instr_clone(reload); 170bf215546Sopenharmony_ci ir3_instr_move_before(clone, reload); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci clone->dsts[0]->wrmask = MASK(components); 173bf215546Sopenharmony_ci if (clone->dsts[0]->flags & IR3_REG_ARRAY) { 174bf215546Sopenharmony_ci clone->dsts[0]->num = clone->dsts[0]->array.base + comp; 175bf215546Sopenharmony_ci clone->dsts[0]->flags &= ~IR3_REG_ARRAY; 176bf215546Sopenharmony_ci } 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci clone->srcs[2]->uim_val = components; 179bf215546Sopenharmony_ci clone->srcs[1]->uim_val += comp * component_bytes(reload->dsts[0]); 180bf215546Sopenharmony_ci } 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci list_delinit(&reload->node); 183bf215546Sopenharmony_ci} 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_cistatic void 186bf215546Sopenharmony_ciadd_spill_reload_deps(struct ir3_block *block) 187bf215546Sopenharmony_ci{ 188bf215546Sopenharmony_ci struct ir3_instruction *last_spill = NULL; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 191bf215546Sopenharmony_ci if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) && 192bf215546Sopenharmony_ci last_spill) { 193bf215546Sopenharmony_ci ir3_instr_add_dep(instr, last_spill); 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci if (instr->opc == OPC_SPILL_MACRO) 197bf215546Sopenharmony_ci last_spill = instr; 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci last_spill = NULL; 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci foreach_instr_rev (instr, &block->instr_list) { 204bf215546Sopenharmony_ci if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) && 205bf215546Sopenharmony_ci last_spill) { 206bf215546Sopenharmony_ci ir3_instr_add_dep(last_spill, instr); 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci if (instr->opc == OPC_SPILL_MACRO) 210bf215546Sopenharmony_ci last_spill = instr; 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci} 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_cibool 215bf215546Sopenharmony_ciir3_lower_spill(struct ir3 *ir) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 218bf215546Sopenharmony_ci foreach_instr_safe (instr, &block->instr_list) { 219bf215546Sopenharmony_ci if (instr->opc == OPC_SPILL_MACRO) { 220bf215546Sopenharmony_ci handle_oob_offset_spill(instr); 221bf215546Sopenharmony_ci split_spill(instr); 222bf215546Sopenharmony_ci } else if (instr->opc == OPC_RELOAD_MACRO) { 223bf215546Sopenharmony_ci handle_oob_offset_reload(instr); 224bf215546Sopenharmony_ci split_reload(instr); 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci add_spill_reload_deps(block); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 231bf215546Sopenharmony_ci if (instr->opc == OPC_SPILL_MACRO) 232bf215546Sopenharmony_ci instr->opc = OPC_STP; 233bf215546Sopenharmony_ci else if (instr->opc == OPC_RELOAD_MACRO) 234bf215546Sopenharmony_ci instr->opc = OPC_LDP; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci return true; 239bf215546Sopenharmony_ci} 240