/*
 * Copyright (C) 2022 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "va_compiler.h"
#include "valhall_enums.h"

/*
 * Valhall sources may be marked as the last use of a register, according
 * to the following rules:
 *
 * 1. The last use of a register should be marked, allowing the hardware
 *    to elide register writes.
 * 2. Staging sources may be read at any time before the asynchronous
 *    instruction completes. If a register is used as both a staging source and
 *    a regular source, the regular source cannot be marked until the program
 *    waits for the asynchronous instruction.
 * 3. Marking a register pair marks both registers in the pair.
 *
 * Last use information follows immediately from (post-RA) liveness analysis:
 * a register is dead immediately after its last use.
 *
 * Staging information follows from scoreboard analysis: do not mark registers
 * that are read by a pending asynchronous instruction. Note that the Valhall
 * scoreboard analysis does not track reads, so we handle that with our own
 * (simplified) scoreboard analysis.
 *
 * Register pairs are marked conservatively: if either register in a pair cannot
 * be marked, do not mark either register.
49bf215546Sopenharmony_ci */ 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_cistatic uint64_t 52bf215546Sopenharmony_cibi_staging_read_mask(const bi_instr *I) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci uint64_t mask = 0; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci bi_foreach_src(I, s) { 57bf215546Sopenharmony_ci if (bi_is_staging_src(I, s) && !bi_is_null(I->src[s])) { 58bf215546Sopenharmony_ci assert(I->src[s].type == BI_INDEX_REGISTER); 59bf215546Sopenharmony_ci unsigned reg = I->src[s].value; 60bf215546Sopenharmony_ci unsigned count = bi_count_read_registers(I, s); 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci mask |= (BITFIELD64_MASK(count) << reg); 63bf215546Sopenharmony_ci } 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci return mask; 67bf215546Sopenharmony_ci} 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_cistatic bool 70bf215546Sopenharmony_cibi_writes_reg(const bi_instr *I, unsigned reg) 71bf215546Sopenharmony_ci{ 72bf215546Sopenharmony_ci bi_foreach_dest(I, d) { 73bf215546Sopenharmony_ci if (bi_is_null(I->dest[d])) 74bf215546Sopenharmony_ci continue; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci assert(I->dest[d].type == BI_INDEX_REGISTER); 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci unsigned count = bi_count_write_registers(I, d); 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci if (reg >= I->dest[d].value && (reg - I->dest[d].value) < count) 81bf215546Sopenharmony_ci return true; 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci return false; 85bf215546Sopenharmony_ci} 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_cistatic unsigned 88bf215546Sopenharmony_ciwaits_on_slot(enum va_flow flow, unsigned slot) 89bf215546Sopenharmony_ci{ 90bf215546Sopenharmony_ci return (flow == VA_FLOW_WAIT) || (flow == VA_FLOW_WAIT0126) || 91bf215546Sopenharmony_ci (va_flow_is_wait_or_none(flow) && (flow & BITFIELD_BIT(slot))); 92bf215546Sopenharmony_ci} 
93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic void 95bf215546Sopenharmony_ciscoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci /* Mark read staging registers */ 98bf215546Sopenharmony_ci st->read[I->slot] |= bi_staging_read_mask(I); 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci /* Unmark registers after they are waited on */ 101bf215546Sopenharmony_ci for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) { 102bf215546Sopenharmony_ci if (waits_on_slot(I->flow, i)) 103bf215546Sopenharmony_ci st->read[i] = 0; 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic void 108bf215546Sopenharmony_civa_analyze_scoreboard_reads(bi_context *ctx) 109bf215546Sopenharmony_ci{ 110bf215546Sopenharmony_ci u_worklist worklist; 111bf215546Sopenharmony_ci bi_worklist_init(ctx, &worklist); 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci bi_foreach_block(ctx, block) { 114bf215546Sopenharmony_ci bi_worklist_push_tail(&worklist, block); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci /* Reset analysis from previous pass */ 117bf215546Sopenharmony_ci block->scoreboard_in = (struct bi_scoreboard_state){ 0 }; 118bf215546Sopenharmony_ci block->scoreboard_out = (struct bi_scoreboard_state){ 0 }; 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci /* Perform forward data flow analysis to calculate dependencies */ 122bf215546Sopenharmony_ci while (!u_worklist_is_empty(&worklist)) { 123bf215546Sopenharmony_ci /* Pop from the front for forward analysis */ 124bf215546Sopenharmony_ci bi_block *blk = bi_worklist_pop_head(&worklist); 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci bi_foreach_predecessor(blk, pred) { 127bf215546Sopenharmony_ci for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) 128bf215546Sopenharmony_ci blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i]; 
129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci struct bi_scoreboard_state state = blk->scoreboard_in; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci bi_foreach_instr_in_block(blk, I) 134bf215546Sopenharmony_ci scoreboard_update(&state, I); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci /* If there was progress, reprocess successors */ 137bf215546Sopenharmony_ci if (memcmp(&state, &blk->scoreboard_out, sizeof(state)) != 0) { 138bf215546Sopenharmony_ci bi_foreach_successor(blk, succ) 139bf215546Sopenharmony_ci bi_worklist_push_tail(&worklist, succ); 140bf215546Sopenharmony_ci } 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci blk->scoreboard_out = state; 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci u_worklist_fini(&worklist); 146bf215546Sopenharmony_ci} 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_civoid 149bf215546Sopenharmony_civa_mark_last(bi_context *ctx) 150bf215546Sopenharmony_ci{ 151bf215546Sopenharmony_ci /* Analyze the shader globally */ 152bf215546Sopenharmony_ci bi_postra_liveness(ctx); 153bf215546Sopenharmony_ci va_analyze_scoreboard_reads(ctx); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci bi_foreach_block(ctx, block) { 156bf215546Sopenharmony_ci uint64_t live = block->reg_live_out; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci /* Mark all last uses */ 159bf215546Sopenharmony_ci bi_foreach_instr_in_block_rev(block, I) { 160bf215546Sopenharmony_ci bi_foreach_src(I, s) { 161bf215546Sopenharmony_ci if (I->src[s].type != BI_INDEX_REGISTER) 162bf215546Sopenharmony_ci continue; 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci unsigned nr = bi_count_read_registers(I, s); 165bf215546Sopenharmony_ci uint64_t mask = BITFIELD64_MASK(nr) << I->src[s].value; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* If the register dead after this instruction, it's the last use */ 168bf215546Sopenharmony_ci I->src[s].discard = (live & mask) == 0; 
169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci /* If the register is overwritten this cycle, it is implicitly 171bf215546Sopenharmony_ci * discarded, but that won't show up in the liveness analysis. 172bf215546Sopenharmony_ci */ 173bf215546Sopenharmony_ci I->src[s].discard |= bi_writes_reg(I, I->src[s].value); 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci live = bi_postra_liveness_ins(live, I); 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci struct bi_scoreboard_state st = block->scoreboard_in; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci bi_foreach_instr_in_block(block, I) { 182bf215546Sopenharmony_ci /* Unmark registers read by a pending async instruction */ 183bf215546Sopenharmony_ci bi_foreach_src(I, s) { 184bf215546Sopenharmony_ci if (!I->src[s].discard) 185bf215546Sopenharmony_ci continue; 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci assert(I->src[s].type == BI_INDEX_REGISTER); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci uint64_t pending_regs = st.read[0] | st.read[1] | st.read[2]; 190bf215546Sopenharmony_ci bool pending = (pending_regs & BITFIELD64_BIT(I->src[s].value)); 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (bi_is_staging_src(I, s) || pending) 193bf215546Sopenharmony_ci I->src[s].discard = false; 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* Unmark register pairs where one half must be preserved */ 197bf215546Sopenharmony_ci bi_foreach_src(I, s) { 198bf215546Sopenharmony_ci /* Only look for "real" architectural registers */ 199bf215546Sopenharmony_ci if (s >= 3) 200bf215546Sopenharmony_ci break; 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci if (va_src_info(I->op, s).size == VA_SIZE_64) { 203bf215546Sopenharmony_ci bool both_discard = I->src[s].discard && I->src[s + 1].discard; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci I->src[s + 0].discard = both_discard; 
206bf215546Sopenharmony_ci I->src[s + 1].discard = both_discard; 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci } 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci scoreboard_update(&st, I); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci} 214