1/* 2 * Copyright (C) 2022 Collabora Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "va_compiler.h" 25#include "valhall_enums.h" 26 27/* 28 * Valhall sources may marked as the last use of a register, according 29 * to the following rules: 30 * 31 * 1. The last use of a register should be marked allowing the hardware 32 * to elide register writes. 33 * 2. Staging sources may be read at any time before the asynchronous 34 * instruction completes. If a register is used as both a staging source and 35 * a regular source, the regular source cannot be marked until the program 36 * waits for the asynchronous instruction. 37 * 3. Marking a register pair marks both registers in the pair. 38 * 39 * Last use information follows immediately from (post-RA) liveness analysis: 40 * a register is dead immediately after its last use. 41 * 42 * Staging information follows from scoreboard analysis: do not mark registers 43 * that are read by a pending asynchronous instruction. Note that the Valhall 44 * scoreboard analysis does not track reads, so we handle that with our own 45 * (simplified) scoreboard analysis. 46 * 47 * Register pairs are marked conservatively: if either register in a pair cannot 48 * be marked, do not mark either register. 49 */ 50 51static uint64_t 52bi_staging_read_mask(const bi_instr *I) 53{ 54 uint64_t mask = 0; 55 56 bi_foreach_src(I, s) { 57 if (bi_is_staging_src(I, s) && !bi_is_null(I->src[s])) { 58 assert(I->src[s].type == BI_INDEX_REGISTER); 59 unsigned reg = I->src[s].value; 60 unsigned count = bi_count_read_registers(I, s); 61 62 mask |= (BITFIELD64_MASK(count) << reg); 63 } 64 } 65 66 return mask; 67} 68 69static bool 70bi_writes_reg(const bi_instr *I, unsigned reg) 71{ 72 bi_foreach_dest(I, d) { 73 if (bi_is_null(I->dest[d])) 74 continue; 75 76 assert(I->dest[d].type == BI_INDEX_REGISTER); 77 78 unsigned count = bi_count_write_registers(I, d); 79 80 if (reg >= I->dest[d].value && (reg - I->dest[d].value) < count) 81 return true; 82 } 83 84 return false; 85} 86 87static unsigned 88waits_on_slot(enum va_flow flow, unsigned slot) 89{ 90 return (flow == VA_FLOW_WAIT) || (flow == VA_FLOW_WAIT0126) || 91 (va_flow_is_wait_or_none(flow) && (flow & BITFIELD_BIT(slot))); 92} 93 94static void 95scoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I) 96{ 97 /* Mark read staging registers */ 98 st->read[I->slot] |= bi_staging_read_mask(I); 99 100 /* Unmark registers after they are waited on */ 101 for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) { 102 if (waits_on_slot(I->flow, i)) 103 st->read[i] = 0; 104 } 105} 106 107static void 108va_analyze_scoreboard_reads(bi_context *ctx) 109{ 110 u_worklist worklist; 111 bi_worklist_init(ctx, &worklist); 112 113 bi_foreach_block(ctx, block) { 114 bi_worklist_push_tail(&worklist, block); 115 116 /* Reset analysis from previous pass */ 117 block->scoreboard_in = (struct bi_scoreboard_state){ 0 }; 118 block->scoreboard_out = (struct bi_scoreboard_state){ 0 }; 119 } 120 121 /* Perform forward data flow analysis to calculate dependencies */ 122 while (!u_worklist_is_empty(&worklist)) { 123 /* Pop from the front for forward analysis */ 124 bi_block *blk = bi_worklist_pop_head(&worklist); 125 126 bi_foreach_predecessor(blk, pred) { 127 for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) 128 blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i]; 129 } 130 131 struct bi_scoreboard_state state = blk->scoreboard_in; 132 133 bi_foreach_instr_in_block(blk, I) 134 scoreboard_update(&state, I); 135 136 /* If there was progress, reprocess successors */ 137 if (memcmp(&state, &blk->scoreboard_out, sizeof(state)) != 0) { 138 bi_foreach_successor(blk, succ) 139 bi_worklist_push_tail(&worklist, succ); 140 } 141 142 blk->scoreboard_out = state; 143 } 144 145 u_worklist_fini(&worklist); 146} 147 148void 149va_mark_last(bi_context *ctx) 150{ 151 /* Analyze the shader globally */ 152 bi_postra_liveness(ctx); 153 va_analyze_scoreboard_reads(ctx); 154 155 bi_foreach_block(ctx, block) { 156 uint64_t live = block->reg_live_out; 157 158 /* Mark all last uses */ 159 bi_foreach_instr_in_block_rev(block, I) { 160 bi_foreach_src(I, s) { 161 if (I->src[s].type != BI_INDEX_REGISTER) 162 continue; 163 164 unsigned nr = bi_count_read_registers(I, s); 165 uint64_t mask = BITFIELD64_MASK(nr) << I->src[s].value; 166 167 /* If the register dead after this instruction, it's the last use */ 168 I->src[s].discard = (live & mask) == 0; 169 170 /* If the register is overwritten this cycle, it is implicitly 171 * discarded, but that won't show up in the liveness analysis. 172 */ 173 I->src[s].discard |= bi_writes_reg(I, I->src[s].value); 174 } 175 176 live = bi_postra_liveness_ins(live, I); 177 } 178 179 struct bi_scoreboard_state st = block->scoreboard_in; 180 181 bi_foreach_instr_in_block(block, I) { 182 /* Unmark registers read by a pending async instruction */ 183 bi_foreach_src(I, s) { 184 if (!I->src[s].discard) 185 continue; 186 187 assert(I->src[s].type == BI_INDEX_REGISTER); 188 189 uint64_t pending_regs = st.read[0] | st.read[1] | st.read[2]; 190 bool pending = (pending_regs & BITFIELD64_BIT(I->src[s].value)); 191 192 if (bi_is_staging_src(I, s) || pending) 193 I->src[s].discard = false; 194 } 195 196 /* Unmark register pairs where one half must be preserved */ 197 bi_foreach_src(I, s) { 198 /* Only look for "real" architectural registers */ 199 if (s >= 3) 200 break; 201 202 if (va_src_info(I->op, s).size == VA_SIZE_64) { 203 bool both_discard = I->src[s].discard && I->src[s + 1].discard; 204 205 I->src[s + 0].discard = both_discard; 206 I->src[s + 1].discard = both_discard; 207 } 208 } 209 210 scoreboard_update(&st, I); 211 } 212 } 213} 214