1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2022 Collabora Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "va_compiler.h" 25bf215546Sopenharmony_ci#include "valhall_enums.h" 26bf215546Sopenharmony_ci#include "bi_builder.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* 29bf215546Sopenharmony_ci * Merge NOPs with flow control with nearby instructions to eliminate the NOPs, 30bf215546Sopenharmony_ci * according to the following rules: 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * 1. Waits may be combined by waiting on a union of the slots. 33bf215546Sopenharmony_ci * 2. Waits may be moved up as far as the first (last) asynchronous instruction 34bf215546Sopenharmony_ci * writing to a slot waited on, at a performance cost. 35bf215546Sopenharmony_ci * 3. Discard may be moved down, at a performance cost. 36bf215546Sopenharmony_ci * 4. Reconverge must be on the last instruction of the block. 37bf215546Sopenharmony_ci * 5. End must be on the last instruction of the block. 38bf215546Sopenharmony_ci * 39bf215546Sopenharmony_ci * For simplicity, this pass only merges within a single basic block. This 40bf215546Sopenharmony_ci * should be sufficient. The algorithm works as follows. 41bf215546Sopenharmony_ci * 42bf215546Sopenharmony_ci * Because reconverge/end must be on the last instruction, there may be only 43bf215546Sopenharmony_ci * one of these in a block. First check for a NOP at the end, and if it has 44bf215546Sopenharmony_ci * reconverge/end flow control, merge with the penultimate instruction. Now we 45bf215546Sopenharmony_ci * only have to worry about waits and discard. 46bf215546Sopenharmony_ci * 47bf215546Sopenharmony_ci * The cost of moving a wait up is assumed to be greater than the cost of moving 48bf215546Sopenharmony_ci * a discard down, so we next move waits while we have more freedom. For each 49bf215546Sopenharmony_ci * wait, merge with an instruction above that either is free or has another 50bf215546Sopenharmony_ci * wait, and merge the flow control, aborting if we hit a message signaling the 51bf215546Sopenharmony_ci * relevant slot. By maintaining the candidate instruction at all steps, this is 52bf215546Sopenharmony_ci * implemented in a linear time walk. 53bf215546Sopenharmony_ci * 54bf215546Sopenharmony_ci * Finally, all that's left are discards. Move discards down to merge with a 55bf215546Sopenharmony_ci * free instruction with a similar linear time walk. 56bf215546Sopenharmony_ci * 57bf215546Sopenharmony_ci * Since discarding helper threads is an optimization (not required for 58bf215546Sopenharmony_ci * correctness), we may remove discards if we believe them harmful to the 59bf215546Sopenharmony_ci * performance of the shader. Keeping with the local structure of the pass, we 60bf215546Sopenharmony_ci * remove discards only if they are at the end of the last block and cannot be 61bf215546Sopenharmony_ci * merged with any other instruction. Such a condition means every following 62bf215546Sopenharmony_ci * instruction has incompatible flow control (wait and end). In practice, this 63bf215546Sopenharmony_ci * means BLEND and ATEST run for helper threads in certain shaders to save a 64bf215546Sopenharmony_ci * NOP, but BLEND is a no-op for helper threads anyway. 65bf215546Sopenharmony_ci * 66bf215546Sopenharmony_ci * One case we don't handle is merging "foo, bar, wait, reconverge" to 67bf215546Sopenharmony_ci * "foo.wait, bar.reconverge". This sequence is rarely generated by the dataflow 68bf215546Sopenharmony_ci * analysis, so we don't care for the complexity. 69bf215546Sopenharmony_ci */ 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci/* 72bf215546Sopenharmony_ci * If the last instruction has reconverge or end, try to merge with the 73bf215546Sopenharmony_ci * second to last instruction. 74bf215546Sopenharmony_ci * 75bf215546Sopenharmony_ci * Precondition: block has at least 2 instructions. 76bf215546Sopenharmony_ci */ 77bf215546Sopenharmony_cistatic void 78bf215546Sopenharmony_cimerge_end_reconverge(bi_block *block) 79bf215546Sopenharmony_ci{ 80bf215546Sopenharmony_ci bi_instr *last = list_last_entry(&block->instructions, bi_instr, link); 81bf215546Sopenharmony_ci bi_instr *penult = bi_prev_op(last); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci if (last->op != BI_OPCODE_NOP) return; 84bf215546Sopenharmony_ci if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END) return; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci /* End implies all other flow control except for waiting on barriers (slot 87bf215546Sopenharmony_ci * #7, with VA_FLOW_WAIT), so remove blocking flow control. 88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_ci if (last->flow == VA_FLOW_END) { 90bf215546Sopenharmony_ci while (penult->op == BI_OPCODE_NOP && penult->flow != VA_FLOW_WAIT) { 91bf215546Sopenharmony_ci bi_remove_instruction(penult); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci /* There may be nothing left */ 94bf215546Sopenharmony_ci if (list_is_singular(&block->instructions)) 95bf215546Sopenharmony_ci return; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci penult = bi_prev_op(last); 98bf215546Sopenharmony_ci } 99bf215546Sopenharmony_ci } 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci /* If there is blocking flow control, we can't merge */ 102bf215546Sopenharmony_ci if (penult->flow != VA_FLOW_NONE) return; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci /* Else, merge */ 105bf215546Sopenharmony_ci penult->flow = last->flow; 106bf215546Sopenharmony_ci bi_remove_instruction(last); 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci/* 110bf215546Sopenharmony_ci * Calculate the union of two waits. We may wait on any combination of slots #0, 111bf215546Sopenharmony_ci * #1, #2 or the entirety of 0126 and 01267. If we wait on the entirety, the 112bf215546Sopenharmony_ci * union is trivial. If we do not wait on slot #6 (by extension slot #7), we 113bf215546Sopenharmony_ci * wait only on slots #0, #1, and #2, for which the waits are encoded as a 114bf215546Sopenharmony_ci * bitset and the union is just a bitwise OR. 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_cistatic enum va_flow 117bf215546Sopenharmony_ciunion_waits(enum va_flow x, enum va_flow y) 118bf215546Sopenharmony_ci{ 119bf215546Sopenharmony_ci assert(va_flow_is_wait_or_none(x) && va_flow_is_wait_or_none(y)); 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci if ((x == VA_FLOW_WAIT) || (y == VA_FLOW_WAIT)) 122bf215546Sopenharmony_ci return VA_FLOW_WAIT; 123bf215546Sopenharmony_ci else if ((x == VA_FLOW_WAIT0126) || (y == VA_FLOW_WAIT0126)) 124bf215546Sopenharmony_ci return VA_FLOW_WAIT0126; 125bf215546Sopenharmony_ci else 126bf215546Sopenharmony_ci return x | y; 127bf215546Sopenharmony_ci} 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_cistatic void 130bf215546Sopenharmony_cimerge_waits(bi_block *block) 131bf215546Sopenharmony_ci{ 132bf215546Sopenharmony_ci /* Most recent instruction with which we can merge, or NULL if none */ 133bf215546Sopenharmony_ci bi_instr *last_free = NULL; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci bi_foreach_instr_in_block_safe(block, I) { 136bf215546Sopenharmony_ci if (last_free != NULL && 137bf215546Sopenharmony_ci I->op == BI_OPCODE_NOP && va_flow_is_wait_or_none(I->flow)) { 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci /* Merge waits with compatible instructions */ 140bf215546Sopenharmony_ci last_free->flow = union_waits(last_free->flow, I->flow); 141bf215546Sopenharmony_ci bi_remove_instruction(I); 142bf215546Sopenharmony_ci continue; 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci /* Don't move waits past async instructions, since they might be what 146bf215546Sopenharmony_ci * we're waiting for. If we wanted to optimize this case, we could check 147bf215546Sopenharmony_ci * the signaled slots. 148bf215546Sopenharmony_ci */ 149bf215546Sopenharmony_ci if (bi_opcode_props[I->op].message) 150bf215546Sopenharmony_ci last_free = NULL; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci /* We can only merge with instructions whose flow control is a wait. 153bf215546Sopenharmony_ci * This includes such an instruction after merging in a wait. It also 154bf215546Sopenharmony_ci * includes async instructions. 155bf215546Sopenharmony_ci */ 156bf215546Sopenharmony_ci if (va_flow_is_wait_or_none(I->flow)) 157bf215546Sopenharmony_ci last_free = I; 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci} 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_cistatic bool 162bf215546Sopenharmony_cibi_is_first_instr(bi_block *block, bi_instr *I) 163bf215546Sopenharmony_ci{ 164bf215546Sopenharmony_ci return block->instructions.next == &I->link; 165bf215546Sopenharmony_ci} 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_cistatic void 168bf215546Sopenharmony_cimerge_discard(bi_block *block) 169bf215546Sopenharmony_ci{ 170bf215546Sopenharmony_ci bi_instr *last_free = NULL; 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci bi_foreach_instr_in_block_safe_rev(block, I) { 173bf215546Sopenharmony_ci if ((I->op == BI_OPCODE_NOP) && (I->flow == VA_FLOW_DISCARD)) { 174bf215546Sopenharmony_ci /* Try to merge with the instruction *preceding* discard, because 175bf215546Sopenharmony_ci * because flow control happens at the end of an instruction and 176bf215546Sopenharmony_ci * discard is a NOP. 177bf215546Sopenharmony_ci */ 178bf215546Sopenharmony_ci if (!bi_is_first_instr(block, I)) { 179bf215546Sopenharmony_ci bi_instr *prev = bi_prev_op(I); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci if (prev->flow == VA_FLOW_NONE) { 182bf215546Sopenharmony_ci prev->flow = VA_FLOW_DISCARD; 183bf215546Sopenharmony_ci bi_remove_instruction(I); 184bf215546Sopenharmony_ci continue; 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci /* Or try to merge with the next instruction with no flow control */ 189bf215546Sopenharmony_ci if (last_free != NULL) { 190bf215546Sopenharmony_ci last_free->flow = VA_FLOW_DISCARD; 191bf215546Sopenharmony_ci bi_remove_instruction(I); 192bf215546Sopenharmony_ci continue; 193bf215546Sopenharmony_ci } 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci /* If there's nowhere to merge and this is the end of the shader, just 196bf215546Sopenharmony_ci * remove the discard. 197bf215546Sopenharmony_ci */ 198bf215546Sopenharmony_ci if (!block->successors[0] && !block->successors[1]) { 199bf215546Sopenharmony_ci bi_remove_instruction(I); 200bf215546Sopenharmony_ci continue; 201bf215546Sopenharmony_ci } 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci /* Allow merging into free instructions */ 205bf215546Sopenharmony_ci if (I->flow == VA_FLOW_NONE) 206bf215546Sopenharmony_ci last_free = I; 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci} 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_civoid 211bf215546Sopenharmony_civa_merge_flow(bi_context *ctx) 212bf215546Sopenharmony_ci{ 213bf215546Sopenharmony_ci bi_foreach_block(ctx, block) { 214bf215546Sopenharmony_ci /* If there are less than 2 instructions, there's nothing to merge */ 215bf215546Sopenharmony_ci if (list_is_empty(&block->instructions)) continue; 216bf215546Sopenharmony_ci if (list_is_singular(&block->instructions)) continue; 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci merge_end_reconverge(block); 219bf215546Sopenharmony_ci merge_waits(block); 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend) 222bf215546Sopenharmony_ci merge_discard(block); 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci} 225