/*
 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_minifloat.h"

/* AGX peephole optimizer responsible for instruction combining. It operates in
 * a forward direction and a backward direction, in each case traversing in
 * source order. SSA means the forward pass satisfies the invariant:
 *
 *    Every def is visited before any of its uses.
 *
 * Dually, the backwards pass satisfies the invariant:
 *
 *    Every use of a def is visited before the def.
 *
 * This means the forward pass can propagate modifiers forward, whereas the
 * backwards pass propagates modifiers backward. Consider an example:
 *
 *    1 = fabs 0
 *    2 = fround 1
 *    3 = fsat 2
 *
 * The forwards pass would propagate the fabs to the fround (since we can look
 * up the fabs from the fround source and do the replacement). By contrast, the
 * backwards pass would propagate the fsat back to the fround (since when we
 * see the fround we know it has only a single user, the fsat). Propagatable
 * instructions have natural directions (like pushforwards and pullbacks).
 *
 * We are careful to update the tracked state whenever we modify an instruction
 * to ensure the passes are linear-time and converge in a single iteration.
 *
 * Size conversions are worth special discussion. Consider the snippet:
 *
 *    2 = fadd 0, 1
 *    3 = f2f16 2
 *    4 = fround 3
 *
 * A priori, we can move the f2f16 in either direction. But the two choices are
 * not equal -- if we move it up to the fadd, we get FP16 for two instructions,
 * whereas if we push it into the fround, we effectively get FP32 for two
 * instructions. So f2f16 is backwards. Conversely, consider
 *
 *    2 = fadd 0, 1
 *    3 = f2f32 2
 *    4 = fround 3
 *
 * This time if we move the f2f32 up to the fadd, we get FP32 for two
 * instructions, but if we push it down into the fround, we get FP16 for two.
 * So f2f32 is forwards.
 */
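
/* As a concrete sketch of the combined effect (using the IR notation from the
 * comment above; this is illustrative, not a compilable test case): given
 *
 *    1 = fabs 0
 *    2 = fround 1
 *    3 = fsat 2
 *
 * the backward pass folds the fsat into the fround as a saturate flag and
 * retargets the destination, and the forward pass then folds the fabs into
 * the fround's source as an abs modifier. The net result is roughly
 *
 *    3 = fround abs(0), saturate
 *
 * with the fabs left dead (assumed to be cleaned up by a later dead-code
 * elimination pass).
 */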

/* An fmov is represented as an fadd with a -0.0 source, since adding negative
 * zero leaves every value unchanged.
 */
static bool
agx_is_fmov(agx_instr *def)
{
   return (def->op == AGX_OPCODE_FADD)
      && agx_is_equiv(def->src[1], agx_negzero());
}

/* Compose floating-point modifiers with floating-point sources */

static agx_index
agx_compose_float_src(agx_index to, agx_index from)
{
   if (to.abs) {
      from.neg = false;
      from.abs = true;
   }

   from.neg ^= to.neg;

   return from;
}

/* Propagate the modifiers of an fmov def into the sources that consume it */

static void
agx_optimizer_fmov(agx_instr **defs, agx_instr *ins)
{
   agx_foreach_src(ins, s) {
      agx_index src = ins->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (!agx_is_fmov(def)) continue;
      if (def->saturate) continue;

      ins->src[s] = agx_compose_float_src(src, def->src[0]);
   }
}

static void
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
                         unsigned srcs, bool is_float)
{
   for (unsigned s = 0; s < srcs; ++s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (def->op != AGX_OPCODE_MOV_IMM) continue;

      uint8_t value = def->imm;
      bool float_src = is_float;

      /* cmpselsrc takes integer immediates only */
      if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;

      if (float_src) {
         bool fp16 = (def->dest[0].size == AGX_SIZE_16);
         assert(fp16 || (def->dest[0].size == AGX_SIZE_32));

         float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);
         if (!agx_minifloat_exact(f)) continue;

         value = agx_minifloat_encode(f);
      } else if (value != def->imm) {
         /* The immediate does not fit in the 8-bit encoding */
         continue;
      }

      I->src[s].type = AGX_INDEX_IMMEDIATE;
      I->src[s].value = value;
   }
}

static bool
agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)
{
   if (!agx_is_fmov(use)) return false;
   if (use->src[0].neg || use->src[0].abs) return false;

   /* saturate(saturate(x)) = saturate(x) */
   I->saturate |= use->saturate;
   I->dest[0] = use->dest[0];
   return true;
}

static void
agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
{
   agx_foreach_src(I, s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (def->op != AGX_OPCODE_MOV) continue;

      /* At the moment, not all instructions support size conversions. Notably,
       * RA pseudo instructions don't handle size conversions. This should be
       * refined in the future.
       */
      if (def->src[0].size != src.size) continue;

      /* Immediate inlining happens elsewhere */
      if (def->src[0].type == AGX_INDEX_IMMEDIATE) continue;

      I->src[s] = agx_replace_index(src, def->src[0]);
   }
}

static void
agx_optimizer_forward(agx_context *ctx)
{
   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));

   agx_foreach_instr_global(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      agx_foreach_dest(I, d) {
         if (I->dest[d].type == AGX_INDEX_NORMAL)
            defs[I->dest[d].value] = I;
      }

      /* Optimize moves */
      agx_optimizer_copyprop(defs, I);

      /* Propagate fmov down */
      if (info.is_float)
         agx_optimizer_fmov(defs, I);

      /* Inline immediates if we can. TODO: systematic */
      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
          I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE)
         agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
   }

   free(defs);
}

static void
agx_optimizer_backward(agx_context *ctx)
{
   agx_instr **uses = calloc(ctx->alloc, sizeof(*uses));
   BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple));

   agx_foreach_instr_global_rev(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      /* Record the use of each source; a value seen more than once is marked
       * in the bitset so it is never treated as single-use.
       */
      for (unsigned s = 0; s < info.nr_srcs; ++s) {
         if (I->src[s].type == AGX_INDEX_NORMAL) {
            unsigned v = I->src[s].value;

            if (uses[v])
               BITSET_SET(multiple, v);
            else
               uses[v] = I;
         }
      }

      if (info.nr_dests != 1)
         continue;

      if (I->dest[0].type != AGX_INDEX_NORMAL)
         continue;

      agx_instr *use = uses[I->dest[0].value];

      if (!use || BITSET_TEST(multiple, I->dest[0].value))
         continue;

      /* Destination has a single use, try to propagate */
      if (info.is_float && agx_optimizer_fmov_rev(I, use)) {
         agx_remove_instruction(use);
         continue;
      }
   }

   free(uses);
   free(multiple);
}

void
agx_optimizer(agx_context *ctx)
{
   agx_optimizer_backward(ctx);
   agx_optimizer_forward(ctx);
}