1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "agx_compiler.h" 25bf215546Sopenharmony_ci#include "agx_minifloat.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/* AGX peephole optimizer responsible for instruction combining. It operates in 28bf215546Sopenharmony_ci * a forward direction and a backward direction, in each case traversing in 29bf215546Sopenharmony_ci * source order. SSA means the forward pass satisfies the invariant: 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * Every def is visited before any of its uses. 32bf215546Sopenharmony_ci * 33bf215546Sopenharmony_ci * Dually, the backend pass satisfies the invariant: 34bf215546Sopenharmony_ci * 35bf215546Sopenharmony_ci * Every use of a def is visited before the def. 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * This means the forward pass can propagate modifiers forward, whereas the 38bf215546Sopenharmony_ci * backwards pass propagates modifiers backward. Consider an example: 39bf215546Sopenharmony_ci * 40bf215546Sopenharmony_ci * 1 = fabs 0 41bf215546Sopenharmony_ci * 2 = fround 1 42bf215546Sopenharmony_ci * 3 = fsat 1 43bf215546Sopenharmony_ci * 44bf215546Sopenharmony_ci * The forwards pass would propagate the fabs to the fround (since we can 45bf215546Sopenharmony_ci * lookup the fabs from the fround source and do the replacement). By contrast 46bf215546Sopenharmony_ci * the backwards pass would propagate the fsat back to the fround (since when 47bf215546Sopenharmony_ci * we see the fround we know it has only a single user, fsat). Propagatable 48bf215546Sopenharmony_ci * instruction have natural directions (like pushforwards and pullbacks). 49bf215546Sopenharmony_ci * 50bf215546Sopenharmony_ci * We are careful to update the tracked state whenever we modify an instruction 51bf215546Sopenharmony_ci * to ensure the passes are linear-time and converge in a single iteration. 52bf215546Sopenharmony_ci * 53bf215546Sopenharmony_ci * Size conversions are worth special discussion. Consider the snippet: 54bf215546Sopenharmony_ci * 55bf215546Sopenharmony_ci * 2 = fadd 0, 1 56bf215546Sopenharmony_ci * 3 = f2f16 2 57bf215546Sopenharmony_ci * 4 = fround 3 58bf215546Sopenharmony_ci * 59bf215546Sopenharmony_ci * A priori, we can move the f2f16 in either direction. But it's not equal -- 60bf215546Sopenharmony_ci * if we move it up to the fadd, we get FP16 for two instructions, whereas if 61bf215546Sopenharmony_ci * we push it into the fround, we effectively get FP32 for two instructions. So 62bf215546Sopenharmony_ci * f2f16 is backwards. Likewise, consider 63bf215546Sopenharmony_ci * 64bf215546Sopenharmony_ci * 2 = fadd 0, 1 65bf215546Sopenharmony_ci * 3 = f2f32 1 66bf215546Sopenharmony_ci * 4 = fround 3 67bf215546Sopenharmony_ci * 68bf215546Sopenharmony_ci * This time if we move f2f32 up to the fadd, we get FP32 for two, but if we 69bf215546Sopenharmony_ci * move it down to the fround, we get FP16 to too. So f2f32 is backwards. 70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_cistatic bool 73bf215546Sopenharmony_ciagx_is_fmov(agx_instr *def) 74bf215546Sopenharmony_ci{ 75bf215546Sopenharmony_ci return (def->op == AGX_OPCODE_FADD) 76bf215546Sopenharmony_ci && agx_is_equiv(def->src[1], agx_negzero()); 77bf215546Sopenharmony_ci} 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci/* Compose floating-point modifiers with floating-point sources */ 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_cistatic agx_index 82bf215546Sopenharmony_ciagx_compose_float_src(agx_index to, agx_index from) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci if (to.abs) { 85bf215546Sopenharmony_ci from.neg = false; 86bf215546Sopenharmony_ci from.abs = true; 87bf215546Sopenharmony_ci } 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci from.neg ^= to.neg; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci return from; 92bf215546Sopenharmony_ci} 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic void 95bf215546Sopenharmony_ciagx_optimizer_fmov(agx_instr **defs, agx_instr *ins) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci agx_foreach_src(ins, s) { 98bf215546Sopenharmony_ci agx_index src = ins->src[s]; 99bf215546Sopenharmony_ci if (src.type != AGX_INDEX_NORMAL) continue; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci agx_instr *def = defs[src.value]; 102bf215546Sopenharmony_ci if (def == NULL) continue; /* happens for phis in loops */ 103bf215546Sopenharmony_ci if (!agx_is_fmov(def)) continue; 104bf215546Sopenharmony_ci if (def->saturate) continue; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci ins->src[s] = agx_compose_float_src(src, def->src[0]); 107bf215546Sopenharmony_ci } 108bf215546Sopenharmony_ci} 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_cistatic void 111bf215546Sopenharmony_ciagx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, 112bf215546Sopenharmony_ci unsigned srcs, bool is_float) 113bf215546Sopenharmony_ci{ 114bf215546Sopenharmony_ci for (unsigned s = 0; s < srcs; ++s) { 115bf215546Sopenharmony_ci agx_index src = I->src[s]; 116bf215546Sopenharmony_ci if (src.type != AGX_INDEX_NORMAL) continue; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci agx_instr *def = defs[src.value]; 119bf215546Sopenharmony_ci if (def->op != AGX_OPCODE_MOV_IMM) continue; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci uint8_t value = def->imm; 122bf215546Sopenharmony_ci bool float_src = is_float; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci /* cmpselsrc takes integer immediates only */ 125bf215546Sopenharmony_ci if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci if (float_src) { 128bf215546Sopenharmony_ci bool fp16 = (def->dest[0].size == AGX_SIZE_16); 129bf215546Sopenharmony_ci assert(fp16 || (def->dest[0].size == AGX_SIZE_32)); 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm); 132bf215546Sopenharmony_ci if (!agx_minifloat_exact(f)) continue; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci value = agx_minifloat_encode(f); 135bf215546Sopenharmony_ci } else if (value != def->imm) { 136bf215546Sopenharmony_ci continue; 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci I->src[s].type = AGX_INDEX_IMMEDIATE; 140bf215546Sopenharmony_ci I->src[s].value = value; 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci} 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_cistatic bool 145bf215546Sopenharmony_ciagx_optimizer_fmov_rev(agx_instr *I, agx_instr *use) 146bf215546Sopenharmony_ci{ 147bf215546Sopenharmony_ci if (!agx_is_fmov(use)) return false; 148bf215546Sopenharmony_ci if (use->src[0].neg || use->src[0].abs) return false; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci /* saturate(saturate(x)) = saturate(x) */ 151bf215546Sopenharmony_ci I->saturate |= use->saturate; 152bf215546Sopenharmony_ci I->dest[0] = use->dest[0]; 153bf215546Sopenharmony_ci return true; 154bf215546Sopenharmony_ci} 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cistatic void 157bf215546Sopenharmony_ciagx_optimizer_copyprop(agx_instr **defs, agx_instr *I) 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci agx_foreach_src(I, s) { 160bf215546Sopenharmony_ci agx_index src = I->src[s]; 161bf215546Sopenharmony_ci if (src.type != AGX_INDEX_NORMAL) continue; 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci agx_instr *def = defs[src.value]; 164bf215546Sopenharmony_ci if (def == NULL) continue; /* happens for phis in loops */ 165bf215546Sopenharmony_ci if (def->op != AGX_OPCODE_MOV) continue; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* At the moment, not all instructions support size conversions. Notably 168bf215546Sopenharmony_ci * RA pseudo instructions don't handle size conversions. This should be 169bf215546Sopenharmony_ci * refined in the future. 170bf215546Sopenharmony_ci */ 171bf215546Sopenharmony_ci if (def->src[0].size != src.size) continue; 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci /* Immediate inlining happens elsewhere */ 174bf215546Sopenharmony_ci if (def->src[0].type == AGX_INDEX_IMMEDIATE) continue; 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci I->src[s] = agx_replace_index(src, def->src[0]); 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_cistatic void 181bf215546Sopenharmony_ciagx_optimizer_forward(agx_context *ctx) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci agx_instr **defs = calloc(ctx->alloc, sizeof(*defs)); 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci agx_foreach_instr_global(ctx, I) { 186bf215546Sopenharmony_ci struct agx_opcode_info info = agx_opcodes_info[I->op]; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci agx_foreach_dest(I, d) { 189bf215546Sopenharmony_ci if (I->dest[d].type == AGX_INDEX_NORMAL) 190bf215546Sopenharmony_ci defs[I->dest[d].value] = I; 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci /* Optimize moves */ 194bf215546Sopenharmony_ci agx_optimizer_copyprop(defs, I); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* Propagate fmov down */ 197bf215546Sopenharmony_ci if (info.is_float) 198bf215546Sopenharmony_ci agx_optimizer_fmov(defs, I); 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci /* Inline immediates if we can. TODO: systematic */ 201bf215546Sopenharmony_ci if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE && I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE) 202bf215546Sopenharmony_ci agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float); 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci free(defs); 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistatic void 209bf215546Sopenharmony_ciagx_optimizer_backward(agx_context *ctx) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci agx_instr **uses = calloc(ctx->alloc, sizeof(*uses)); 212bf215546Sopenharmony_ci BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple)); 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci agx_foreach_instr_global_rev(ctx, I) { 215bf215546Sopenharmony_ci struct agx_opcode_info info = agx_opcodes_info[I->op]; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci for (unsigned s = 0; s < info.nr_srcs; ++s) { 218bf215546Sopenharmony_ci if (I->src[s].type == AGX_INDEX_NORMAL) { 219bf215546Sopenharmony_ci unsigned v = I->src[s].value; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci if (uses[v]) 222bf215546Sopenharmony_ci BITSET_SET(multiple, v); 223bf215546Sopenharmony_ci else 224bf215546Sopenharmony_ci uses[v] = I; 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci if (info.nr_dests != 1) 229bf215546Sopenharmony_ci continue; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci if (I->dest[0].type != AGX_INDEX_NORMAL) 232bf215546Sopenharmony_ci continue; 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci agx_instr *use = uses[I->dest[0].value]; 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci if (!use || BITSET_TEST(multiple, I->dest[0].value)) 237bf215546Sopenharmony_ci continue; 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci /* Destination has a single use, try to propagate */ 240bf215546Sopenharmony_ci if (info.is_float && agx_optimizer_fmov_rev(I, use)) { 241bf215546Sopenharmony_ci agx_remove_instruction(use); 242bf215546Sopenharmony_ci continue; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci free(uses); 247bf215546Sopenharmony_ci free(multiple); 248bf215546Sopenharmony_ci} 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_civoid 251bf215546Sopenharmony_ciagx_optimizer(agx_context *ctx) 252bf215546Sopenharmony_ci{ 253bf215546Sopenharmony_ci agx_optimizer_backward(ctx); 254bf215546Sopenharmony_ci agx_optimizer_forward(ctx); 255bf215546Sopenharmony_ci} 256