1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "agx_compiler.h"
25bf215546Sopenharmony_ci#include "agx_minifloat.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci/* AGX peephole optimizer responsible for instruction combining. It operates in
28bf215546Sopenharmony_ci * a forward direction and a backward direction, in each case traversing in
29bf215546Sopenharmony_ci * source order. SSA means the forward pass satisfies the invariant:
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci *    Every def is visited before any of its uses.
32bf215546Sopenharmony_ci *
33bf215546Sopenharmony_ci * Dually, the backend pass satisfies the invariant:
34bf215546Sopenharmony_ci *
35bf215546Sopenharmony_ci *    Every use of a def is visited before the def.
36bf215546Sopenharmony_ci *
37bf215546Sopenharmony_ci * This means the forward pass can propagate modifiers forward, whereas the
38bf215546Sopenharmony_ci * backwards pass propagates modifiers backward. Consider an example:
39bf215546Sopenharmony_ci *
40bf215546Sopenharmony_ci *    1 = fabs 0
41bf215546Sopenharmony_ci *    2 = fround 1
42bf215546Sopenharmony_ci *    3 = fsat 1
43bf215546Sopenharmony_ci *
44bf215546Sopenharmony_ci * The forwards pass would propagate the fabs to the fround (since we can
45bf215546Sopenharmony_ci * lookup the fabs from the fround source and do the replacement). By contrast
46bf215546Sopenharmony_ci * the backwards pass would propagate the fsat back to the fround (since when
47bf215546Sopenharmony_ci * we see the fround we know it has only a single user, fsat).  Propagatable
48bf215546Sopenharmony_ci * instruction have natural directions (like pushforwards and pullbacks).
49bf215546Sopenharmony_ci *
50bf215546Sopenharmony_ci * We are careful to update the tracked state whenever we modify an instruction
51bf215546Sopenharmony_ci * to ensure the passes are linear-time and converge in a single iteration.
52bf215546Sopenharmony_ci *
53bf215546Sopenharmony_ci * Size conversions are worth special discussion. Consider the snippet:
54bf215546Sopenharmony_ci *
55bf215546Sopenharmony_ci *    2 = fadd 0, 1
56bf215546Sopenharmony_ci *    3 = f2f16 2
57bf215546Sopenharmony_ci *    4 = fround 3
58bf215546Sopenharmony_ci *
59bf215546Sopenharmony_ci * A priori, we can move the f2f16 in either direction. But it's not equal --
60bf215546Sopenharmony_ci * if we move it up to the fadd, we get FP16 for two instructions, whereas if
61bf215546Sopenharmony_ci * we push it into the fround, we effectively get FP32 for two instructions. So
62bf215546Sopenharmony_ci * f2f16 is backwards. Likewise, consider
63bf215546Sopenharmony_ci *
64bf215546Sopenharmony_ci *    2 = fadd 0, 1
65bf215546Sopenharmony_ci *    3 = f2f32 1
66bf215546Sopenharmony_ci *    4 = fround 3
67bf215546Sopenharmony_ci *
68bf215546Sopenharmony_ci * This time if we move f2f32 up to the fadd, we get FP32 for two, but if we
69bf215546Sopenharmony_ci * move it down to the fround, we get FP16 to too. So f2f32 is backwards.
70bf215546Sopenharmony_ci */
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_cistatic bool
73bf215546Sopenharmony_ciagx_is_fmov(agx_instr *def)
74bf215546Sopenharmony_ci{
75bf215546Sopenharmony_ci   return (def->op == AGX_OPCODE_FADD)
76bf215546Sopenharmony_ci      && agx_is_equiv(def->src[1], agx_negzero());
77bf215546Sopenharmony_ci}
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci/* Compose floating-point modifiers with floating-point sources */
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_cistatic agx_index
82bf215546Sopenharmony_ciagx_compose_float_src(agx_index to, agx_index from)
83bf215546Sopenharmony_ci{
84bf215546Sopenharmony_ci   if (to.abs) {
85bf215546Sopenharmony_ci      from.neg = false;
86bf215546Sopenharmony_ci      from.abs = true;
87bf215546Sopenharmony_ci   }
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   from.neg ^= to.neg;
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   return from;
92bf215546Sopenharmony_ci}
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_cistatic void
95bf215546Sopenharmony_ciagx_optimizer_fmov(agx_instr **defs, agx_instr *ins)
96bf215546Sopenharmony_ci{
97bf215546Sopenharmony_ci   agx_foreach_src(ins, s) {
98bf215546Sopenharmony_ci      agx_index src = ins->src[s];
99bf215546Sopenharmony_ci      if (src.type != AGX_INDEX_NORMAL) continue;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci      agx_instr *def = defs[src.value];
102bf215546Sopenharmony_ci      if (def == NULL) continue; /* happens for phis in loops */
103bf215546Sopenharmony_ci      if (!agx_is_fmov(def)) continue;
104bf215546Sopenharmony_ci      if (def->saturate) continue;
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci      ins->src[s] = agx_compose_float_src(src, def->src[0]);
107bf215546Sopenharmony_ci   }
108bf215546Sopenharmony_ci}
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_cistatic void
111bf215546Sopenharmony_ciagx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
112bf215546Sopenharmony_ci      unsigned srcs, bool is_float)
113bf215546Sopenharmony_ci{
114bf215546Sopenharmony_ci   for (unsigned s = 0; s < srcs; ++s) {
115bf215546Sopenharmony_ci      agx_index src = I->src[s];
116bf215546Sopenharmony_ci      if (src.type != AGX_INDEX_NORMAL) continue;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci      agx_instr *def = defs[src.value];
119bf215546Sopenharmony_ci      if (def->op != AGX_OPCODE_MOV_IMM) continue;
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci      uint8_t value = def->imm;
122bf215546Sopenharmony_ci      bool float_src = is_float;
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci      /* cmpselsrc takes integer immediates only */
125bf215546Sopenharmony_ci      if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci      if (float_src) {
128bf215546Sopenharmony_ci         bool fp16 = (def->dest[0].size == AGX_SIZE_16);
129bf215546Sopenharmony_ci         assert(fp16 || (def->dest[0].size == AGX_SIZE_32));
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci         float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);
132bf215546Sopenharmony_ci         if (!agx_minifloat_exact(f)) continue;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci         value = agx_minifloat_encode(f);
135bf215546Sopenharmony_ci      } else if (value != def->imm) {
136bf215546Sopenharmony_ci         continue;
137bf215546Sopenharmony_ci      }
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci      I->src[s].type = AGX_INDEX_IMMEDIATE;
140bf215546Sopenharmony_ci      I->src[s].value = value;
141bf215546Sopenharmony_ci   }
142bf215546Sopenharmony_ci}
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_cistatic bool
145bf215546Sopenharmony_ciagx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)
146bf215546Sopenharmony_ci{
147bf215546Sopenharmony_ci   if (!agx_is_fmov(use)) return false;
148bf215546Sopenharmony_ci   if (use->src[0].neg || use->src[0].abs) return false;
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   /* saturate(saturate(x)) = saturate(x) */
151bf215546Sopenharmony_ci   I->saturate |= use->saturate;
152bf215546Sopenharmony_ci   I->dest[0] = use->dest[0];
153bf215546Sopenharmony_ci   return true;
154bf215546Sopenharmony_ci}
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_cistatic void
157bf215546Sopenharmony_ciagx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   agx_foreach_src(I, s) {
160bf215546Sopenharmony_ci      agx_index src = I->src[s];
161bf215546Sopenharmony_ci      if (src.type != AGX_INDEX_NORMAL) continue;
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci      agx_instr *def = defs[src.value];
164bf215546Sopenharmony_ci      if (def == NULL) continue; /* happens for phis in loops */
165bf215546Sopenharmony_ci      if (def->op != AGX_OPCODE_MOV) continue;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci      /* At the moment, not all instructions support size conversions. Notably
168bf215546Sopenharmony_ci       * RA pseudo instructions don't handle size conversions. This should be
169bf215546Sopenharmony_ci       * refined in the future.
170bf215546Sopenharmony_ci       */
171bf215546Sopenharmony_ci      if (def->src[0].size != src.size) continue;
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci      /* Immediate inlining happens elsewhere */
174bf215546Sopenharmony_ci      if (def->src[0].type == AGX_INDEX_IMMEDIATE) continue;
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci      I->src[s] = agx_replace_index(src, def->src[0]);
177bf215546Sopenharmony_ci   }
178bf215546Sopenharmony_ci}
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_cistatic void
181bf215546Sopenharmony_ciagx_optimizer_forward(agx_context *ctx)
182bf215546Sopenharmony_ci{
183bf215546Sopenharmony_ci   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   agx_foreach_instr_global(ctx, I) {
186bf215546Sopenharmony_ci      struct agx_opcode_info info = agx_opcodes_info[I->op];
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci      agx_foreach_dest(I, d) {
189bf215546Sopenharmony_ci         if (I->dest[d].type == AGX_INDEX_NORMAL)
190bf215546Sopenharmony_ci            defs[I->dest[d].value] = I;
191bf215546Sopenharmony_ci      }
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci      /* Optimize moves */
194bf215546Sopenharmony_ci      agx_optimizer_copyprop(defs, I);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci      /* Propagate fmov down */
197bf215546Sopenharmony_ci      if (info.is_float)
198bf215546Sopenharmony_ci         agx_optimizer_fmov(defs, I);
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci      /* Inline immediates if we can. TODO: systematic */
201bf215546Sopenharmony_ci      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE && I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE)
202bf215546Sopenharmony_ci         agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
203bf215546Sopenharmony_ci   }
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   free(defs);
206bf215546Sopenharmony_ci}
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_cistatic void
209bf215546Sopenharmony_ciagx_optimizer_backward(agx_context *ctx)
210bf215546Sopenharmony_ci{
211bf215546Sopenharmony_ci   agx_instr **uses = calloc(ctx->alloc, sizeof(*uses));
212bf215546Sopenharmony_ci   BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple));
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   agx_foreach_instr_global_rev(ctx, I) {
215bf215546Sopenharmony_ci      struct agx_opcode_info info = agx_opcodes_info[I->op];
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci      for (unsigned s = 0; s < info.nr_srcs; ++s) {
218bf215546Sopenharmony_ci         if (I->src[s].type == AGX_INDEX_NORMAL) {
219bf215546Sopenharmony_ci            unsigned v = I->src[s].value;
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci            if (uses[v])
222bf215546Sopenharmony_ci               BITSET_SET(multiple, v);
223bf215546Sopenharmony_ci            else
224bf215546Sopenharmony_ci               uses[v] = I;
225bf215546Sopenharmony_ci         }
226bf215546Sopenharmony_ci      }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci      if (info.nr_dests != 1)
229bf215546Sopenharmony_ci         continue;
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci      if (I->dest[0].type != AGX_INDEX_NORMAL)
232bf215546Sopenharmony_ci         continue;
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci      agx_instr *use = uses[I->dest[0].value];
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci      if (!use || BITSET_TEST(multiple, I->dest[0].value))
237bf215546Sopenharmony_ci         continue;
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci      /* Destination has a single use, try to propagate */
240bf215546Sopenharmony_ci      if (info.is_float && agx_optimizer_fmov_rev(I, use)) {
241bf215546Sopenharmony_ci         agx_remove_instruction(use);
242bf215546Sopenharmony_ci         continue;
243bf215546Sopenharmony_ci      }
244bf215546Sopenharmony_ci   }
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci   free(uses);
247bf215546Sopenharmony_ci   free(multiple);
248bf215546Sopenharmony_ci}
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_civoid
251bf215546Sopenharmony_ciagx_optimizer(agx_context *ctx)
252bf215546Sopenharmony_ci{
253bf215546Sopenharmony_ci   agx_optimizer_backward(ctx);
254bf215546Sopenharmony_ci   agx_optimizer_forward(ctx);
255bf215546Sopenharmony_ci}
256