/*
 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_minifloat.h"

/* AGX peephole optimizer responsible for instruction combining. It operates in
 * a forward direction and a backward direction, in each case traversing in
 * source order. SSA means the forward pass satisfies the invariant:
 *
 *    Every def is visited before any of its uses.
 *
 * Dually, the backward pass satisfies the invariant:
 *
 *    Every use of a def is visited before the def.
 *
 * This means the forward pass can propagate modifiers forward, whereas the
 * backward pass propagates modifiers backward. Consider an example:
 *
 *    1 = fabs 0
 *    2 = fround 1
 *    3 = fsat 2
 *
 * The forward pass would propagate the fabs into the fround (since we can
 * look up the fabs from the fround source and do the replacement). By contrast,
 * the backward pass would propagate the fsat back into the fround (since when
 * we see the fround, we know it has only a single user, the fsat). Propagatable
 * instructions have natural directions (like pushforwards and pullbacks).
 *
 * We are careful to update the tracked state whenever we modify an instruction
 * to ensure the passes are linear-time and converge in a single iteration.
 *
 * Size conversions are worth special discussion. Consider the snippet:
 *
 *    2 = fadd 0, 1
 *    3 = f2f16 2
 *    4 = fround 3
 *
 * A priori, we can move the f2f16 in either direction. But the two choices are
 * not equivalent: if we move it up to the fadd, we get FP16 for two
 * instructions, whereas if we push it into the fround, we effectively get FP32
 * for two instructions. So f2f16 is backwards. Likewise, consider
 *
 *    2 = fadd 0, 1
 *    3 = f2f32 2
 *    4 = fround 3
 *
 * This time, if we move f2f32 up to the fadd, we get FP32 for two instructions,
 * but if we move it down into the fround, we get FP16 for two. So f2f32 is
 * forwards.
 */

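/* An fmov shows up as an fadd whose second source is -0.0, the IEEE additive
 * identity, so match that pattern here. Any abs/neg/saturate then lives as
 * modifiers on the fadd itself.
 */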
static bool
agx_is_fmov(agx_instr *def)
{
   return (def->op == AGX_OPCODE_FADD)
      && agx_is_equiv(def->src[1], agx_negzero());
}

/* Compose floating-point modifiers with floating-point sources */

static agx_index
agx_compose_float_src(agx_index to, agx_index from)
{
   if (to.abs) {
      from.neg = false;
      from.abs = true;
   }

   from.neg ^= to.neg;

   return from;
}

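/* Forward pass helper: if a source is defined by a non-saturating fmov, fold
 * the fmov's source (with its composed abs/neg modifiers) directly into this
 * use. Saturation cannot be folded here since it is not a source modifier.
 */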
static void
agx_optimizer_fmov(agx_instr **defs, agx_instr *ins)
{
   agx_foreach_src(ins, s) {
      agx_index src = ins->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (!agx_is_fmov(def)) continue;
      if (def->saturate) continue;

      ins->src[s] = agx_compose_float_src(src, def->src[0]);
   }
}

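/* Fold mov_imm defs into their uses as inline immediates when the value fits
 * the encoding: 8 bits for integer immediates, or an exact 8-bit minifloat for
 * floating-point immediates.
 */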
static void
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
      unsigned srcs, bool is_float)
{
   for (unsigned s = 0; s < srcs; ++s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (def->op != AGX_OPCODE_MOV_IMM) continue;

      uint8_t value = def->imm;
      bool float_src = is_float;

      /* cmpselsrc takes integer immediates only */
      if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;

      if (float_src) {
         bool fp16 = (def->dest[0].size == AGX_SIZE_16);
         assert(fp16 || (def->dest[0].size == AGX_SIZE_32));

         float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);
         if (!agx_minifloat_exact(f)) continue;

         value = agx_minifloat_encode(f);
      } else if (value != def->imm) {
         continue;
      }

      I->src[s].type = AGX_INDEX_IMMEDIATE;
      I->src[s].value = value;
   }
}

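/* Backward pass helper: fold a (possibly saturating) modifier-free fmov 'use'
 * back into the instruction 'I' it reads from, by transferring the saturate
 * flag and destination. Returns true so the caller can remove the fmov.
 */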
static bool
agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)
{
   if (!agx_is_fmov(use)) return false;
   if (use->src[0].neg || use->src[0].abs) return false;

   /* saturate(saturate(x)) = saturate(x) */
   I->saturate |= use->saturate;
   I->dest[0] = use->dest[0];
   return true;
}

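/* Copy propagation: rewrite a source defined by a plain mov to read the mov's
 * own source directly.
 */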
static void
agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
{
   agx_foreach_src(I, s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def == NULL) continue; /* happens for phis in loops */
      if (def->op != AGX_OPCODE_MOV) continue;

      /* At the moment, not all instructions support size conversions. Notably
       * RA pseudo instructions don't handle size conversions. This should be
       * refined in the future.
       */
      if (def->src[0].size != src.size) continue;

      /* Immediate inlining happens elsewhere */
      if (def->src[0].type == AGX_INDEX_IMMEDIATE) continue;

      I->src[s] = agx_replace_index(src, def->src[0]);
   }
}

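/* Forward pass: walk instructions in source order, recording the defining
 * instruction of each SSA destination so that copy propagation, fmov folding,
 * and immediate inlining can look up the defs of their sources in O(1).
 */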
static void
agx_optimizer_forward(agx_context *ctx)
{
   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));

   agx_foreach_instr_global(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      agx_foreach_dest(I, d) {
         if (I->dest[d].type == AGX_INDEX_NORMAL)
            defs[I->dest[d].value] = I;
      }

      /* Optimize moves */
      agx_optimizer_copyprop(defs, I);

      /* Propagate fmov down */
      if (info.is_float)
         agx_optimizer_fmov(defs, I);

      /* Inline immediates if we can. TODO: systematic */
      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
          I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE)
         agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
   }

   free(defs);
}

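/* Backward pass: walk instructions in reverse source order, tracking for each
 * SSA value its unique use (values with multiple uses are marked in a bitset).
 * Single-use floating-point defs then have trailing fmovs (e.g. fsat) folded
 * back into them.
 */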
static void
agx_optimizer_backward(agx_context *ctx)
{
   agx_instr **uses = calloc(ctx->alloc, sizeof(*uses));
   BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple));

   agx_foreach_instr_global_rev(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      for (unsigned s = 0; s < info.nr_srcs; ++s) {
         if (I->src[s].type == AGX_INDEX_NORMAL) {
            unsigned v = I->src[s].value;

            if (uses[v])
               BITSET_SET(multiple, v);
            else
               uses[v] = I;
         }
      }

      if (info.nr_dests != 1)
         continue;

      if (I->dest[0].type != AGX_INDEX_NORMAL)
         continue;

      agx_instr *use = uses[I->dest[0].value];

      if (!use || BITSET_TEST(multiple, I->dest[0].value))
         continue;

      /* Destination has a single use, try to propagate */
      if (info.is_float && agx_optimizer_fmov_rev(I, use)) {
         agx_remove_instruction(use);
         continue;
      }
   }

   free(uses);
   free(multiple);
}

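/* Main entry point: run the backward pass, then the forward pass. Each pass
 * converges in a single iteration (see the discussion at the top of the file).
 */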
void
agx_optimizer(agx_context *ctx)
{
   agx_optimizer_backward(ctx);
   agx_optimizer_forward(ctx);
}