1/*
2 * Copyright (C) 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24/* This pass lowers array accesses to SSA.
25 *
26 * After this pass, instructions writing arrays implicitly read the contents of
27 * the array defined in instr->dsts[0]->def (possibly a phi node), perform the
28 * operation, and store to instr->dsts[0].
29 *
30 * This makes arrays appear like "normal" SSA values, even if the false
31 * dependencies mean that they always stay in CSSA form (i.e. able to removed
32 * out-of-SSA with no copies.) While hopefully they shouldn't induce copies in
33 * most cases, we can't make that guarantee while also splitting spilling from
34 * RA and guaranteeing a certain number of registers are used, so we have to
35 * insert the phi nodes to be able to know when copying should happen.
36 *
37 * The implementation is based on the idea in "Simple and Efficient Construction
38 * of Static Single Assignment Form" of scanning backwards to find the
39 * definition. However, since we're not doing this on-the-fly we can simplify
40 * things a little by doing a pre-pass to get the last definition of each array
41 * in each block. Then we optimize trivial phis in a separate pass, "on the fly"
42 * so that we don't have to rewrite (and keep track of) users.
43 */
44
45#include <stdlib.h>
46#include "ir3.h"
47
48struct array_state {
49   struct ir3_register *live_in_definition;
50   struct ir3_register *live_out_definition;
51   bool constructed;
52   bool optimized;
53};
54
55struct array_ctx {
56   struct array_state *states;
57   struct ir3 *ir;
58   unsigned array_count;
59};
60
61static struct array_state *
62get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
63{
64   return &ctx->states[ctx->array_count * block->index + id];
65}
66
67static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
68                                                 struct ir3_block *block,
69                                                 struct ir3_array *arr);
70
71static struct ir3_register *
72read_value_end(struct array_ctx *ctx, struct ir3_block *block,
73               struct ir3_array *arr)
74{
75   struct array_state *state = get_state(ctx, block, arr->id);
76   if (state->live_out_definition)
77      return state->live_out_definition;
78
79   state->live_out_definition = read_value_beginning(ctx, block, arr);
80   return state->live_out_definition;
81}
82
83/* Roughly equivalent to readValueRecursive from the paper: */
84static struct ir3_register *
85read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
86                     struct ir3_array *arr)
87{
88   struct array_state *state = get_state(ctx, block, arr->id);
89
90   if (state->constructed)
91      return state->live_in_definition;
92
93   if (block->predecessors_count == 0) {
94      state->constructed = true;
95      return NULL;
96   }
97
98   if (block->predecessors_count == 1) {
99      state->live_in_definition =
100         read_value_end(ctx, block->predecessors[0], arr);
101      state->constructed = true;
102      return state->live_in_definition;
103   }
104
105   unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
106   struct ir3_instruction *phi =
107      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
108   list_del(&phi->node);
109   list_add(&phi->node, &block->instr_list);
110
111   struct ir3_register *dst = __ssa_dst(phi);
112   dst->flags |= flags;
113   dst->array.id = arr->id;
114   dst->size = arr->length;
115
116   state->live_in_definition = phi->dsts[0];
117   state->constructed = true;
118
119   for (unsigned i = 0; i < block->predecessors_count; i++) {
120      struct ir3_register *src =
121         read_value_end(ctx, block->predecessors[i], arr);
122      struct ir3_register *src_reg;
123      if (src) {
124         src_reg = __ssa_src(phi, src->instr, flags);
125      } else {
126         src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
127      }
128      src_reg->array.id = arr->id;
129      src_reg->size = arr->length;
130   }
131   return phi->dsts[0];
132}
133
134static struct ir3_register *
135remove_trivial_phi(struct ir3_instruction *phi)
136{
137   /* Break cycles */
138   if (phi->data)
139      return phi->data;
140
141   phi->data = phi->dsts[0];
142
143   struct ir3_register *unique_def = NULL;
144   bool unique = true;
145   for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
146      struct ir3_register *src = phi->srcs[i];
147
148      /* If there are any undef sources, then the remaining sources may not
149       * dominate the phi node, even if they are all equal. So we need to
150       * bail out in this case.
151       *
152       * This seems to be a bug in the original paper.
153       */
154      if (!src->def) {
155         unique = false;
156         break;
157      }
158
159      struct ir3_instruction *src_instr = src->def->instr;
160
161      /* phi sources which point to the phi itself don't count for
162       * figuring out if the phi is trivial
163       */
164      if (src_instr == phi)
165         continue;
166
167      if (src_instr->opc == OPC_META_PHI) {
168         src->def = remove_trivial_phi(src->def->instr);
169      }
170
171      if (unique_def) {
172         if (unique_def != src->def) {
173            unique = false;
174            break;
175         }
176      } else {
177         unique_def = src->def;
178      }
179   }
180
181   if (unique) {
182      phi->data = unique_def;
183      return unique_def;
184   } else {
185      return phi->dsts[0];
186   }
187}
188
189static struct ir3_register *
190lookup_value(struct ir3_register *reg)
191{
192   if (reg->instr->opc == OPC_META_PHI)
193      return reg->instr->data;
194   return reg;
195}
196
197static struct ir3_register *
198lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
199{
200   struct array_state *state = get_state(ctx, block, id);
201   if (state->live_in_definition)
202      return lookup_value(state->live_in_definition);
203
204   return NULL;
205}
206
207bool
208ir3_array_to_ssa(struct ir3 *ir)
209{
210   struct array_ctx ctx = {};
211
212   foreach_array (array, &ir->array_list) {
213      ctx.array_count = MAX2(ctx.array_count, array->id + 1);
214   }
215
216   if (ctx.array_count == 0)
217      return false;
218
219   unsigned i = 0;
220   foreach_block (block, &ir->block_list) {
221      block->index = i++;
222   }
223
224   ctx.ir = ir;
225   ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));
226
227   foreach_block (block, &ir->block_list) {
228      foreach_instr (instr, &block->instr_list) {
229         foreach_dst (dst, instr) {
230            if (dst->flags & IR3_REG_ARRAY) {
231               struct array_state *state =
232                  get_state(&ctx, block, dst->array.id);
233               state->live_out_definition = dst;
234            }
235         }
236      }
237   }
238
239   foreach_block (block, &ir->block_list) {
240      foreach_instr (instr, &block->instr_list) {
241         if (instr->opc == OPC_META_PHI)
242            continue;
243
244         foreach_dst (reg, instr) {
245            if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
246               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
247
248               /* Construct any phi nodes necessary to read this value */
249               read_value_beginning(&ctx, block, arr);
250            }
251         }
252         foreach_src (reg, instr) {
253            if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
254               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
255
256               /* Construct any phi nodes necessary to read this value */
257               read_value_beginning(&ctx, block, arr);
258            }
259         }
260      }
261   }
262
263   foreach_block (block, &ir->block_list) {
264      foreach_instr_safe (instr, &block->instr_list) {
265         if (instr->opc == OPC_META_PHI)
266            remove_trivial_phi(instr);
267         else
268            break;
269      }
270   }
271
272   foreach_block (block, &ir->block_list) {
273      foreach_instr_safe (instr, &block->instr_list) {
274         if (instr->opc == OPC_META_PHI) {
275            if (!(instr->flags & IR3_REG_ARRAY))
276               continue;
277            if (instr->data != instr->dsts[0]) {
278               list_del(&instr->node);
279               continue;
280            }
281            for (unsigned i = 0; i < instr->srcs_count; i++) {
282               instr->srcs[i] = lookup_value(instr->srcs[i]);
283            }
284         } else {
285            foreach_dst (reg, instr) {
286               if ((reg->flags & IR3_REG_ARRAY)) {
287                  if (!reg->tied) {
288                     struct ir3_register *def =
289                        lookup_live_in(&ctx, block, reg->array.id);
290                     if (def)
291                        ir3_reg_set_last_array(instr, reg, def);
292                  }
293                  reg->flags |= IR3_REG_SSA;
294               }
295            }
296            foreach_src (reg, instr) {
297               if ((reg->flags & IR3_REG_ARRAY)) {
298                  /* It is assumed that before calling
299                   * ir3_array_to_ssa(), reg->def was set to the
300                   * previous writer of the array within the current
301                   * block or NULL if none.
302                   */
303                  if (!reg->def) {
304                     reg->def = lookup_live_in(&ctx, block, reg->array.id);
305                  }
306                  reg->flags |= IR3_REG_SSA;
307               }
308            }
309         }
310      }
311   }
312
313   free(ctx.states);
314   return true;
315}
316