1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2022 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22bf215546Sopenharmony_ci * SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "agx_compiler.h"
26bf215546Sopenharmony_ci#include "agx_builder.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/*
29bf215546Sopenharmony_ci * Emits code for
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci *    for (int i = 0; i < n; ++i)
32bf215546Sopenharmony_ci *       registers[dests[i]] = registers[srcs[i]];
33bf215546Sopenharmony_ci *
34bf215546Sopenharmony_ci * ...with all copies happening in parallel.
35bf215546Sopenharmony_ci *
36bf215546Sopenharmony_ci * That is, emit machine instructions equivalent to a parallel copy. This is
37bf215546Sopenharmony_ci * used to lower not only parallel copies but also collects and splits, which
38bf215546Sopenharmony_ci * also have parallel copy semantics.
39bf215546Sopenharmony_ci *
40bf215546Sopenharmony_ci * We only handles register-register copies, not general agx_index sources. This
41bf215546Sopenharmony_ci * suffices for its internal use for register allocation.
42bf215546Sopenharmony_ci */
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_cistatic void
45bf215546Sopenharmony_cido_copy(agx_builder *b, const struct agx_copy *copy)
46bf215546Sopenharmony_ci{
47bf215546Sopenharmony_ci   agx_mov_to(b, agx_register(copy->dest, copy->size),
48bf215546Sopenharmony_ci                 agx_register(copy->src, copy->size));
49bf215546Sopenharmony_ci}
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_cistatic void
52bf215546Sopenharmony_cido_swap(agx_builder *b, const struct agx_copy *copy)
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   if (copy->dest == copy->src)
55bf215546Sopenharmony_ci      return;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   agx_index x = agx_register(copy->dest, copy->size);
58bf215546Sopenharmony_ci   agx_index y = agx_register(copy->src, copy->size);
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   agx_xor_to(b, x, x, y);
61bf215546Sopenharmony_ci   agx_xor_to(b, y, x, y);
62bf215546Sopenharmony_ci   agx_xor_to(b, x, x, y);
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistruct copy_ctx {
66bf215546Sopenharmony_ci   /* Number of copies being processed */
67bf215546Sopenharmony_ci   unsigned entry_count;
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci   /* For each physreg, the number of pending copy entries that use it as a
70bf215546Sopenharmony_ci    * source. Once this drops to zero, then the physreg is unblocked and can
71bf215546Sopenharmony_ci    * be moved to.
72bf215546Sopenharmony_ci    */
73bf215546Sopenharmony_ci   unsigned physreg_use_count[AGX_NUM_REGS];
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   /* For each physreg, the pending copy_entry that uses it as a dest. */
76bf215546Sopenharmony_ci   struct agx_copy *physreg_dest[AGX_NUM_REGS];
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci   struct agx_copy entries[AGX_NUM_REGS];
79bf215546Sopenharmony_ci};
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_cistatic bool
82bf215546Sopenharmony_cientry_blocked(struct agx_copy *entry, struct copy_ctx *ctx)
83bf215546Sopenharmony_ci{
84bf215546Sopenharmony_ci   for (unsigned i = 0; i < agx_size_align_16(entry->size); i++) {
85bf215546Sopenharmony_ci      if (ctx->physreg_use_count[entry->dest + i] != 0)
86bf215546Sopenharmony_ci         return true;
87bf215546Sopenharmony_ci   }
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   return false;
90bf215546Sopenharmony_ci}
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_cistatic bool
93bf215546Sopenharmony_ciis_real(struct agx_copy *entry)
94bf215546Sopenharmony_ci{
95bf215546Sopenharmony_ci   /* TODO: Allow immediates in agx_copy */
96bf215546Sopenharmony_ci   return true;
97bf215546Sopenharmony_ci}
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci/* TODO: Generalize to other bit sizes */
100bf215546Sopenharmony_cistatic void
101bf215546Sopenharmony_cisplit_32bit_copy(struct copy_ctx *ctx, struct agx_copy *entry)
102bf215546Sopenharmony_ci{
103bf215546Sopenharmony_ci   assert(!entry->done);
104bf215546Sopenharmony_ci   assert(is_real(entry));
105bf215546Sopenharmony_ci   assert(agx_size_align_16(entry->size) == 2);
106bf215546Sopenharmony_ci   struct agx_copy *new_entry = &ctx->entries[ctx->entry_count++];
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   new_entry->dest = entry->dest + 1;
109bf215546Sopenharmony_ci   new_entry->src = entry->src + 1;
110bf215546Sopenharmony_ci   new_entry->done = false;
111bf215546Sopenharmony_ci   entry->size = AGX_SIZE_16;
112bf215546Sopenharmony_ci   new_entry->size = AGX_SIZE_16;
113bf215546Sopenharmony_ci   ctx->physreg_dest[entry->dest + 1] = new_entry;
114bf215546Sopenharmony_ci}
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_civoid
117bf215546Sopenharmony_ciagx_emit_parallel_copies(agx_builder *b,
118bf215546Sopenharmony_ci                         struct agx_copy *copies,
119bf215546Sopenharmony_ci                         unsigned num_copies)
120bf215546Sopenharmony_ci{
121bf215546Sopenharmony_ci   struct copy_ctx _ctx = {
122bf215546Sopenharmony_ci      .entry_count = num_copies
123bf215546Sopenharmony_ci   };
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_ci   struct copy_ctx *ctx = &_ctx;
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci   /* Set up the bookkeeping */
128bf215546Sopenharmony_ci   memset(ctx->physreg_dest, 0, sizeof(ctx->physreg_dest));
129bf215546Sopenharmony_ci   memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count));
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->entry_count; i++) {
132bf215546Sopenharmony_ci      struct agx_copy *entry = &copies[i];
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci      ctx->entries[i] = *entry;
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci      for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) {
137bf215546Sopenharmony_ci         if (is_real(entry))
138bf215546Sopenharmony_ci            ctx->physreg_use_count[entry->src + j]++;
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci         /* Copies should not have overlapping destinations. */
141bf215546Sopenharmony_ci         assert(!ctx->physreg_dest[entry->dest + j]);
142bf215546Sopenharmony_ci         ctx->physreg_dest[entry->dest + j] = entry;
143bf215546Sopenharmony_ci      }
144bf215546Sopenharmony_ci   }
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   bool progress = true;
147bf215546Sopenharmony_ci   while (progress) {
148bf215546Sopenharmony_ci      progress = false;
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci      /* Step 1: resolve paths in the transfer graph. This means finding
151bf215546Sopenharmony_ci       * copies whose destination aren't blocked by something else and then
152bf215546Sopenharmony_ci       * emitting them, continuing this process until every copy is blocked
153bf215546Sopenharmony_ci       * and there are only cycles left.
154bf215546Sopenharmony_ci       *
155bf215546Sopenharmony_ci       * TODO: We should note that src is also available in dest to unblock
156bf215546Sopenharmony_ci       * cycles that src is involved in.
157bf215546Sopenharmony_ci       */
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci      for (unsigned i = 0; i < ctx->entry_count; i++) {
160bf215546Sopenharmony_ci         struct agx_copy *entry = &ctx->entries[i];
161bf215546Sopenharmony_ci         if (!entry->done && !entry_blocked(entry, ctx)) {
162bf215546Sopenharmony_ci            entry->done = true;
163bf215546Sopenharmony_ci            progress = true;
164bf215546Sopenharmony_ci            do_copy(b, entry);
165bf215546Sopenharmony_ci            for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) {
166bf215546Sopenharmony_ci               if (is_real(entry))
167bf215546Sopenharmony_ci                  ctx->physreg_use_count[entry->src + j]--;
168bf215546Sopenharmony_ci               ctx->physreg_dest[entry->dest + j] = NULL;
169bf215546Sopenharmony_ci            }
170bf215546Sopenharmony_ci         }
171bf215546Sopenharmony_ci      }
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci      if (progress)
174bf215546Sopenharmony_ci         continue;
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci      /* Step 2: Find partially blocked copies and split them. In the
177bf215546Sopenharmony_ci       * mergedregs case, we can 32-bit copies which are only blocked on one
178bf215546Sopenharmony_ci       * 16-bit half, and splitting them helps get things moving.
179bf215546Sopenharmony_ci       *
180bf215546Sopenharmony_ci       * We can skip splitting copies if the source isn't a register,
181bf215546Sopenharmony_ci       * however, because it does not unblock anything and therefore doesn't
182bf215546Sopenharmony_ci       * contribute to making forward progress with step 1. These copies
183bf215546Sopenharmony_ci       * should still be resolved eventually in step 1 because they can't be
184bf215546Sopenharmony_ci       * part of a cycle.
185bf215546Sopenharmony_ci       */
186bf215546Sopenharmony_ci      for (unsigned i = 0; i < ctx->entry_count; i++) {
187bf215546Sopenharmony_ci         struct agx_copy *entry = &ctx->entries[i];
188bf215546Sopenharmony_ci         if (entry->done || (agx_size_align_16(entry->size) != 2))
189bf215546Sopenharmony_ci            continue;
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci         if (((ctx->physreg_use_count[entry->dest] == 0 ||
192bf215546Sopenharmony_ci               ctx->physreg_use_count[entry->dest + 1] == 0)) &&
193bf215546Sopenharmony_ci             is_real(entry)) {
194bf215546Sopenharmony_ci            split_32bit_copy(ctx, entry);
195bf215546Sopenharmony_ci            progress = true;
196bf215546Sopenharmony_ci         }
197bf215546Sopenharmony_ci      }
198bf215546Sopenharmony_ci   }
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   /* Step 3: resolve cycles through swapping.
201bf215546Sopenharmony_ci    *
202bf215546Sopenharmony_ci    * At this point, the transfer graph should consist of only cycles.
203bf215546Sopenharmony_ci    * The reason is that, given any physreg n_1 that's the source of a
204bf215546Sopenharmony_ci    * remaining entry, it has a destination n_2, which (because every
205bf215546Sopenharmony_ci    * copy is blocked) is the source of some other copy whose destination
206bf215546Sopenharmony_ci    * is n_3, and so we can follow the chain until we get a cycle. If we
207bf215546Sopenharmony_ci    * reached some other node than n_1:
208bf215546Sopenharmony_ci    *
209bf215546Sopenharmony_ci    *  n_1 -> n_2 -> ... -> n_i
210bf215546Sopenharmony_ci    *          ^             |
211bf215546Sopenharmony_ci    *          |-------------|
212bf215546Sopenharmony_ci    *
213bf215546Sopenharmony_ci    *  then n_2 would be the destination of 2 copies, which is illegal
214bf215546Sopenharmony_ci    *  (checked above in an assert). So n_1 must be part of a cycle:
215bf215546Sopenharmony_ci    *
216bf215546Sopenharmony_ci    *  n_1 -> n_2 -> ... -> n_i
217bf215546Sopenharmony_ci    *  ^                     |
218bf215546Sopenharmony_ci    *  |---------------------|
219bf215546Sopenharmony_ci    *
220bf215546Sopenharmony_ci    *  and this must be only cycle n_1 is involved in, because any other
221bf215546Sopenharmony_ci    *  path starting from n_1 would also have to end in n_1, resulting in
222bf215546Sopenharmony_ci    *  a node somewhere along the way being the destination of 2 copies
223bf215546Sopenharmony_ci    *  when the 2 paths merge.
224bf215546Sopenharmony_ci    *
225bf215546Sopenharmony_ci    *  The way we resolve the cycle is through picking a copy (n_1, n_2)
226bf215546Sopenharmony_ci    *  and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken
227bf215546Sopenharmony_ci    *  out of the cycle:
228bf215546Sopenharmony_ci    *
229bf215546Sopenharmony_ci    *  n_1 -> ... -> n_i
230bf215546Sopenharmony_ci    *  ^              |
231bf215546Sopenharmony_ci    *  |--------------|
232bf215546Sopenharmony_ci    *
233bf215546Sopenharmony_ci    *  and we can keep repeating this until the cycle is empty.
234bf215546Sopenharmony_ci    */
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->entry_count; i++) {
237bf215546Sopenharmony_ci      struct agx_copy *entry = &ctx->entries[i];
238bf215546Sopenharmony_ci      if (entry->done)
239bf215546Sopenharmony_ci         continue;
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci      assert(is_real(entry));
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci      /* catch trivial copies */
244bf215546Sopenharmony_ci      if (entry->dest == entry->src) {
245bf215546Sopenharmony_ci         entry->done = true;
246bf215546Sopenharmony_ci         continue;
247bf215546Sopenharmony_ci      }
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci      do_swap(b, entry);
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci      /* Split any blocking copies whose sources are only partially
252bf215546Sopenharmony_ci       * contained within our destination.
253bf215546Sopenharmony_ci       */
254bf215546Sopenharmony_ci      if (agx_size_align_16(entry->size) == 1) {
255bf215546Sopenharmony_ci         for (unsigned j = 0; j < ctx->entry_count; j++) {
256bf215546Sopenharmony_ci            struct agx_copy *blocking = &ctx->entries[j];
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci            if (blocking->done)
259bf215546Sopenharmony_ci               continue;
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci            if (blocking->src <= entry->dest &&
262bf215546Sopenharmony_ci                blocking->src + 1 >= entry->dest &&
263bf215546Sopenharmony_ci                agx_size_align_16(blocking->size) == 2) {
264bf215546Sopenharmony_ci               split_32bit_copy(ctx, blocking);
265bf215546Sopenharmony_ci            }
266bf215546Sopenharmony_ci         }
267bf215546Sopenharmony_ci      }
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci      /* Update sources of blocking copies.
270bf215546Sopenharmony_ci       *
271bf215546Sopenharmony_ci       * Note: at this point, every blocking copy's source should be
272bf215546Sopenharmony_ci       * contained within our destination.
273bf215546Sopenharmony_ci       */
274bf215546Sopenharmony_ci      for (unsigned j = 0; j < ctx->entry_count; j++) {
275bf215546Sopenharmony_ci         struct agx_copy *blocking = &ctx->entries[j];
276bf215546Sopenharmony_ci         if (blocking->src >= entry->dest &&
277bf215546Sopenharmony_ci             blocking->src < entry->dest + agx_size_align_16(entry->size)) {
278bf215546Sopenharmony_ci            blocking->src = entry->src + (blocking->src - entry->dest);
279bf215546Sopenharmony_ci         }
280bf215546Sopenharmony_ci      }
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci      entry->done = true;
283bf215546Sopenharmony_ci   }
284bf215546Sopenharmony_ci}
285