1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation
3bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22bf215546Sopenharmony_ci * SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "ir3_ra.h"
26bf215546Sopenharmony_ci#include "util/rb_tree.h"
27bf215546Sopenharmony_ci#include "util/u_math.h"
28bf215546Sopenharmony_ci#include "ir3_shader.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci/* This file implements an SSA-based register allocator. Unlike other
31bf215546Sopenharmony_ci * SSA-based allocators, it handles vector split/collect "smartly," meaning
32bf215546Sopenharmony_ci * that multiple values may share the same register interval. From the
33bf215546Sopenharmony_ci * perspective of the allocator itself, only the top-level intervals matter,
34bf215546Sopenharmony_ci * and the allocator is only concerned with allocating top-level intervals,
35bf215546Sopenharmony_ci * which may mean moving other top-level intervals around. Other intervals,
36bf215546Sopenharmony_ci * like the destination of a split instruction or the source of a collect
37bf215546Sopenharmony_ci * instruction, are "locked" to their parent interval. The details of this are
38bf215546Sopenharmony_ci * mostly handled by ir3_merge_regs and ir3_reg_ctx.
39bf215546Sopenharmony_ci *
40bf215546Sopenharmony_ci * We currently don't do any backtracking, but we do use the merge sets as a
41bf215546Sopenharmony_ci * form of affinity to try to avoid moves from phis/splits/collects. Each
42bf215546Sopenharmony_ci * merge set is what a more "classic" graph-coloring or live-range based
43bf215546Sopenharmony_ci * allocator would consider a single register, but here we use it as merely a
44bf215546Sopenharmony_ci * hint, except when multiple overlapping values are live at the same time.
45bf215546Sopenharmony_ci * Each merge set has a "preferred" register, and we try to honor that when
46bf215546Sopenharmony_ci * allocating values in the merge set.
47bf215546Sopenharmony_ci */
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci/* ir3_reg_ctx implementation. */
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_cistatic int
52bf215546Sopenharmony_ciir3_reg_interval_cmp(const struct rb_node *node, const void *data)
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   unsigned reg = *(const unsigned *)data;
55bf215546Sopenharmony_ci   const struct ir3_reg_interval *interval =
56bf215546Sopenharmony_ci      ir3_rb_node_to_interval_const(node);
57bf215546Sopenharmony_ci   if (interval->reg->interval_start > reg)
58bf215546Sopenharmony_ci      return -1;
59bf215546Sopenharmony_ci   else if (interval->reg->interval_end <= reg)
60bf215546Sopenharmony_ci      return 1;
61bf215546Sopenharmony_ci   else
62bf215546Sopenharmony_ci      return 0;
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistatic struct ir3_reg_interval *
66bf215546Sopenharmony_ciir3_reg_interval_search(struct rb_tree *tree, unsigned offset)
67bf215546Sopenharmony_ci{
68bf215546Sopenharmony_ci   struct rb_node *node = rb_tree_search(tree, &offset, ir3_reg_interval_cmp);
69bf215546Sopenharmony_ci   return node ? ir3_rb_node_to_interval(node) : NULL;
70bf215546Sopenharmony_ci}
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_cistatic struct ir3_reg_interval *
73bf215546Sopenharmony_ciir3_reg_interval_search_sloppy(struct rb_tree *tree, unsigned offset)
74bf215546Sopenharmony_ci{
75bf215546Sopenharmony_ci   struct rb_node *node =
76bf215546Sopenharmony_ci      rb_tree_search_sloppy(tree, &offset, ir3_reg_interval_cmp);
77bf215546Sopenharmony_ci   return node ? ir3_rb_node_to_interval(node) : NULL;
78bf215546Sopenharmony_ci}
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci/* Get the interval covering the reg, or the closest to the right if it
81bf215546Sopenharmony_ci * doesn't exist.
82bf215546Sopenharmony_ci */
83bf215546Sopenharmony_cistatic struct ir3_reg_interval *
84bf215546Sopenharmony_ciir3_reg_interval_search_right(struct rb_tree *tree, unsigned offset)
85bf215546Sopenharmony_ci{
86bf215546Sopenharmony_ci   struct ir3_reg_interval *interval =
87bf215546Sopenharmony_ci      ir3_reg_interval_search_sloppy(tree, offset);
88bf215546Sopenharmony_ci   if (!interval) {
89bf215546Sopenharmony_ci      return NULL;
90bf215546Sopenharmony_ci   } else if (interval->reg->interval_end > offset) {
91bf215546Sopenharmony_ci      return interval;
92bf215546Sopenharmony_ci   } else {
93bf215546Sopenharmony_ci      /* There is no interval covering reg, and ra_file_search_sloppy()
94bf215546Sopenharmony_ci       * returned the closest range to the left, so the next interval to the
95bf215546Sopenharmony_ci       * right should be the closest to the right.
96bf215546Sopenharmony_ci       */
97bf215546Sopenharmony_ci      return ir3_reg_interval_next_or_null(interval);
98bf215546Sopenharmony_ci   }
99bf215546Sopenharmony_ci}
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_cistatic int
102bf215546Sopenharmony_ciir3_reg_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   const struct ir3_reg_interval *a = ir3_rb_node_to_interval_const(_a);
105bf215546Sopenharmony_ci   const struct ir3_reg_interval *b = ir3_rb_node_to_interval_const(_b);
106bf215546Sopenharmony_ci   return b->reg->interval_start - a->reg->interval_start;
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_cistatic void
110bf215546Sopenharmony_ciinterval_insert(struct ir3_reg_ctx *ctx, struct rb_tree *tree,
111bf215546Sopenharmony_ci                struct ir3_reg_interval *interval)
112bf215546Sopenharmony_ci{
113bf215546Sopenharmony_ci   struct ir3_reg_interval *right =
114bf215546Sopenharmony_ci      ir3_reg_interval_search_right(tree, interval->reg->interval_start);
115bf215546Sopenharmony_ci   if (right && right->reg->interval_start < interval->reg->interval_end) {
116bf215546Sopenharmony_ci      /* We disallow trees where different members have different half-ness.
117bf215546Sopenharmony_ci       * This means that we can't treat bitcasts as copies like normal
118bf215546Sopenharmony_ci       * split/collect, so something like this would require an extra copy
119bf215546Sopenharmony_ci       * in mergedregs mode, and count as 4 half-units of register pressure
120bf215546Sopenharmony_ci       * instead of 2:
121bf215546Sopenharmony_ci       *
122bf215546Sopenharmony_ci       * f16vec2 foo = unpackFloat2x16(bar)
123bf215546Sopenharmony_ci       * ... = foo.x
124bf215546Sopenharmony_ci       * ... = bar
125bf215546Sopenharmony_ci       *
126bf215546Sopenharmony_ci       * However, relaxing this rule would open a huge can of worms. What
127bf215546Sopenharmony_ci       * happens when there's a vector of 16 things, and the fifth element
128bf215546Sopenharmony_ci       * has been bitcasted as a half-reg? Would that element alone have to
129bf215546Sopenharmony_ci       * be small enough to be used as a half-reg source? Let's keep that
130bf215546Sopenharmony_ci       * can of worms firmly shut for now.
131bf215546Sopenharmony_ci       */
132bf215546Sopenharmony_ci      assert((interval->reg->flags & IR3_REG_HALF) ==
133bf215546Sopenharmony_ci             (right->reg->flags & IR3_REG_HALF));
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci      if (right->reg->interval_end <= interval->reg->interval_end &&
136bf215546Sopenharmony_ci          right->reg->interval_start >= interval->reg->interval_start) {
137bf215546Sopenharmony_ci         /* Check if we're inserting something that's already inserted */
138bf215546Sopenharmony_ci         assert(interval != right);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci         /* "right" is contained in "interval" and must become a child of
141bf215546Sopenharmony_ci          * it. There may be further children too.
142bf215546Sopenharmony_ci          */
143bf215546Sopenharmony_ci         for (struct ir3_reg_interval *next = ir3_reg_interval_next(right);
144bf215546Sopenharmony_ci              right && right->reg->interval_start < interval->reg->interval_end;
145bf215546Sopenharmony_ci              right = next, next = ir3_reg_interval_next_or_null(next)) {
146bf215546Sopenharmony_ci            /* "right" must be contained in "interval." */
147bf215546Sopenharmony_ci            assert(right->reg->interval_end <= interval->reg->interval_end);
148bf215546Sopenharmony_ci            assert((interval->reg->flags & IR3_REG_HALF) ==
149bf215546Sopenharmony_ci                   (right->reg->flags & IR3_REG_HALF));
150bf215546Sopenharmony_ci            if (!right->parent)
151bf215546Sopenharmony_ci               ctx->interval_delete(ctx, right);
152bf215546Sopenharmony_ci            right->parent = interval;
153bf215546Sopenharmony_ci            rb_tree_remove(tree, &right->node);
154bf215546Sopenharmony_ci            rb_tree_insert(&interval->children, &right->node,
155bf215546Sopenharmony_ci                           ir3_reg_interval_insert_cmp);
156bf215546Sopenharmony_ci         }
157bf215546Sopenharmony_ci      } else {
158bf215546Sopenharmony_ci         /* "right" must contain "interval," since intervals must form a
159bf215546Sopenharmony_ci          * tree.
160bf215546Sopenharmony_ci          */
161bf215546Sopenharmony_ci         assert(right->reg->interval_start <= interval->reg->interval_start);
162bf215546Sopenharmony_ci         interval->parent = right;
163bf215546Sopenharmony_ci         interval_insert(ctx, &right->children, interval);
164bf215546Sopenharmony_ci         return;
165bf215546Sopenharmony_ci      }
166bf215546Sopenharmony_ci   }
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   if (!interval->parent)
169bf215546Sopenharmony_ci      ctx->interval_add(ctx, interval);
170bf215546Sopenharmony_ci   rb_tree_insert(tree, &interval->node, ir3_reg_interval_insert_cmp);
171bf215546Sopenharmony_ci   interval->inserted = true;
172bf215546Sopenharmony_ci}
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_civoid
175bf215546Sopenharmony_ciir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
176bf215546Sopenharmony_ci                        struct ir3_reg_interval *interval)
177bf215546Sopenharmony_ci{
178bf215546Sopenharmony_ci   rb_tree_init(&interval->children);
179bf215546Sopenharmony_ci   interval->parent = NULL;
180bf215546Sopenharmony_ci   interval_insert(ctx, &ctx->intervals, interval);
181bf215546Sopenharmony_ci}
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci/* Call after ir3_reg_interval_remove_temp() to reinsert the interval */
184bf215546Sopenharmony_cistatic void
185bf215546Sopenharmony_ciir3_reg_interval_reinsert(struct ir3_reg_ctx *ctx,
186bf215546Sopenharmony_ci                          struct ir3_reg_interval *interval)
187bf215546Sopenharmony_ci{
188bf215546Sopenharmony_ci   interval->parent = NULL;
189bf215546Sopenharmony_ci   interval_insert(ctx, &ctx->intervals, interval);
190bf215546Sopenharmony_ci}
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_civoid
193bf215546Sopenharmony_ciir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
194bf215546Sopenharmony_ci                        struct ir3_reg_interval *interval)
195bf215546Sopenharmony_ci{
196bf215546Sopenharmony_ci   if (interval->parent) {
197bf215546Sopenharmony_ci      rb_tree_remove(&interval->parent->children, &interval->node);
198bf215546Sopenharmony_ci   } else {
199bf215546Sopenharmony_ci      ctx->interval_delete(ctx, interval);
200bf215546Sopenharmony_ci      rb_tree_remove(&ctx->intervals, &interval->node);
201bf215546Sopenharmony_ci   }
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   rb_tree_foreach_safe (struct ir3_reg_interval, child, &interval->children,
204bf215546Sopenharmony_ci                         node) {
205bf215546Sopenharmony_ci      rb_tree_remove(&interval->children, &child->node);
206bf215546Sopenharmony_ci      child->parent = interval->parent;
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci      if (interval->parent) {
209bf215546Sopenharmony_ci         rb_tree_insert(&child->parent->children, &child->node,
210bf215546Sopenharmony_ci                        ir3_reg_interval_insert_cmp);
211bf215546Sopenharmony_ci      } else {
212bf215546Sopenharmony_ci         ctx->interval_readd(ctx, interval, child);
213bf215546Sopenharmony_ci         rb_tree_insert(&ctx->intervals, &child->node,
214bf215546Sopenharmony_ci                        ir3_reg_interval_insert_cmp);
215bf215546Sopenharmony_ci      }
216bf215546Sopenharmony_ci   }
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci   interval->inserted = false;
219bf215546Sopenharmony_ci}
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_cistatic void
222bf215546Sopenharmony_ci_mark_free(struct ir3_reg_interval *interval)
223bf215546Sopenharmony_ci{
224bf215546Sopenharmony_ci   interval->inserted = false;
225bf215546Sopenharmony_ci   rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) {
226bf215546Sopenharmony_ci      _mark_free(child);
227bf215546Sopenharmony_ci   }
228bf215546Sopenharmony_ci}
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci/* Remove an interval and all its children from the tree. */
231bf215546Sopenharmony_civoid
232bf215546Sopenharmony_ciir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
233bf215546Sopenharmony_ci                            struct ir3_reg_interval *interval)
234bf215546Sopenharmony_ci{
235bf215546Sopenharmony_ci   assert(!interval->parent);
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   ctx->interval_delete(ctx, interval);
238bf215546Sopenharmony_ci   rb_tree_remove(&ctx->intervals, &interval->node);
239bf215546Sopenharmony_ci   _mark_free(interval);
240bf215546Sopenharmony_ci}
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci/* Used when popping an interval to be shuffled around. Don't disturb children
243bf215546Sopenharmony_ci * so that it can be later reinserted.
244bf215546Sopenharmony_ci */
245bf215546Sopenharmony_cistatic void
246bf215546Sopenharmony_ciir3_reg_interval_remove_temp(struct ir3_reg_ctx *ctx,
247bf215546Sopenharmony_ci                             struct ir3_reg_interval *interval)
248bf215546Sopenharmony_ci{
249bf215546Sopenharmony_ci   assert(!interval->parent);
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   ctx->interval_delete(ctx, interval);
252bf215546Sopenharmony_ci   rb_tree_remove(&ctx->intervals, &interval->node);
253bf215546Sopenharmony_ci}
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_cistatic void
256bf215546Sopenharmony_ciinterval_dump(struct log_stream *stream, struct ir3_reg_interval *interval,
257bf215546Sopenharmony_ci              unsigned indent)
258bf215546Sopenharmony_ci{
259bf215546Sopenharmony_ci   for (unsigned i = 0; i < indent; i++)
260bf215546Sopenharmony_ci      mesa_log_stream_printf(stream, "\t");
261bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "reg %u start %u\n", interval->reg->name,
262bf215546Sopenharmony_ci                          interval->reg->interval_start);
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci   rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) {
265bf215546Sopenharmony_ci      interval_dump(stream, child, indent + 1);
266bf215546Sopenharmony_ci   }
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci   for (unsigned i = 0; i < indent; i++)
269bf215546Sopenharmony_ci      mesa_log_stream_printf(stream, "\t");
270bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "reg %u end %u\n", interval->reg->name,
271bf215546Sopenharmony_ci                          interval->reg->interval_end);
272bf215546Sopenharmony_ci}
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_civoid
275bf215546Sopenharmony_ciir3_reg_interval_dump(struct log_stream *stream, struct ir3_reg_interval *interval)
276bf215546Sopenharmony_ci{
277bf215546Sopenharmony_ci   interval_dump(stream, interval, 0);
278bf215546Sopenharmony_ci}
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci/* These are the core datastructures used by the register allocator. First
281bf215546Sopenharmony_ci * ra_interval and ra_file, which are used for intra-block tracking and use
282bf215546Sopenharmony_ci * the ir3_reg_ctx infrastructure:
283bf215546Sopenharmony_ci */
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_cistruct ra_interval {
286bf215546Sopenharmony_ci   struct ir3_reg_interval interval;
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   struct rb_node physreg_node;
289bf215546Sopenharmony_ci   physreg_t physreg_start, physreg_end;
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci   /* True if this is a source of the current instruction which is entirely
292bf215546Sopenharmony_ci    * killed. This means we can allocate the dest over it, but we can't break
293bf215546Sopenharmony_ci    * it up.
294bf215546Sopenharmony_ci    */
295bf215546Sopenharmony_ci   bool is_killed;
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   /* True if this interval cannot be moved from its position. This is only
298bf215546Sopenharmony_ci    * used for precolored inputs to ensure that other inputs don't get
299bf215546Sopenharmony_ci    * allocated on top of them.
300bf215546Sopenharmony_ci    */
301bf215546Sopenharmony_ci   bool frozen;
302bf215546Sopenharmony_ci};
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_cistruct ra_file {
305bf215546Sopenharmony_ci   struct ir3_reg_ctx reg_ctx;
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   BITSET_DECLARE(available, RA_MAX_FILE_SIZE);
308bf215546Sopenharmony_ci   BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE);
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   struct rb_tree physreg_intervals;
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   unsigned size;
313bf215546Sopenharmony_ci   unsigned start;
314bf215546Sopenharmony_ci};
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci/* State for inter-block tracking. When we split a live range to make space
317bf215546Sopenharmony_ci * for a vector, we may need to insert fixup code when a block has multiple
318bf215546Sopenharmony_ci * predecessors that have moved the same live value to different registers.
319bf215546Sopenharmony_ci * This keeps track of state required to do that.
320bf215546Sopenharmony_ci */
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_cistruct ra_block_state {
323bf215546Sopenharmony_ci   /* Map of defining ir3_register -> physreg it was allocated to at the end
324bf215546Sopenharmony_ci    * of the block.
325bf215546Sopenharmony_ci    */
326bf215546Sopenharmony_ci   struct hash_table *renames;
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   /* For loops, we need to process a block before all its predecessors have
329bf215546Sopenharmony_ci    * been processed. In particular, we need to pick registers for values
330bf215546Sopenharmony_ci    * without knowing if all the predecessors have been renamed. This keeps
331bf215546Sopenharmony_ci    * track of the registers we chose so that when we visit the back-edge we
332bf215546Sopenharmony_ci    * can move them appropriately. If all predecessors have been visited
333bf215546Sopenharmony_ci    * before this block is visited then we don't need to fill this out. This
334bf215546Sopenharmony_ci    * is a map from ir3_register -> physreg.
335bf215546Sopenharmony_ci    */
336bf215546Sopenharmony_ci   struct hash_table *entry_regs;
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   /* True if the block has been visited and "renames" is complete.
339bf215546Sopenharmony_ci    */
340bf215546Sopenharmony_ci   bool visited;
341bf215546Sopenharmony_ci};
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_cistruct ra_parallel_copy {
344bf215546Sopenharmony_ci   struct ra_interval *interval;
345bf215546Sopenharmony_ci   physreg_t src;
346bf215546Sopenharmony_ci};
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci/* The main context: */
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_cistruct ra_ctx {
351bf215546Sopenharmony_ci   /* r0.x - r47.w. On a6xx with merged-regs, hr0.x-hr47.w go into the bottom
352bf215546Sopenharmony_ci    * half of this file too.
353bf215546Sopenharmony_ci    */
354bf215546Sopenharmony_ci   struct ra_file full;
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   /* hr0.x - hr63.w, only used without merged-regs. */
357bf215546Sopenharmony_ci   struct ra_file half;
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   /* Shared regs. */
360bf215546Sopenharmony_ci   struct ra_file shared;
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   struct ir3_liveness *live;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   struct ir3_block *block;
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci   const struct ir3_compiler *compiler;
367bf215546Sopenharmony_ci   gl_shader_stage stage;
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci   /* Pending moves of top-level intervals that will be emitted once we're
370bf215546Sopenharmony_ci    * finished:
371bf215546Sopenharmony_ci    */
372bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ra_parallel_copy, parallel_copies);
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci   struct ra_interval *intervals;
375bf215546Sopenharmony_ci   struct ra_block_state *blocks;
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci   bool merged_regs;
378bf215546Sopenharmony_ci};
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci#define foreach_interval(interval, file)                                       \
381bf215546Sopenharmony_ci   rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals,  \
382bf215546Sopenharmony_ci                    physreg_node)
383bf215546Sopenharmony_ci#define foreach_interval_rev(interval, file)                                   \
384bf215546Sopenharmony_ci   rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals,  \
385bf215546Sopenharmony_ci                    physreg_node)
386bf215546Sopenharmony_ci#define foreach_interval_safe(interval, file)                                  \
387bf215546Sopenharmony_ci   rb_tree_foreach_safe (struct ra_interval, interval,                         \
388bf215546Sopenharmony_ci                         &(file)->physreg_intervals, physreg_node)
389bf215546Sopenharmony_ci#define foreach_interval_rev_safe(interval, file)                              \
390bf215546Sopenharmony_ci   rb_tree_foreach_rev_safe(struct ra_interval, interval,                      \
391bf215546Sopenharmony_ci                            &(file)->physreg_intervals, physreg_node)
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_cistatic struct ra_interval *
394bf215546Sopenharmony_cirb_node_to_interval(struct rb_node *node)
395bf215546Sopenharmony_ci{
396bf215546Sopenharmony_ci   return rb_node_data(struct ra_interval, node, physreg_node);
397bf215546Sopenharmony_ci}
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_cistatic const struct ra_interval *
400bf215546Sopenharmony_cirb_node_to_interval_const(const struct rb_node *node)
401bf215546Sopenharmony_ci{
402bf215546Sopenharmony_ci   return rb_node_data(struct ra_interval, node, physreg_node);
403bf215546Sopenharmony_ci}
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_cistatic struct ra_interval *
406bf215546Sopenharmony_cira_interval_next(struct ra_interval *interval)
407bf215546Sopenharmony_ci{
408bf215546Sopenharmony_ci   struct rb_node *next = rb_node_next(&interval->physreg_node);
409bf215546Sopenharmony_ci   return next ? rb_node_to_interval(next) : NULL;
410bf215546Sopenharmony_ci}
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_cistatic struct ra_interval *
413bf215546Sopenharmony_cira_interval_next_or_null(struct ra_interval *interval)
414bf215546Sopenharmony_ci{
415bf215546Sopenharmony_ci   return interval ? ra_interval_next(interval) : NULL;
416bf215546Sopenharmony_ci}
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_cistatic int
419bf215546Sopenharmony_cira_interval_cmp(const struct rb_node *node, const void *data)
420bf215546Sopenharmony_ci{
421bf215546Sopenharmony_ci   physreg_t reg = *(const physreg_t *)data;
422bf215546Sopenharmony_ci   const struct ra_interval *interval = rb_node_to_interval_const(node);
423bf215546Sopenharmony_ci   if (interval->physreg_start > reg)
424bf215546Sopenharmony_ci      return -1;
425bf215546Sopenharmony_ci   else if (interval->physreg_end <= reg)
426bf215546Sopenharmony_ci      return 1;
427bf215546Sopenharmony_ci   else
428bf215546Sopenharmony_ci      return 0;
429bf215546Sopenharmony_ci}
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_cistatic struct ra_interval *
432bf215546Sopenharmony_cira_interval_search_sloppy(struct rb_tree *tree, physreg_t reg)
433bf215546Sopenharmony_ci{
434bf215546Sopenharmony_ci   struct rb_node *node = rb_tree_search_sloppy(tree, &reg, ra_interval_cmp);
435bf215546Sopenharmony_ci   return node ? rb_node_to_interval(node) : NULL;
436bf215546Sopenharmony_ci}
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci/* Get the interval covering the reg, or the closest to the right if it
439bf215546Sopenharmony_ci * doesn't exist.
440bf215546Sopenharmony_ci */
441bf215546Sopenharmony_cistatic struct ra_interval *
442bf215546Sopenharmony_cira_interval_search_right(struct rb_tree *tree, physreg_t reg)
443bf215546Sopenharmony_ci{
444bf215546Sopenharmony_ci   struct ra_interval *interval = ra_interval_search_sloppy(tree, reg);
445bf215546Sopenharmony_ci   if (!interval) {
446bf215546Sopenharmony_ci      return NULL;
447bf215546Sopenharmony_ci   } else if (interval->physreg_end > reg) {
448bf215546Sopenharmony_ci      return interval;
449bf215546Sopenharmony_ci   } else {
450bf215546Sopenharmony_ci      /* There is no interval covering reg, and ra_file_search_sloppy()
451bf215546Sopenharmony_ci       * returned the closest range to the left, so the next interval to the
452bf215546Sopenharmony_ci       * right should be the closest to the right.
453bf215546Sopenharmony_ci       */
454bf215546Sopenharmony_ci      return ra_interval_next_or_null(interval);
455bf215546Sopenharmony_ci   }
456bf215546Sopenharmony_ci}
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_cistatic struct ra_interval *
459bf215546Sopenharmony_cira_file_search_right(struct ra_file *file, physreg_t reg)
460bf215546Sopenharmony_ci{
461bf215546Sopenharmony_ci   return ra_interval_search_right(&file->physreg_intervals, reg);
462bf215546Sopenharmony_ci}
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_cistatic int
465bf215546Sopenharmony_cira_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b)
466bf215546Sopenharmony_ci{
467bf215546Sopenharmony_ci   const struct ra_interval *a = rb_node_to_interval_const(_a);
468bf215546Sopenharmony_ci   const struct ra_interval *b = rb_node_to_interval_const(_b);
469bf215546Sopenharmony_ci   return b->physreg_start - a->physreg_start;
470bf215546Sopenharmony_ci}
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_cistatic struct ra_interval *
473bf215546Sopenharmony_ciir3_reg_interval_to_ra_interval(struct ir3_reg_interval *interval)
474bf215546Sopenharmony_ci{
475bf215546Sopenharmony_ci   return rb_node_data(struct ra_interval, interval, interval);
476bf215546Sopenharmony_ci}
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_cistatic struct ra_file *
479bf215546Sopenharmony_ciir3_reg_ctx_to_file(struct ir3_reg_ctx *ctx)
480bf215546Sopenharmony_ci{
481bf215546Sopenharmony_ci   return rb_node_data(struct ra_file, ctx, reg_ctx);
482bf215546Sopenharmony_ci}
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_cistatic void
485bf215546Sopenharmony_ciinterval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval)
486bf215546Sopenharmony_ci{
487bf215546Sopenharmony_ci   struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval);
488bf215546Sopenharmony_ci   struct ra_file *file = ir3_reg_ctx_to_file(ctx);
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   /* We can assume in this case that physreg_start/physreg_end is already
491bf215546Sopenharmony_ci    * initialized.
492bf215546Sopenharmony_ci    */
493bf215546Sopenharmony_ci   for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
494bf215546Sopenharmony_ci      BITSET_CLEAR(file->available, i);
495bf215546Sopenharmony_ci      BITSET_CLEAR(file->available_to_evict, i);
496bf215546Sopenharmony_ci   }
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   rb_tree_insert(&file->physreg_intervals, &interval->physreg_node,
499bf215546Sopenharmony_ci                  ra_interval_insert_cmp);
500bf215546Sopenharmony_ci}
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_cistatic void
503bf215546Sopenharmony_ciinterval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval)
504bf215546Sopenharmony_ci{
505bf215546Sopenharmony_ci   struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval);
506bf215546Sopenharmony_ci   struct ra_file *file = ir3_reg_ctx_to_file(ctx);
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci   for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
509bf215546Sopenharmony_ci      BITSET_SET(file->available, i);
510bf215546Sopenharmony_ci      BITSET_SET(file->available_to_evict, i);
511bf215546Sopenharmony_ci   }
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci   rb_tree_remove(&file->physreg_intervals, &interval->physreg_node);
514bf215546Sopenharmony_ci}
515bf215546Sopenharmony_ci
516bf215546Sopenharmony_cistatic void
517bf215546Sopenharmony_ciinterval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_parent,
518bf215546Sopenharmony_ci               struct ir3_reg_interval *_child)
519bf215546Sopenharmony_ci{
520bf215546Sopenharmony_ci   struct ra_interval *parent = ir3_reg_interval_to_ra_interval(_parent);
521bf215546Sopenharmony_ci   struct ra_interval *child = ir3_reg_interval_to_ra_interval(_child);
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci   child->physreg_start =
524bf215546Sopenharmony_ci      parent->physreg_start + (child->interval.reg->interval_start -
525bf215546Sopenharmony_ci                               parent->interval.reg->interval_start);
526bf215546Sopenharmony_ci   child->physreg_end =
527bf215546Sopenharmony_ci      child->physreg_start +
528bf215546Sopenharmony_ci      (child->interval.reg->interval_end - child->interval.reg->interval_start);
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci   interval_add(ctx, _child);
531bf215546Sopenharmony_ci}
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_cistatic void
534bf215546Sopenharmony_cira_file_init(struct ra_file *file)
535bf215546Sopenharmony_ci{
536bf215546Sopenharmony_ci   for (unsigned i = 0; i < file->size; i++) {
537bf215546Sopenharmony_ci      BITSET_SET(file->available, i);
538bf215546Sopenharmony_ci      BITSET_SET(file->available_to_evict, i);
539bf215546Sopenharmony_ci   }
540bf215546Sopenharmony_ci
541bf215546Sopenharmony_ci   rb_tree_init(&file->reg_ctx.intervals);
542bf215546Sopenharmony_ci   rb_tree_init(&file->physreg_intervals);
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci   file->reg_ctx.interval_add = interval_add;
545bf215546Sopenharmony_ci   file->reg_ctx.interval_delete = interval_delete;
546bf215546Sopenharmony_ci   file->reg_ctx.interval_readd = interval_readd;
547bf215546Sopenharmony_ci}
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_cistatic void
550bf215546Sopenharmony_cira_file_insert(struct ra_file *file, struct ra_interval *interval)
551bf215546Sopenharmony_ci{
552bf215546Sopenharmony_ci   assert(interval->physreg_start < interval->physreg_end);
553bf215546Sopenharmony_ci   assert(interval->physreg_end <= file->size);
554bf215546Sopenharmony_ci   if (interval->interval.reg->flags & IR3_REG_HALF)
555bf215546Sopenharmony_ci      assert(interval->physreg_end <= RA_HALF_SIZE);
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   ir3_reg_interval_insert(&file->reg_ctx, &interval->interval);
558bf215546Sopenharmony_ci}
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_cistatic void
561bf215546Sopenharmony_cira_file_remove(struct ra_file *file, struct ra_interval *interval)
562bf215546Sopenharmony_ci{
563bf215546Sopenharmony_ci   ir3_reg_interval_remove(&file->reg_ctx, &interval->interval);
564bf215546Sopenharmony_ci}
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_cistatic void
567bf215546Sopenharmony_cira_file_mark_killed(struct ra_file *file, struct ra_interval *interval)
568bf215546Sopenharmony_ci{
569bf215546Sopenharmony_ci   assert(!interval->interval.parent);
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci   for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
572bf215546Sopenharmony_ci      BITSET_SET(file->available, i);
573bf215546Sopenharmony_ci   }
574bf215546Sopenharmony_ci
575bf215546Sopenharmony_ci   interval->is_killed = true;
576bf215546Sopenharmony_ci}
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_cistatic void
579bf215546Sopenharmony_cira_file_unmark_killed(struct ra_file *file, struct ra_interval *interval)
580bf215546Sopenharmony_ci{
581bf215546Sopenharmony_ci   assert(!interval->interval.parent);
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci   for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) {
584bf215546Sopenharmony_ci      BITSET_CLEAR(file->available, i);
585bf215546Sopenharmony_ci   }
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci   interval->is_killed = false;
588bf215546Sopenharmony_ci}
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_cistatic physreg_t
591bf215546Sopenharmony_cira_interval_get_physreg(const struct ra_interval *interval)
592bf215546Sopenharmony_ci{
593bf215546Sopenharmony_ci   unsigned child_start = interval->interval.reg->interval_start;
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci   while (interval->interval.parent) {
596bf215546Sopenharmony_ci      interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
597bf215546Sopenharmony_ci   }
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci   return interval->physreg_start +
600bf215546Sopenharmony_ci          (child_start - interval->interval.reg->interval_start);
601bf215546Sopenharmony_ci}
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_cistatic unsigned
604bf215546Sopenharmony_cira_interval_get_num(const struct ra_interval *interval)
605bf215546Sopenharmony_ci{
606bf215546Sopenharmony_ci   return ra_physreg_to_num(ra_interval_get_physreg(interval),
607bf215546Sopenharmony_ci                            interval->interval.reg->flags);
608bf215546Sopenharmony_ci}
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_cistatic void
611bf215546Sopenharmony_cira_interval_init(struct ra_interval *interval, struct ir3_register *reg)
612bf215546Sopenharmony_ci{
613bf215546Sopenharmony_ci   ir3_reg_interval_init(&interval->interval, reg);
614bf215546Sopenharmony_ci   interval->is_killed = false;
615bf215546Sopenharmony_ci   interval->frozen = false;
616bf215546Sopenharmony_ci}
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_cistatic void
619bf215546Sopenharmony_cira_interval_dump(struct log_stream *stream, struct ra_interval *interval)
620bf215546Sopenharmony_ci{
621bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "physreg %u ", interval->physreg_start);
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   ir3_reg_interval_dump(stream, &interval->interval);
624bf215546Sopenharmony_ci}
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_cistatic void
627bf215546Sopenharmony_cira_file_dump(struct log_stream *stream, struct ra_file *file)
628bf215546Sopenharmony_ci{
629bf215546Sopenharmony_ci   rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
630bf215546Sopenharmony_ci                    physreg_node) {
631bf215546Sopenharmony_ci      ra_interval_dump(stream, interval);
632bf215546Sopenharmony_ci   }
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci   unsigned start, end;
635bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "available:\n");
636bf215546Sopenharmony_ci   BITSET_FOREACH_RANGE (start, end, file->available, file->size) {
637bf215546Sopenharmony_ci      mesa_log_stream_printf(stream, "%u-%u ", start, end);
638bf215546Sopenharmony_ci   }
639bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "\n");
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "available to evict:\n");
642bf215546Sopenharmony_ci   BITSET_FOREACH_RANGE (start, end, file->available_to_evict, file->size) {
643bf215546Sopenharmony_ci      mesa_log_stream_printf(stream, "%u-%u ", start, end);
644bf215546Sopenharmony_ci   }
645bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "\n");
646bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "start: %u\n", file->start);
647bf215546Sopenharmony_ci}
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_cistatic void
650bf215546Sopenharmony_cira_ctx_dump(struct ra_ctx *ctx)
651bf215546Sopenharmony_ci{
652bf215546Sopenharmony_ci   struct log_stream *stream = mesa_log_streami();
653bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "full:\n");
654bf215546Sopenharmony_ci   ra_file_dump(stream, &ctx->full);
655bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "half:\n");
656bf215546Sopenharmony_ci   ra_file_dump(stream, &ctx->half);
657bf215546Sopenharmony_ci   mesa_log_stream_printf(stream, "shared:");
658bf215546Sopenharmony_ci   ra_file_dump(stream, &ctx->shared);
659bf215546Sopenharmony_ci   mesa_log_stream_destroy(stream);
660bf215546Sopenharmony_ci}
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_cistatic unsigned
663bf215546Sopenharmony_cireg_file_size(struct ra_file *file, struct ir3_register *reg)
664bf215546Sopenharmony_ci{
665bf215546Sopenharmony_ci   /* Half-regs can only take up the first half of the combined regfile */
666bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_HALF)
667bf215546Sopenharmony_ci      return MIN2(file->size, RA_HALF_SIZE);
668bf215546Sopenharmony_ci   else
669bf215546Sopenharmony_ci      return file->size;
670bf215546Sopenharmony_ci}
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci/* ra_pop_interval/ra_push_interval provide an API to shuffle around multiple
673bf215546Sopenharmony_ci * top-level intervals at once. Pop multiple intervals, then push them back in
674bf215546Sopenharmony_ci * any order.
675bf215546Sopenharmony_ci */
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_cistruct ra_removed_interval {
678bf215546Sopenharmony_ci   struct ra_interval *interval;
679bf215546Sopenharmony_ci   unsigned size;
680bf215546Sopenharmony_ci};
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_cistatic struct ra_removed_interval
683bf215546Sopenharmony_cira_pop_interval(struct ra_ctx *ctx, struct ra_file *file,
684bf215546Sopenharmony_ci                struct ra_interval *interval)
685bf215546Sopenharmony_ci{
686bf215546Sopenharmony_ci   assert(!interval->interval.parent);
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci   /* Check if we've already moved this reg before */
689bf215546Sopenharmony_ci   unsigned pcopy_index;
690bf215546Sopenharmony_ci   for (pcopy_index = 0; pcopy_index < ctx->parallel_copies_count;
691bf215546Sopenharmony_ci        pcopy_index++) {
692bf215546Sopenharmony_ci      if (ctx->parallel_copies[pcopy_index].interval == interval)
693bf215546Sopenharmony_ci         break;
694bf215546Sopenharmony_ci   }
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci   if (pcopy_index == ctx->parallel_copies_count) {
697bf215546Sopenharmony_ci      array_insert(ctx, ctx->parallel_copies,
698bf215546Sopenharmony_ci                   (struct ra_parallel_copy){
699bf215546Sopenharmony_ci                      .interval = interval,
700bf215546Sopenharmony_ci                      .src = interval->physreg_start,
701bf215546Sopenharmony_ci                   });
702bf215546Sopenharmony_ci   }
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_ci   ir3_reg_interval_remove_temp(&file->reg_ctx, &interval->interval);
705bf215546Sopenharmony_ci
706bf215546Sopenharmony_ci   return (struct ra_removed_interval){
707bf215546Sopenharmony_ci      .interval = interval,
708bf215546Sopenharmony_ci      .size = interval->physreg_end - interval->physreg_start,
709bf215546Sopenharmony_ci   };
710bf215546Sopenharmony_ci}
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_cistatic void
713bf215546Sopenharmony_cira_push_interval(struct ra_ctx *ctx, struct ra_file *file,
714bf215546Sopenharmony_ci                 const struct ra_removed_interval *removed, physreg_t dst)
715bf215546Sopenharmony_ci{
716bf215546Sopenharmony_ci   struct ra_interval *interval = removed->interval;
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci   interval->physreg_start = dst;
719bf215546Sopenharmony_ci   interval->physreg_end = dst + removed->size;
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   assert(interval->physreg_end <= file->size);
722bf215546Sopenharmony_ci   if (interval->interval.reg->flags & IR3_REG_HALF)
723bf215546Sopenharmony_ci      assert(interval->physreg_end <= RA_HALF_SIZE);
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci   ir3_reg_interval_reinsert(&file->reg_ctx, &interval->interval);
726bf215546Sopenharmony_ci}
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci/* Pick up the interval and place it at "dst". */
729bf215546Sopenharmony_cistatic void
730bf215546Sopenharmony_cira_move_interval(struct ra_ctx *ctx, struct ra_file *file,
731bf215546Sopenharmony_ci                 struct ra_interval *interval, physreg_t dst)
732bf215546Sopenharmony_ci{
733bf215546Sopenharmony_ci   struct ra_removed_interval temp = ra_pop_interval(ctx, file, interval);
734bf215546Sopenharmony_ci   ra_push_interval(ctx, file, &temp, dst);
735bf215546Sopenharmony_ci}
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_cistatic struct ra_file *
738bf215546Sopenharmony_cira_get_file(struct ra_ctx *ctx, struct ir3_register *reg)
739bf215546Sopenharmony_ci{
740bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_SHARED)
741bf215546Sopenharmony_ci      return &ctx->shared;
742bf215546Sopenharmony_ci   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
743bf215546Sopenharmony_ci      return &ctx->full;
744bf215546Sopenharmony_ci   else
745bf215546Sopenharmony_ci      return &ctx->half;
746bf215546Sopenharmony_ci}
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_ci/* Returns true if the proposed spot for "dst" or a killed source overlaps a
750bf215546Sopenharmony_ci * destination that's been allocated.
751bf215546Sopenharmony_ci */
752bf215546Sopenharmony_cistatic bool
753bf215546Sopenharmony_cicheck_dst_overlap(struct ra_ctx *ctx, struct ra_file *file,
754bf215546Sopenharmony_ci                  struct ir3_register *dst, physreg_t start,
755bf215546Sopenharmony_ci                  physreg_t end)
756bf215546Sopenharmony_ci{
757bf215546Sopenharmony_ci   struct ir3_instruction *instr = dst->instr;
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci   ra_foreach_dst (other_dst, instr) {
760bf215546Sopenharmony_ci      /* We assume only destinations before the current one have been allocated.
761bf215546Sopenharmony_ci       */
762bf215546Sopenharmony_ci      if (other_dst == dst)
763bf215546Sopenharmony_ci         break;
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci      if (ra_get_file(ctx, other_dst) != file)
766bf215546Sopenharmony_ci         continue;
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci      struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
769bf215546Sopenharmony_ci      assert(!other_interval->interval.parent);
770bf215546Sopenharmony_ci      physreg_t other_start = other_interval->physreg_start;
771bf215546Sopenharmony_ci      physreg_t other_end = other_interval->physreg_end;
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci      if (other_end > start && end > other_start)
774bf215546Sopenharmony_ci         return true;
775bf215546Sopenharmony_ci   }
776bf215546Sopenharmony_ci
777bf215546Sopenharmony_ci   return false;
778bf215546Sopenharmony_ci}
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci/* True if the destination is "early-clobber," meaning that it cannot be
781bf215546Sopenharmony_ci * allocated over killed sources. Some destinations always require it, but it
782bf215546Sopenharmony_ci * also is implicitly true for tied destinations whose source is live-through.
783bf215546Sopenharmony_ci * If the source is killed, then we skip allocating a register for the
784bf215546Sopenharmony_ci * destination altogether so we don't need to worry about that case here.
785bf215546Sopenharmony_ci */
786bf215546Sopenharmony_cistatic bool
787bf215546Sopenharmony_ciis_early_clobber(struct ir3_register *reg)
788bf215546Sopenharmony_ci{
789bf215546Sopenharmony_ci   return (reg->flags & IR3_REG_EARLY_CLOBBER) || reg->tied;
790bf215546Sopenharmony_ci}
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_cistatic bool
793bf215546Sopenharmony_ciget_reg_specified(struct ra_ctx *ctx, struct ra_file *file,
794bf215546Sopenharmony_ci                  struct ir3_register *reg, physreg_t physreg, bool is_source)
795bf215546Sopenharmony_ci{
796bf215546Sopenharmony_ci   for (unsigned i = 0; i < reg_size(reg); i++) {
797bf215546Sopenharmony_ci      if (!BITSET_TEST(is_early_clobber(reg) || is_source ?
798bf215546Sopenharmony_ci                           file->available_to_evict : file->available,
799bf215546Sopenharmony_ci                       physreg + i))
800bf215546Sopenharmony_ci         return false;
801bf215546Sopenharmony_ci   }
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci   if (!is_source &&
804bf215546Sopenharmony_ci       check_dst_overlap(ctx, file, reg, physreg, physreg + reg_size(reg)))
805bf215546Sopenharmony_ci      return false;
806bf215546Sopenharmony_ci
807bf215546Sopenharmony_ci   return true;
808bf215546Sopenharmony_ci}
809bf215546Sopenharmony_ci
810bf215546Sopenharmony_ci/* Try to evict any registers conflicting with the proposed spot "physreg" for
811bf215546Sopenharmony_ci * "reg". That is, move them to other places so that we can allocate "physreg"
812bf215546Sopenharmony_ci * here.
813bf215546Sopenharmony_ci */
814bf215546Sopenharmony_ci
815bf215546Sopenharmony_cistatic bool
816bf215546Sopenharmony_citry_evict_regs(struct ra_ctx *ctx, struct ra_file *file,
817bf215546Sopenharmony_ci               struct ir3_register *reg, physreg_t physreg,
818bf215546Sopenharmony_ci               unsigned *_eviction_count, bool is_source, bool speculative)
819bf215546Sopenharmony_ci{
820bf215546Sopenharmony_ci   BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE);
821bf215546Sopenharmony_ci   memcpy(available_to_evict, file->available_to_evict,
822bf215546Sopenharmony_ci          sizeof(available_to_evict));
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ci   BITSET_DECLARE(available, RA_MAX_FILE_SIZE);
825bf215546Sopenharmony_ci   memcpy(available, file->available, sizeof(available));
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci   for (unsigned i = 0; i < reg_size(reg); i++) {
828bf215546Sopenharmony_ci      BITSET_CLEAR(available_to_evict, physreg + i);
829bf215546Sopenharmony_ci      BITSET_CLEAR(available, physreg + i);
830bf215546Sopenharmony_ci   }
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci   unsigned eviction_count = 0;
833bf215546Sopenharmony_ci   /* Iterate over each range conflicting with physreg */
834bf215546Sopenharmony_ci   for (struct ra_interval *conflicting = ra_file_search_right(file, physreg),
835bf215546Sopenharmony_ci                           *next = ra_interval_next_or_null(conflicting);
836bf215546Sopenharmony_ci        conflicting != NULL &&
837bf215546Sopenharmony_ci        conflicting->physreg_start < physreg + reg_size(reg);
838bf215546Sopenharmony_ci        conflicting = next, next = ra_interval_next_or_null(next)) {
839bf215546Sopenharmony_ci      if (!is_early_clobber(reg) && !is_source && conflicting->is_killed)
840bf215546Sopenharmony_ci         continue;
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci      if (conflicting->frozen) {
843bf215546Sopenharmony_ci         assert(speculative);
844bf215546Sopenharmony_ci         return false;
845bf215546Sopenharmony_ci      }
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci      unsigned conflicting_file_size =
848bf215546Sopenharmony_ci         reg_file_size(file, conflicting->interval.reg);
849bf215546Sopenharmony_ci      unsigned avail_start, avail_end;
850bf215546Sopenharmony_ci      bool evicted = false;
851bf215546Sopenharmony_ci      BITSET_FOREACH_RANGE (avail_start, avail_end, available_to_evict,
852bf215546Sopenharmony_ci                            conflicting_file_size) {
853bf215546Sopenharmony_ci         unsigned size = avail_end - avail_start;
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci         /* non-half registers must be aligned */
856bf215546Sopenharmony_ci         if (!(conflicting->interval.reg->flags & IR3_REG_HALF) &&
857bf215546Sopenharmony_ci             avail_start % 2 == 1) {
858bf215546Sopenharmony_ci            avail_start++;
859bf215546Sopenharmony_ci            size--;
860bf215546Sopenharmony_ci         }
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci         unsigned conflicting_size =
863bf215546Sopenharmony_ci            conflicting->physreg_end - conflicting->physreg_start;
864bf215546Sopenharmony_ci         if (size >= conflicting_size &&
865bf215546Sopenharmony_ci             !check_dst_overlap(ctx, file, reg, avail_start, avail_start +
866bf215546Sopenharmony_ci                                conflicting_size)) {
867bf215546Sopenharmony_ci            for (unsigned i = 0;
868bf215546Sopenharmony_ci                 i < conflicting->physreg_end - conflicting->physreg_start; i++)
869bf215546Sopenharmony_ci               BITSET_CLEAR(available_to_evict, avail_start + i);
870bf215546Sopenharmony_ci            eviction_count +=
871bf215546Sopenharmony_ci               conflicting->physreg_end - conflicting->physreg_start;
872bf215546Sopenharmony_ci            if (!speculative)
873bf215546Sopenharmony_ci               ra_move_interval(ctx, file, conflicting, avail_start);
874bf215546Sopenharmony_ci            evicted = true;
875bf215546Sopenharmony_ci            break;
876bf215546Sopenharmony_ci         }
877bf215546Sopenharmony_ci      }
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci      if (evicted)
880bf215546Sopenharmony_ci         continue;
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci      /* If we couldn't evict this range, we may be able to swap it with a
883bf215546Sopenharmony_ci       * killed range to acheive the same effect.
884bf215546Sopenharmony_ci       */
885bf215546Sopenharmony_ci      foreach_interval (killed, file) {
886bf215546Sopenharmony_ci         if (!killed->is_killed)
887bf215546Sopenharmony_ci            continue;
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci         if (killed->physreg_end - killed->physreg_start !=
890bf215546Sopenharmony_ci             conflicting->physreg_end - conflicting->physreg_start)
891bf215546Sopenharmony_ci            continue;
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci         if (killed->physreg_end > conflicting_file_size ||
894bf215546Sopenharmony_ci             conflicting->physreg_end > reg_file_size(file, killed->interval.reg))
895bf215546Sopenharmony_ci            continue;
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci         /* We can't swap the killed range if it partially/fully overlaps the
898bf215546Sopenharmony_ci          * space we're trying to allocate or (in speculative mode) if it's
899bf215546Sopenharmony_ci          * already been swapped and will overlap when we actually evict.
900bf215546Sopenharmony_ci          */
901bf215546Sopenharmony_ci         bool killed_available = true;
902bf215546Sopenharmony_ci         for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
903bf215546Sopenharmony_ci            if (!BITSET_TEST(available, i)) {
904bf215546Sopenharmony_ci               killed_available = false;
905bf215546Sopenharmony_ci               break;
906bf215546Sopenharmony_ci            }
907bf215546Sopenharmony_ci         }
908bf215546Sopenharmony_ci
909bf215546Sopenharmony_ci         if (!killed_available)
910bf215546Sopenharmony_ci            continue;
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci         if (check_dst_overlap(ctx, file, reg, killed->physreg_start,
913bf215546Sopenharmony_ci                               killed->physreg_end))
914bf215546Sopenharmony_ci            continue;
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci         /* Check for alignment if one is a full reg */
917bf215546Sopenharmony_ci         if ((!(killed->interval.reg->flags & IR3_REG_HALF) ||
918bf215546Sopenharmony_ci              !(conflicting->interval.reg->flags & IR3_REG_HALF)) &&
919bf215546Sopenharmony_ci             (killed->physreg_start % 2 != 0 ||
920bf215546Sopenharmony_ci              conflicting->physreg_start % 2 != 0))
921bf215546Sopenharmony_ci            continue;
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci         for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) {
924bf215546Sopenharmony_ci            BITSET_CLEAR(available, i);
925bf215546Sopenharmony_ci         }
926bf215546Sopenharmony_ci         /* Because this will generate swaps instead of moves, multiply the
927bf215546Sopenharmony_ci          * cost by 2.
928bf215546Sopenharmony_ci          */
929bf215546Sopenharmony_ci         eviction_count += (killed->physreg_end - killed->physreg_start) * 2;
930bf215546Sopenharmony_ci         if (!speculative) {
931bf215546Sopenharmony_ci            physreg_t killed_start = killed->physreg_start,
932bf215546Sopenharmony_ci                      conflicting_start = conflicting->physreg_start;
933bf215546Sopenharmony_ci            struct ra_removed_interval killed_removed =
934bf215546Sopenharmony_ci               ra_pop_interval(ctx, file, killed);
935bf215546Sopenharmony_ci            struct ra_removed_interval conflicting_removed =
936bf215546Sopenharmony_ci               ra_pop_interval(ctx, file, conflicting);
937bf215546Sopenharmony_ci            ra_push_interval(ctx, file, &killed_removed, conflicting_start);
938bf215546Sopenharmony_ci            ra_push_interval(ctx, file, &conflicting_removed, killed_start);
939bf215546Sopenharmony_ci         }
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci         evicted = true;
942bf215546Sopenharmony_ci         break;
943bf215546Sopenharmony_ci      }
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci      if (!evicted)
946bf215546Sopenharmony_ci         return false;
947bf215546Sopenharmony_ci   }
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci   *_eviction_count = eviction_count;
950bf215546Sopenharmony_ci   return true;
951bf215546Sopenharmony_ci}
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_cistatic int
954bf215546Sopenharmony_ciremoved_interval_cmp(const void *_i1, const void *_i2)
955bf215546Sopenharmony_ci{
956bf215546Sopenharmony_ci   const struct ra_removed_interval *i1 = _i1;
957bf215546Sopenharmony_ci   const struct ra_removed_interval *i2 = _i2;
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_ci   /* We sort the registers as follows:
960bf215546Sopenharmony_ci    *
961bf215546Sopenharmony_ci    * |------------------------------------------------------------------------------------------|
962bf215546Sopenharmony_ci    * |               |                    |        |        |                    |              |
963bf215546Sopenharmony_ci    * |  Half         | Half early-clobber | Half   | Full   | Full early-clobber | Full         |
964bf215546Sopenharmony_ci    * |  live-through | destination        | killed | killed | destination        | live-through |
965bf215546Sopenharmony_ci    * |               |                    |        |        |                    |              |
966bf215546Sopenharmony_ci    * |------------------------------------------------------------------------------------------|
967bf215546Sopenharmony_ci    *                                      |                 |
968bf215546Sopenharmony_ci    *                                      |   Destination   |
969bf215546Sopenharmony_ci    *                                      |                 |
970bf215546Sopenharmony_ci    *                                      |-----------------|
971bf215546Sopenharmony_ci    *
972bf215546Sopenharmony_ci    * Half-registers have to be first so that they stay in the low half of
973bf215546Sopenharmony_ci    * the register file. Then half and full killed must stay together so that
974bf215546Sopenharmony_ci    * there's a contiguous range where we can put the register. With this
975bf215546Sopenharmony_ci    * structure we should be able to accomodate any collection of intervals
976bf215546Sopenharmony_ci    * such that the total number of half components is within the half limit
977bf215546Sopenharmony_ci    * and the combined components are within the full limit.
978bf215546Sopenharmony_ci    */
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_ci   unsigned i1_align = reg_elem_size(i1->interval->interval.reg);
981bf215546Sopenharmony_ci   unsigned i2_align = reg_elem_size(i2->interval->interval.reg);
982bf215546Sopenharmony_ci   if (i1_align > i2_align)
983bf215546Sopenharmony_ci      return 1;
984bf215546Sopenharmony_ci   if (i1_align < i2_align)
985bf215546Sopenharmony_ci      return -1;
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_ci   if (i1_align == 1) {
988bf215546Sopenharmony_ci      if (i2->interval->is_killed)
989bf215546Sopenharmony_ci         return -1;
990bf215546Sopenharmony_ci      if (i1->interval->is_killed)
991bf215546Sopenharmony_ci         return 1;
992bf215546Sopenharmony_ci   } else {
993bf215546Sopenharmony_ci      if (i2->interval->is_killed)
994bf215546Sopenharmony_ci         return 1;
995bf215546Sopenharmony_ci      if (i1->interval->is_killed)
996bf215546Sopenharmony_ci         return -1;
997bf215546Sopenharmony_ci   }
998bf215546Sopenharmony_ci
999bf215546Sopenharmony_ci   return 0;
1000bf215546Sopenharmony_ci}
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_cistatic int
1003bf215546Sopenharmony_cidsts_cmp(const void *_i1, const void *_i2)
1004bf215546Sopenharmony_ci{
1005bf215546Sopenharmony_ci   struct ir3_register *i1 = *(struct ir3_register *const *) _i1;
1006bf215546Sopenharmony_ci   struct ir3_register *i2 = *(struct ir3_register *const *) _i2;
1007bf215546Sopenharmony_ci
1008bf215546Sopenharmony_ci   /* Treat tied destinations as-if they are live-through sources, and normal
1009bf215546Sopenharmony_ci    * destinations as killed sources.
1010bf215546Sopenharmony_ci    */
1011bf215546Sopenharmony_ci   unsigned i1_align = reg_elem_size(i1);
1012bf215546Sopenharmony_ci   unsigned i2_align = reg_elem_size(i2);
1013bf215546Sopenharmony_ci   if (i1_align > i2_align)
1014bf215546Sopenharmony_ci      return 1;
1015bf215546Sopenharmony_ci   if (i1_align < i2_align)
1016bf215546Sopenharmony_ci      return -1;
1017bf215546Sopenharmony_ci
1018bf215546Sopenharmony_ci   if (i1_align == 1) {
1019bf215546Sopenharmony_ci      if (!is_early_clobber(i2))
1020bf215546Sopenharmony_ci         return -1;
1021bf215546Sopenharmony_ci      if (!is_early_clobber(i1))
1022bf215546Sopenharmony_ci         return 1;
1023bf215546Sopenharmony_ci   } else {
1024bf215546Sopenharmony_ci      if (!is_early_clobber(i2))
1025bf215546Sopenharmony_ci         return 1;
1026bf215546Sopenharmony_ci      if (!is_early_clobber(i1))
1027bf215546Sopenharmony_ci         return -1;
1028bf215546Sopenharmony_ci   }
1029bf215546Sopenharmony_ci
1030bf215546Sopenharmony_ci   return 0;
1031bf215546Sopenharmony_ci}
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci/* "Compress" all the live intervals so that there is enough space for the
1034bf215546Sopenharmony_ci * destination register. As there can be gaps when a more-aligned interval
1035bf215546Sopenharmony_ci * follows a less-aligned interval, this also sorts them to remove such
1036bf215546Sopenharmony_ci * "padding", which may be required when space is very tight.  This isn't
1037bf215546Sopenharmony_ci * amazing, but should be used only as a last resort in case the register file
1038bf215546Sopenharmony_ci * is almost full and badly fragmented.
1039bf215546Sopenharmony_ci *
1040bf215546Sopenharmony_ci * Return the physreg to use.
1041bf215546Sopenharmony_ci */
1042bf215546Sopenharmony_cistatic physreg_t
1043bf215546Sopenharmony_cicompress_regs_left(struct ra_ctx *ctx, struct ra_file *file,
1044bf215546Sopenharmony_ci                   struct ir3_register *reg)
1045bf215546Sopenharmony_ci{
1046bf215546Sopenharmony_ci   unsigned align = reg_elem_size(reg);
1047bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ra_removed_interval, intervals);
1048bf215546Sopenharmony_ci   intervals_count = intervals_sz = 0;
1049bf215546Sopenharmony_ci   intervals = NULL;
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_register *, dsts);
1052bf215546Sopenharmony_ci   dsts_count = dsts_sz = 0;
1053bf215546Sopenharmony_ci   dsts = NULL;
1054bf215546Sopenharmony_ci   array_insert(ctx, dsts, reg);
1055bf215546Sopenharmony_ci   bool dst_inserted[reg->instr->dsts_count];
1056bf215546Sopenharmony_ci
1057bf215546Sopenharmony_ci   unsigned dst_size = reg->tied ? 0 : reg_size(reg);
1058bf215546Sopenharmony_ci   unsigned ec_dst_size = is_early_clobber(reg) ? reg_size(reg) : 0;
1059bf215546Sopenharmony_ci   unsigned half_dst_size = 0, ec_half_dst_size = 0;
1060bf215546Sopenharmony_ci   if (align == 1) {
1061bf215546Sopenharmony_ci      half_dst_size = dst_size;
1062bf215546Sopenharmony_ci      ec_half_dst_size = ec_dst_size;
1063bf215546Sopenharmony_ci   }
1064bf215546Sopenharmony_ci
1065bf215546Sopenharmony_ci   unsigned removed_size = 0, removed_half_size = 0;
1066bf215546Sopenharmony_ci   unsigned removed_killed_size = 0, removed_killed_half_size = 0;
1067bf215546Sopenharmony_ci   unsigned file_size =
1068bf215546Sopenharmony_ci      align == 1 ? MIN2(file->size, RA_HALF_SIZE) : file->size;
1069bf215546Sopenharmony_ci   physreg_t start_reg = 0;
1070bf215546Sopenharmony_ci
1071bf215546Sopenharmony_ci   foreach_interval_rev_safe (interval, file) {
1072bf215546Sopenharmony_ci      /* We'll check if we can compact the intervals starting here. */
1073bf215546Sopenharmony_ci      physreg_t candidate_start = interval->physreg_end;
1074bf215546Sopenharmony_ci
1075bf215546Sopenharmony_ci      /* Check if there are any other destinations we need to compact. */
1076bf215546Sopenharmony_ci      ra_foreach_dst_n (other_dst, n, reg->instr) {
1077bf215546Sopenharmony_ci         if (other_dst == reg)
1078bf215546Sopenharmony_ci            break;
1079bf215546Sopenharmony_ci         if (ra_get_file(ctx, other_dst) != file)
1080bf215546Sopenharmony_ci            continue;
1081bf215546Sopenharmony_ci         if (dst_inserted[n])
1082bf215546Sopenharmony_ci            continue;
1083bf215546Sopenharmony_ci
1084bf215546Sopenharmony_ci         struct ra_interval *other_interval = &ctx->intervals[other_dst->name];
1085bf215546Sopenharmony_ci         /* if the destination partially overlaps this interval, we need to
1086bf215546Sopenharmony_ci          * extend candidate_start to the end.
1087bf215546Sopenharmony_ci          */
1088bf215546Sopenharmony_ci         if (other_interval->physreg_start < candidate_start) {
1089bf215546Sopenharmony_ci            candidate_start = MAX2(candidate_start,
1090bf215546Sopenharmony_ci                                   other_interval->physreg_end);
1091bf215546Sopenharmony_ci            continue;
1092bf215546Sopenharmony_ci         }
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci         dst_inserted[n] = true;
1095bf215546Sopenharmony_ci
1096bf215546Sopenharmony_ci         /* dst intervals with a tied killed source are considered attached to
1097bf215546Sopenharmony_ci          * that source. Don't actually insert them. This means we have to
1098bf215546Sopenharmony_ci          * update them below if their tied source moves.
1099bf215546Sopenharmony_ci          */
1100bf215546Sopenharmony_ci         if (other_dst->tied) {
1101bf215546Sopenharmony_ci            struct ra_interval *tied_interval =
1102bf215546Sopenharmony_ci               &ctx->intervals[other_dst->tied->def->name];
1103bf215546Sopenharmony_ci            if (tied_interval->is_killed)
1104bf215546Sopenharmony_ci               continue;
1105bf215546Sopenharmony_ci         }
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci         d("popping destination %u physreg %u\n",
1108bf215546Sopenharmony_ci           other_interval->interval.reg->name,
1109bf215546Sopenharmony_ci           other_interval->physreg_start);
1110bf215546Sopenharmony_ci
1111bf215546Sopenharmony_ci         array_insert(ctx, dsts, other_dst);
1112bf215546Sopenharmony_ci         unsigned interval_size = reg_size(other_dst);
1113bf215546Sopenharmony_ci         if (is_early_clobber(other_dst)) {
1114bf215546Sopenharmony_ci            ec_dst_size += interval_size;
1115bf215546Sopenharmony_ci            if (other_interval->interval.reg->flags & IR3_REG_HALF)
1116bf215546Sopenharmony_ci               ec_half_dst_size += interval_size;
1117bf215546Sopenharmony_ci         } else {
1118bf215546Sopenharmony_ci            dst_size += interval_size;
1119bf215546Sopenharmony_ci            if (other_interval->interval.reg->flags & IR3_REG_HALF)
1120bf215546Sopenharmony_ci               half_dst_size += interval_size;
1121bf215546Sopenharmony_ci         }
1122bf215546Sopenharmony_ci      }
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci      /* Check if we can sort the intervals *after* this one and have enough
1125bf215546Sopenharmony_ci       * space leftover to accomodate all intervals, keeping in mind that killed
1126bf215546Sopenharmony_ci       * sources overlap non-tied destinations. Also check that we have enough
1127bf215546Sopenharmony_ci       * space leftover for half-registers, if we're inserting a half-register
1128bf215546Sopenharmony_ci       * (otherwise we only shift any half-registers down so they should be
1129bf215546Sopenharmony_ci       * safe).
1130bf215546Sopenharmony_ci       */
1131bf215546Sopenharmony_ci      if (candidate_start + removed_size + ec_dst_size +
1132bf215546Sopenharmony_ci          MAX2(removed_killed_size, dst_size) <= file->size &&
1133bf215546Sopenharmony_ci          (align != 1 ||
1134bf215546Sopenharmony_ci           candidate_start + removed_half_size + ec_half_dst_size +
1135bf215546Sopenharmony_ci           MAX2(removed_killed_half_size, half_dst_size) <= file_size)) {
1136bf215546Sopenharmony_ci         start_reg = candidate_start;
1137bf215546Sopenharmony_ci         break;
1138bf215546Sopenharmony_ci      }
1139bf215546Sopenharmony_ci
1140bf215546Sopenharmony_ci      /* We assume that all frozen intervals are at the start and that we
1141bf215546Sopenharmony_ci       * can avoid popping them.
1142bf215546Sopenharmony_ci       */
1143bf215546Sopenharmony_ci      assert(!interval->frozen);
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_ci      /* Killed sources are different because they go at the end and can
1146bf215546Sopenharmony_ci       * overlap the register we're trying to add.
1147bf215546Sopenharmony_ci       */
1148bf215546Sopenharmony_ci      unsigned interval_size = interval->physreg_end - interval->physreg_start;
1149bf215546Sopenharmony_ci      if (interval->is_killed) {
1150bf215546Sopenharmony_ci         removed_killed_size += interval_size;
1151bf215546Sopenharmony_ci         if (interval->interval.reg->flags & IR3_REG_HALF)
1152bf215546Sopenharmony_ci            removed_killed_half_size += interval_size;
1153bf215546Sopenharmony_ci      } else {
1154bf215546Sopenharmony_ci         removed_size += interval_size;
1155bf215546Sopenharmony_ci         if (interval->interval.reg->flags & IR3_REG_HALF)
1156bf215546Sopenharmony_ci            removed_half_size += interval_size;
1157bf215546Sopenharmony_ci      }
1158bf215546Sopenharmony_ci
1159bf215546Sopenharmony_ci      /* Now that we've done the accounting, pop this off */
1160bf215546Sopenharmony_ci      d("popping interval %u physreg %u%s\n", interval->interval.reg->name,
1161bf215546Sopenharmony_ci        interval->physreg_start, interval->is_killed ? ", killed" : "");
1162bf215546Sopenharmony_ci      array_insert(ctx, intervals, ra_pop_interval(ctx, file, interval));
1163bf215546Sopenharmony_ci   }
1164bf215546Sopenharmony_ci
1165bf215546Sopenharmony_ci   /* TODO: In addition to skipping registers at the beginning that are
1166bf215546Sopenharmony_ci    * well-packed, we should try to skip registers at the end.
1167bf215546Sopenharmony_ci    */
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci   qsort(intervals, intervals_count, sizeof(*intervals), removed_interval_cmp);
1170bf215546Sopenharmony_ci   qsort(dsts, dsts_count, sizeof(*dsts), dsts_cmp);
1171bf215546Sopenharmony_ci
1172bf215546Sopenharmony_ci   physreg_t live_reg = start_reg;
1173bf215546Sopenharmony_ci   physreg_t dst_reg = (physreg_t)~0;
1174bf215546Sopenharmony_ci   physreg_t ret_reg = (physreg_t)~0;
1175bf215546Sopenharmony_ci   unsigned dst_index = 0;
1176bf215546Sopenharmony_ci   unsigned live_index = 0;
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_ci   /* We have two lists of intervals to process, live intervals and destination
1179bf215546Sopenharmony_ci    * intervals. Process them in the order of the disgram in insert_cmp().
1180bf215546Sopenharmony_ci    */
1181bf215546Sopenharmony_ci   while (live_index < intervals_count || dst_index < dsts_count) {
1182bf215546Sopenharmony_ci      bool process_dst;
1183bf215546Sopenharmony_ci      if (live_index == intervals_count) {
1184bf215546Sopenharmony_ci         process_dst = true;
1185bf215546Sopenharmony_ci      } else if (dst_index == dsts_count) {
1186bf215546Sopenharmony_ci         process_dst = false;
1187bf215546Sopenharmony_ci      } else {
1188bf215546Sopenharmony_ci         struct ir3_register *dst = dsts[dst_index];
1189bf215546Sopenharmony_ci         struct ra_interval *live_interval = intervals[live_index].interval;
1190bf215546Sopenharmony_ci
1191bf215546Sopenharmony_ci         bool live_half = live_interval->interval.reg->flags & IR3_REG_HALF;
1192bf215546Sopenharmony_ci         bool live_killed = live_interval->is_killed;
1193bf215546Sopenharmony_ci         bool dst_half = dst->flags & IR3_REG_HALF;
1194bf215546Sopenharmony_ci         bool dst_early_clobber = is_early_clobber(dst);
1195bf215546Sopenharmony_ci
1196bf215546Sopenharmony_ci         if (live_half && !live_killed) {
1197bf215546Sopenharmony_ci            /* far-left of diagram. */
1198bf215546Sopenharmony_ci            process_dst = false;
1199bf215546Sopenharmony_ci         } else if (dst_half && dst_early_clobber) {
1200bf215546Sopenharmony_ci            /* mid-left of diagram. */
1201bf215546Sopenharmony_ci            process_dst = true;
1202bf215546Sopenharmony_ci         } else if (!dst_early_clobber) {
1203bf215546Sopenharmony_ci            /* bottom of disagram. */
1204bf215546Sopenharmony_ci            process_dst = true;
1205bf215546Sopenharmony_ci         } else if (live_killed) {
1206bf215546Sopenharmony_ci            /* middle of diagram. */
1207bf215546Sopenharmony_ci            process_dst = false;
1208bf215546Sopenharmony_ci         } else if (!dst_half && dst_early_clobber) {
1209bf215546Sopenharmony_ci            /* mid-right of diagram. */
1210bf215546Sopenharmony_ci            process_dst = true;
1211bf215546Sopenharmony_ci         } else {
1212bf215546Sopenharmony_ci            /* far right of diagram. */
1213bf215546Sopenharmony_ci            assert(!live_killed && !live_half);
1214bf215546Sopenharmony_ci            process_dst = false;
1215bf215546Sopenharmony_ci         }
1216bf215546Sopenharmony_ci      }
1217bf215546Sopenharmony_ci
1218bf215546Sopenharmony_ci      struct ir3_register *cur_reg =
1219bf215546Sopenharmony_ci         process_dst ? dsts[dst_index] :
1220bf215546Sopenharmony_ci         intervals[live_index].interval->interval.reg;
1221bf215546Sopenharmony_ci
1222bf215546Sopenharmony_ci      physreg_t physreg;
1223bf215546Sopenharmony_ci      if (process_dst && !is_early_clobber(cur_reg)) {
1224bf215546Sopenharmony_ci         if (dst_reg == (physreg_t)~0)
1225bf215546Sopenharmony_ci            dst_reg = live_reg;
1226bf215546Sopenharmony_ci         physreg = dst_reg;
1227bf215546Sopenharmony_ci      } else {
1228bf215546Sopenharmony_ci         physreg = live_reg;
1229bf215546Sopenharmony_ci         struct ra_interval *live_interval = intervals[live_index].interval;
1230bf215546Sopenharmony_ci         bool live_killed = live_interval->is_killed;
1231bf215546Sopenharmony_ci         /* If this is live-through and we've processed the destinations, we
1232bf215546Sopenharmony_ci          * need to make sure we take into account any overlapping destinations.
1233bf215546Sopenharmony_ci          */
1234bf215546Sopenharmony_ci         if (!live_killed && dst_reg != (physreg_t)~0)
1235bf215546Sopenharmony_ci            physreg = MAX2(physreg, dst_reg);
1236bf215546Sopenharmony_ci      }
1237bf215546Sopenharmony_ci
1238bf215546Sopenharmony_ci      if (!(cur_reg->flags & IR3_REG_HALF))
1239bf215546Sopenharmony_ci         physreg = ALIGN(physreg, 2);
1240bf215546Sopenharmony_ci
1241bf215546Sopenharmony_ci      d("pushing reg %u physreg %u\n", cur_reg->name, physreg);
1242bf215546Sopenharmony_ci
1243bf215546Sopenharmony_ci      unsigned interval_size = reg_size(cur_reg);
1244bf215546Sopenharmony_ci      if (physreg + interval_size >
1245bf215546Sopenharmony_ci          reg_file_size(file, cur_reg)) {
1246bf215546Sopenharmony_ci         d("ran out of room for interval %u!\n",
1247bf215546Sopenharmony_ci           cur_reg->name);
1248bf215546Sopenharmony_ci         unreachable("reg pressure calculation was wrong!");
1249bf215546Sopenharmony_ci         return 0;
1250bf215546Sopenharmony_ci      }
1251bf215546Sopenharmony_ci
1252bf215546Sopenharmony_ci      if (process_dst) {
1253bf215546Sopenharmony_ci         if (cur_reg == reg) {
1254bf215546Sopenharmony_ci            ret_reg = physreg;
1255bf215546Sopenharmony_ci         } else {
1256bf215546Sopenharmony_ci            struct ra_interval *interval = &ctx->intervals[cur_reg->name];
1257bf215546Sopenharmony_ci            interval->physreg_start = physreg;
1258bf215546Sopenharmony_ci            interval->physreg_end = physreg + interval_size;
1259bf215546Sopenharmony_ci         }
1260bf215546Sopenharmony_ci         dst_index++;
1261bf215546Sopenharmony_ci      } else {
1262bf215546Sopenharmony_ci         ra_push_interval(ctx, file, &intervals[live_index], physreg);
1263bf215546Sopenharmony_ci         live_index++;
1264bf215546Sopenharmony_ci      }
1265bf215546Sopenharmony_ci
1266bf215546Sopenharmony_ci      physreg += interval_size;
1267bf215546Sopenharmony_ci
1268bf215546Sopenharmony_ci      if (process_dst && !is_early_clobber(cur_reg)) {
1269bf215546Sopenharmony_ci         dst_reg = physreg;
1270bf215546Sopenharmony_ci      } else {
1271bf215546Sopenharmony_ci         live_reg = physreg;
1272bf215546Sopenharmony_ci      }
1273bf215546Sopenharmony_ci   }
1274bf215546Sopenharmony_ci
1275bf215546Sopenharmony_ci   /* If we shuffled around a tied source that is killed, we may have to update
1276bf215546Sopenharmony_ci    * its corresponding destination since we didn't insert it above.
1277bf215546Sopenharmony_ci    */
1278bf215546Sopenharmony_ci   ra_foreach_dst (dst, reg->instr) {
1279bf215546Sopenharmony_ci      if (dst == reg)
1280bf215546Sopenharmony_ci         break;
1281bf215546Sopenharmony_ci
1282bf215546Sopenharmony_ci      struct ir3_register *tied = dst->tied;
1283bf215546Sopenharmony_ci      if (!tied)
1284bf215546Sopenharmony_ci         continue;
1285bf215546Sopenharmony_ci
1286bf215546Sopenharmony_ci      struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1287bf215546Sopenharmony_ci      if (!tied_interval->is_killed)
1288bf215546Sopenharmony_ci         continue;
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_ci      struct ra_interval *dst_interval = &ctx->intervals[dst->name];
1291bf215546Sopenharmony_ci      unsigned dst_size = reg_size(dst);
1292bf215546Sopenharmony_ci      dst_interval->physreg_start = ra_interval_get_physreg(tied_interval);
1293bf215546Sopenharmony_ci      dst_interval->physreg_end = dst_interval->physreg_start + dst_size;
1294bf215546Sopenharmony_ci   }
1295bf215546Sopenharmony_ci
1296bf215546Sopenharmony_ci   return ret_reg;
1297bf215546Sopenharmony_ci}
1298bf215546Sopenharmony_ci
1299bf215546Sopenharmony_cistatic void
1300bf215546Sopenharmony_ciupdate_affinity(struct ra_file *file, struct ir3_register *reg,
1301bf215546Sopenharmony_ci                physreg_t physreg)
1302bf215546Sopenharmony_ci{
1303bf215546Sopenharmony_ci   if (!reg->merge_set || reg->merge_set->preferred_reg != (physreg_t)~0)
1304bf215546Sopenharmony_ci      return;
1305bf215546Sopenharmony_ci
1306bf215546Sopenharmony_ci   if (physreg < reg->merge_set_offset)
1307bf215546Sopenharmony_ci      return;
1308bf215546Sopenharmony_ci
1309bf215546Sopenharmony_ci   if ((physreg - reg->merge_set_offset + reg->merge_set->size) > file->size)
1310bf215546Sopenharmony_ci      return;
1311bf215546Sopenharmony_ci
1312bf215546Sopenharmony_ci   reg->merge_set->preferred_reg = physreg - reg->merge_set_offset;
1313bf215546Sopenharmony_ci}
1314bf215546Sopenharmony_ci
1315bf215546Sopenharmony_ci/* Try to find free space for a register without shuffling anything. This uses
1316bf215546Sopenharmony_ci * a round-robin algorithm to reduce false dependencies.
1317bf215546Sopenharmony_ci */
1318bf215546Sopenharmony_cistatic physreg_t
1319bf215546Sopenharmony_cifind_best_gap(struct ra_ctx *ctx, struct ra_file *file,
1320bf215546Sopenharmony_ci              struct ir3_register *dst, unsigned file_size, unsigned size,
1321bf215546Sopenharmony_ci              unsigned align)
1322bf215546Sopenharmony_ci{
1323bf215546Sopenharmony_ci   /* This can happen if we create a very large merge set. Just bail out in that
1324bf215546Sopenharmony_ci    * case.
1325bf215546Sopenharmony_ci    */
1326bf215546Sopenharmony_ci   if (size > file_size)
1327bf215546Sopenharmony_ci      return (physreg_t) ~0;
1328bf215546Sopenharmony_ci
1329bf215546Sopenharmony_ci   BITSET_WORD *available =
1330bf215546Sopenharmony_ci      is_early_clobber(dst) ? file->available_to_evict : file->available;
1331bf215546Sopenharmony_ci
1332bf215546Sopenharmony_ci   unsigned start = ALIGN(file->start, align) % (file_size - size + align);
1333bf215546Sopenharmony_ci   unsigned candidate = start;
1334bf215546Sopenharmony_ci   do {
1335bf215546Sopenharmony_ci      bool is_available = true;
1336bf215546Sopenharmony_ci      for (unsigned i = 0; i < size; i++) {
1337bf215546Sopenharmony_ci         if (!BITSET_TEST(available, candidate + i)) {
1338bf215546Sopenharmony_ci            is_available = false;
1339bf215546Sopenharmony_ci            break;
1340bf215546Sopenharmony_ci         }
1341bf215546Sopenharmony_ci      }
1342bf215546Sopenharmony_ci
1343bf215546Sopenharmony_ci      if (is_available) {
1344bf215546Sopenharmony_ci         is_available =
1345bf215546Sopenharmony_ci            !check_dst_overlap(ctx, file, dst, candidate, candidate + size);
1346bf215546Sopenharmony_ci      }
1347bf215546Sopenharmony_ci
1348bf215546Sopenharmony_ci      if (is_available) {
1349bf215546Sopenharmony_ci         file->start = (candidate + size) % file_size;
1350bf215546Sopenharmony_ci         return candidate;
1351bf215546Sopenharmony_ci      }
1352bf215546Sopenharmony_ci
1353bf215546Sopenharmony_ci      candidate += align;
1354bf215546Sopenharmony_ci      if (candidate + size > file_size)
1355bf215546Sopenharmony_ci         candidate = 0;
1356bf215546Sopenharmony_ci   } while (candidate != start);
1357bf215546Sopenharmony_ci
1358bf215546Sopenharmony_ci   return (physreg_t)~0;
1359bf215546Sopenharmony_ci}
1360bf215546Sopenharmony_ci
1361bf215546Sopenharmony_ci/* This is the main entrypoint for picking a register. Pick a free register
1362bf215546Sopenharmony_ci * for "reg", shuffling around sources if necessary. In the normal case where
1363bf215546Sopenharmony_ci * "is_source" is false, this register can overlap with killed sources
1364bf215546Sopenharmony_ci * (intervals with "is_killed == true"). If "is_source" is true, then
1365bf215546Sopenharmony_ci * is_killed is ignored and the register returned must not overlap with killed
1366bf215546Sopenharmony_ci * sources. This must be used for tied registers, because we're actually
1367bf215546Sopenharmony_ci * allocating the destination and the tied source at the same time.
1368bf215546Sopenharmony_ci */
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_cistatic physreg_t
1371bf215546Sopenharmony_ciget_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg)
1372bf215546Sopenharmony_ci{
1373bf215546Sopenharmony_ci   unsigned file_size = reg_file_size(file, reg);
1374bf215546Sopenharmony_ci   if (reg->merge_set && reg->merge_set->preferred_reg != (physreg_t)~0) {
1375bf215546Sopenharmony_ci      physreg_t preferred_reg =
1376bf215546Sopenharmony_ci         reg->merge_set->preferred_reg + reg->merge_set_offset;
1377bf215546Sopenharmony_ci      if (preferred_reg + reg_size(reg) <= file_size &&
1378bf215546Sopenharmony_ci          preferred_reg % reg_elem_size(reg) == 0 &&
1379bf215546Sopenharmony_ci          get_reg_specified(ctx, file, reg, preferred_reg, false))
1380bf215546Sopenharmony_ci         return preferred_reg;
1381bf215546Sopenharmony_ci   }
1382bf215546Sopenharmony_ci
1383bf215546Sopenharmony_ci   /* If this register is a subset of a merge set which we have not picked a
1384bf215546Sopenharmony_ci    * register for, first try to allocate enough space for the entire merge
1385bf215546Sopenharmony_ci    * set.
1386bf215546Sopenharmony_ci    */
1387bf215546Sopenharmony_ci   unsigned size = reg_size(reg);
1388bf215546Sopenharmony_ci   if (reg->merge_set && reg->merge_set->preferred_reg == (physreg_t)~0 &&
1389bf215546Sopenharmony_ci       size < reg->merge_set->size) {
1390bf215546Sopenharmony_ci      physreg_t best_reg = find_best_gap(ctx, file, reg, file_size,
1391bf215546Sopenharmony_ci                                         reg->merge_set->size,
1392bf215546Sopenharmony_ci                                         reg->merge_set->alignment);
1393bf215546Sopenharmony_ci      if (best_reg != (physreg_t)~0u) {
1394bf215546Sopenharmony_ci         best_reg += reg->merge_set_offset;
1395bf215546Sopenharmony_ci         return best_reg;
1396bf215546Sopenharmony_ci      }
1397bf215546Sopenharmony_ci   }
1398bf215546Sopenharmony_ci
1399bf215546Sopenharmony_ci   /* For ALU and SFU instructions, if the src reg is avail to pick, use it.
1400bf215546Sopenharmony_ci    * Because this doesn't introduce unnecessary dependencies, and it
1401bf215546Sopenharmony_ci    * potentially avoids needing (ss) syncs for write after read hazards for
1402bf215546Sopenharmony_ci    * SFU instructions:
1403bf215546Sopenharmony_ci    */
1404bf215546Sopenharmony_ci   if (is_sfu(reg->instr) || is_alu(reg->instr)) {
1405bf215546Sopenharmony_ci      for (unsigned i = 0; i < reg->instr->srcs_count; i++) {
1406bf215546Sopenharmony_ci         struct ir3_register *src = reg->instr->srcs[i];
1407bf215546Sopenharmony_ci         if (!ra_reg_is_src(src))
1408bf215546Sopenharmony_ci            continue;
1409bf215546Sopenharmony_ci         if (ra_get_file(ctx, src) == file && reg_size(src) >= size) {
1410bf215546Sopenharmony_ci            struct ra_interval *src_interval = &ctx->intervals[src->def->name];
1411bf215546Sopenharmony_ci            physreg_t src_physreg = ra_interval_get_physreg(src_interval);
1412bf215546Sopenharmony_ci            if (src_physreg % reg_elem_size(reg) == 0 &&
1413bf215546Sopenharmony_ci                src_physreg + size <= file_size &&
1414bf215546Sopenharmony_ci                get_reg_specified(ctx, file, reg, src_physreg, false))
1415bf215546Sopenharmony_ci               return src_physreg;
1416bf215546Sopenharmony_ci         }
1417bf215546Sopenharmony_ci      }
1418bf215546Sopenharmony_ci   }
1419bf215546Sopenharmony_ci
1420bf215546Sopenharmony_ci   physreg_t best_reg =
1421bf215546Sopenharmony_ci      find_best_gap(ctx, file, reg, file_size, size, reg_elem_size(reg));
1422bf215546Sopenharmony_ci   if (best_reg != (physreg_t)~0u) {
1423bf215546Sopenharmony_ci      return best_reg;
1424bf215546Sopenharmony_ci   }
1425bf215546Sopenharmony_ci
1426bf215546Sopenharmony_ci   /* Ok, we couldn't find anything that fits. Here is where we have to start
1427bf215546Sopenharmony_ci    * moving things around to make stuff fit. First try solely evicting
1428bf215546Sopenharmony_ci    * registers in the way.
1429bf215546Sopenharmony_ci    */
1430bf215546Sopenharmony_ci   unsigned best_eviction_count = ~0;
1431bf215546Sopenharmony_ci   for (physreg_t i = 0; i + size <= file_size; i += reg_elem_size(reg)) {
1432bf215546Sopenharmony_ci      unsigned eviction_count;
1433bf215546Sopenharmony_ci      if (try_evict_regs(ctx, file, reg, i, &eviction_count, false, true)) {
1434bf215546Sopenharmony_ci         if (eviction_count < best_eviction_count) {
1435bf215546Sopenharmony_ci            best_eviction_count = eviction_count;
1436bf215546Sopenharmony_ci            best_reg = i;
1437bf215546Sopenharmony_ci         }
1438bf215546Sopenharmony_ci      }
1439bf215546Sopenharmony_ci   }
1440bf215546Sopenharmony_ci
1441bf215546Sopenharmony_ci   if (best_eviction_count != ~0) {
1442bf215546Sopenharmony_ci      ASSERTED bool result = try_evict_regs(
1443bf215546Sopenharmony_ci         ctx, file, reg, best_reg, &best_eviction_count, false, false);
1444bf215546Sopenharmony_ci      assert(result);
1445bf215546Sopenharmony_ci      return best_reg;
1446bf215546Sopenharmony_ci   }
1447bf215546Sopenharmony_ci
1448bf215546Sopenharmony_ci   /* Use the dumb fallback only if try_evict_regs() fails. */
1449bf215546Sopenharmony_ci   return compress_regs_left(ctx, file, reg);
1450bf215546Sopenharmony_ci}
1451bf215546Sopenharmony_ci
1452bf215546Sopenharmony_cistatic void
1453bf215546Sopenharmony_ciassign_reg(struct ir3_instruction *instr, struct ir3_register *reg,
1454bf215546Sopenharmony_ci           unsigned num)
1455bf215546Sopenharmony_ci{
1456bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_ARRAY) {
1457bf215546Sopenharmony_ci      reg->array.base = num;
1458bf215546Sopenharmony_ci      if (reg->flags & IR3_REG_RELATIV)
1459bf215546Sopenharmony_ci         reg->array.offset += num;
1460bf215546Sopenharmony_ci      else
1461bf215546Sopenharmony_ci         reg->num = num + reg->array.offset;
1462bf215546Sopenharmony_ci   } else {
1463bf215546Sopenharmony_ci      reg->num = num;
1464bf215546Sopenharmony_ci   }
1465bf215546Sopenharmony_ci}
1466bf215546Sopenharmony_ci
1467bf215546Sopenharmony_cistatic void
1468bf215546Sopenharmony_cimark_src_killed(struct ra_ctx *ctx, struct ir3_register *src)
1469bf215546Sopenharmony_ci{
1470bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[src->def->name];
1471bf215546Sopenharmony_ci
1472bf215546Sopenharmony_ci   if (!(src->flags & IR3_REG_FIRST_KILL) || interval->is_killed ||
1473bf215546Sopenharmony_ci       interval->interval.parent ||
1474bf215546Sopenharmony_ci       !rb_tree_is_empty(&interval->interval.children))
1475bf215546Sopenharmony_ci      return;
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci   ra_file_mark_killed(ra_get_file(ctx, src), interval);
1478bf215546Sopenharmony_ci}
1479bf215546Sopenharmony_ci
1480bf215546Sopenharmony_cistatic void
1481bf215546Sopenharmony_ciinsert_dst(struct ra_ctx *ctx, struct ir3_register *dst)
1482bf215546Sopenharmony_ci{
1483bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, dst);
1484bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[dst->name];
1485bf215546Sopenharmony_ci
1486bf215546Sopenharmony_ci   d("insert dst %u physreg %u", dst->name, ra_interval_get_physreg(interval));
1487bf215546Sopenharmony_ci
1488bf215546Sopenharmony_ci   if (!(dst->flags & IR3_REG_UNUSED))
1489bf215546Sopenharmony_ci      ra_file_insert(file, interval);
1490bf215546Sopenharmony_ci
1491bf215546Sopenharmony_ci   assign_reg(dst->instr, dst, ra_interval_get_num(interval));
1492bf215546Sopenharmony_ci}
1493bf215546Sopenharmony_ci
1494bf215546Sopenharmony_cistatic void
1495bf215546Sopenharmony_ciallocate_dst_fixed(struct ra_ctx *ctx, struct ir3_register *dst,
1496bf215546Sopenharmony_ci                   physreg_t physreg)
1497bf215546Sopenharmony_ci{
1498bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, dst);
1499bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[dst->name];
1500bf215546Sopenharmony_ci   update_affinity(file, dst, physreg);
1501bf215546Sopenharmony_ci
1502bf215546Sopenharmony_ci   ra_interval_init(interval, dst);
1503bf215546Sopenharmony_ci   interval->physreg_start = physreg;
1504bf215546Sopenharmony_ci   interval->physreg_end = physreg + reg_size(dst);
1505bf215546Sopenharmony_ci}
1506bf215546Sopenharmony_ci
1507bf215546Sopenharmony_ci/* If a tied destination interferes with its source register, we have to insert
1508bf215546Sopenharmony_ci * a copy beforehand to copy the source to the destination. Because we are using
1509bf215546Sopenharmony_ci * the parallel_copies array and not creating a separate copy, this copy will
1510bf215546Sopenharmony_ci * happen in parallel with any shuffling around of the tied source, so we have
1511bf215546Sopenharmony_ci * to copy the source *as it exists before it is shuffled around*. We do this by
1512bf215546Sopenharmony_ci * inserting the copy early, before any other copies are inserted. We don't
1513bf215546Sopenharmony_ci * actually know the destination of the copy, but that's ok because the
1514bf215546Sopenharmony_ci * dst_interval will be filled out later.
1515bf215546Sopenharmony_ci */
1516bf215546Sopenharmony_cistatic void
1517bf215546Sopenharmony_ciinsert_tied_dst_copy(struct ra_ctx *ctx, struct ir3_register *dst)
1518bf215546Sopenharmony_ci{
1519bf215546Sopenharmony_ci   struct ir3_register *tied = dst->tied;
1520bf215546Sopenharmony_ci
1521bf215546Sopenharmony_ci   if (!tied)
1522bf215546Sopenharmony_ci      return;
1523bf215546Sopenharmony_ci
1524bf215546Sopenharmony_ci   struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1525bf215546Sopenharmony_ci   struct ra_interval *dst_interval = &ctx->intervals[dst->name];
1526bf215546Sopenharmony_ci
1527bf215546Sopenharmony_ci   if (tied_interval->is_killed)
1528bf215546Sopenharmony_ci      return;
1529bf215546Sopenharmony_ci
1530bf215546Sopenharmony_ci   physreg_t tied_physreg = ra_interval_get_physreg(tied_interval);
1531bf215546Sopenharmony_ci
1532bf215546Sopenharmony_ci   array_insert(ctx, ctx->parallel_copies,
1533bf215546Sopenharmony_ci                (struct ra_parallel_copy){
1534bf215546Sopenharmony_ci                   .interval = dst_interval,
1535bf215546Sopenharmony_ci                   .src = tied_physreg,
1536bf215546Sopenharmony_ci                });
1537bf215546Sopenharmony_ci}
1538bf215546Sopenharmony_ci
1539bf215546Sopenharmony_cistatic void
1540bf215546Sopenharmony_ciallocate_dst(struct ra_ctx *ctx, struct ir3_register *dst)
1541bf215546Sopenharmony_ci{
1542bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, dst);
1543bf215546Sopenharmony_ci
1544bf215546Sopenharmony_ci   struct ir3_register *tied = dst->tied;
1545bf215546Sopenharmony_ci   if (tied) {
1546bf215546Sopenharmony_ci      struct ra_interval *tied_interval = &ctx->intervals[tied->def->name];
1547bf215546Sopenharmony_ci      if (tied_interval->is_killed) {
1548bf215546Sopenharmony_ci         /* The easy case: the source is killed, so we can just reuse it
1549bf215546Sopenharmony_ci          * for the destination.
1550bf215546Sopenharmony_ci          */
1551bf215546Sopenharmony_ci         allocate_dst_fixed(ctx, dst, ra_interval_get_physreg(tied_interval));
1552bf215546Sopenharmony_ci         return;
1553bf215546Sopenharmony_ci      }
1554bf215546Sopenharmony_ci   }
1555bf215546Sopenharmony_ci
1556bf215546Sopenharmony_ci   /* All the hard work is done by get_reg here. */
1557bf215546Sopenharmony_ci   physreg_t physreg = get_reg(ctx, file, dst);
1558bf215546Sopenharmony_ci
1559bf215546Sopenharmony_ci   allocate_dst_fixed(ctx, dst, physreg);
1560bf215546Sopenharmony_ci}
1561bf215546Sopenharmony_ci
1562bf215546Sopenharmony_cistatic void
1563bf215546Sopenharmony_ciassign_src(struct ra_ctx *ctx, struct ir3_instruction *instr,
1564bf215546Sopenharmony_ci           struct ir3_register *src)
1565bf215546Sopenharmony_ci{
1566bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[src->def->name];
1567bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, src);
1568bf215546Sopenharmony_ci
1569bf215546Sopenharmony_ci   struct ir3_register *tied = src->tied;
1570bf215546Sopenharmony_ci   physreg_t physreg;
1571bf215546Sopenharmony_ci   if (tied) {
1572bf215546Sopenharmony_ci      struct ra_interval *tied_interval = &ctx->intervals[tied->name];
1573bf215546Sopenharmony_ci      physreg = ra_interval_get_physreg(tied_interval);
1574bf215546Sopenharmony_ci   } else {
1575bf215546Sopenharmony_ci      physreg = ra_interval_get_physreg(interval);
1576bf215546Sopenharmony_ci   }
1577bf215546Sopenharmony_ci
1578bf215546Sopenharmony_ci   assign_reg(instr, src, ra_physreg_to_num(physreg, src->flags));
1579bf215546Sopenharmony_ci
1580bf215546Sopenharmony_ci   if (src->flags & IR3_REG_FIRST_KILL)
1581bf215546Sopenharmony_ci      ra_file_remove(file, interval);
1582bf215546Sopenharmony_ci}
1583bf215546Sopenharmony_ci
1584bf215546Sopenharmony_ci/* Insert a parallel copy instruction before the instruction with the parallel
1585bf215546Sopenharmony_ci * copy entries we've built up.
1586bf215546Sopenharmony_ci */
1587bf215546Sopenharmony_cistatic void
1588bf215546Sopenharmony_ciinsert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
1589bf215546Sopenharmony_ci{
1590bf215546Sopenharmony_ci   if (ctx->parallel_copies_count == 0)
1591bf215546Sopenharmony_ci      return;
1592bf215546Sopenharmony_ci
1593bf215546Sopenharmony_ci   struct ir3_instruction *pcopy =
1594bf215546Sopenharmony_ci      ir3_instr_create(instr->block, OPC_META_PARALLEL_COPY,
1595bf215546Sopenharmony_ci                       ctx->parallel_copies_count, ctx->parallel_copies_count);
1596bf215546Sopenharmony_ci
1597bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
1598bf215546Sopenharmony_ci      struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
1599bf215546Sopenharmony_ci      struct ir3_register *reg =
1600bf215546Sopenharmony_ci         ir3_dst_create(pcopy, INVALID_REG,
1601bf215546Sopenharmony_ci                        entry->interval->interval.reg->flags &
1602bf215546Sopenharmony_ci                        (IR3_REG_HALF | IR3_REG_ARRAY));
1603bf215546Sopenharmony_ci      reg->size = entry->interval->interval.reg->size;
1604bf215546Sopenharmony_ci      reg->wrmask = entry->interval->interval.reg->wrmask;
1605bf215546Sopenharmony_ci      assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
1606bf215546Sopenharmony_ci   }
1607bf215546Sopenharmony_ci
1608bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->parallel_copies_count; i++) {
1609bf215546Sopenharmony_ci      struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
1610bf215546Sopenharmony_ci      struct ir3_register *reg =
1611bf215546Sopenharmony_ci         ir3_src_create(pcopy, INVALID_REG,
1612bf215546Sopenharmony_ci                        entry->interval->interval.reg->flags &
1613bf215546Sopenharmony_ci                        (IR3_REG_HALF | IR3_REG_ARRAY));
1614bf215546Sopenharmony_ci      reg->size = entry->interval->interval.reg->size;
1615bf215546Sopenharmony_ci      reg->wrmask = entry->interval->interval.reg->wrmask;
1616bf215546Sopenharmony_ci      assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
1617bf215546Sopenharmony_ci   }
1618bf215546Sopenharmony_ci
1619bf215546Sopenharmony_ci   list_del(&pcopy->node);
1620bf215546Sopenharmony_ci   list_addtail(&pcopy->node, &instr->node);
1621bf215546Sopenharmony_ci   ctx->parallel_copies_count = 0;
1622bf215546Sopenharmony_ci}
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_cistatic void
1625bf215546Sopenharmony_cihandle_normal_instr(struct ra_ctx *ctx, struct ir3_instruction *instr)
1626bf215546Sopenharmony_ci{
1627bf215546Sopenharmony_ci   /* First, mark sources as going-to-be-killed while allocating the dest. */
1628bf215546Sopenharmony_ci   ra_foreach_src (src, instr) {
1629bf215546Sopenharmony_ci      mark_src_killed(ctx, src);
1630bf215546Sopenharmony_ci   }
1631bf215546Sopenharmony_ci
1632bf215546Sopenharmony_ci   /* Pre-insert tied dst copies. */
1633bf215546Sopenharmony_ci   ra_foreach_dst (dst, instr) {
1634bf215546Sopenharmony_ci      insert_tied_dst_copy(ctx, dst);
1635bf215546Sopenharmony_ci   }
1636bf215546Sopenharmony_ci
1637bf215546Sopenharmony_ci   /* Allocate the destination. */
1638bf215546Sopenharmony_ci   ra_foreach_dst (dst, instr) {
1639bf215546Sopenharmony_ci      allocate_dst(ctx, dst);
1640bf215546Sopenharmony_ci   }
1641bf215546Sopenharmony_ci
1642bf215546Sopenharmony_ci   /* Now handle sources. Go backward so that in case there are multiple
1643bf215546Sopenharmony_ci    * sources with the same def and that def is killed we only remove it at
1644bf215546Sopenharmony_ci    * the end.
1645bf215546Sopenharmony_ci    */
1646bf215546Sopenharmony_ci   ra_foreach_src_rev (src, instr) {
1647bf215546Sopenharmony_ci      assign_src(ctx, instr, src);
1648bf215546Sopenharmony_ci   }
1649bf215546Sopenharmony_ci
1650bf215546Sopenharmony_ci   /* Now finally insert the destination into the map. */
1651bf215546Sopenharmony_ci   ra_foreach_dst (dst, instr) {
1652bf215546Sopenharmony_ci      insert_dst(ctx, dst);
1653bf215546Sopenharmony_ci   }
1654bf215546Sopenharmony_ci
1655bf215546Sopenharmony_ci   insert_parallel_copy_instr(ctx, instr);
1656bf215546Sopenharmony_ci}
1657bf215546Sopenharmony_ci
1658bf215546Sopenharmony_cistatic void
1659bf215546Sopenharmony_cihandle_split(struct ra_ctx *ctx, struct ir3_instruction *instr)
1660bf215546Sopenharmony_ci{
1661bf215546Sopenharmony_ci   struct ir3_register *dst = instr->dsts[0];
1662bf215546Sopenharmony_ci   struct ir3_register *src = instr->srcs[0];
1663bf215546Sopenharmony_ci
1664bf215546Sopenharmony_ci   if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) {
1665bf215546Sopenharmony_ci      handle_normal_instr(ctx, instr);
1666bf215546Sopenharmony_ci      return;
1667bf215546Sopenharmony_ci   }
1668bf215546Sopenharmony_ci
1669bf215546Sopenharmony_ci   struct ra_interval *src_interval = &ctx->intervals[src->def->name];
1670bf215546Sopenharmony_ci
1671bf215546Sopenharmony_ci   physreg_t physreg = ra_interval_get_physreg(src_interval);
1672bf215546Sopenharmony_ci   assign_src(ctx, instr, src);
1673bf215546Sopenharmony_ci
1674bf215546Sopenharmony_ci   allocate_dst_fixed(
1675bf215546Sopenharmony_ci      ctx, dst, physreg - src->def->merge_set_offset + dst->merge_set_offset);
1676bf215546Sopenharmony_ci   insert_dst(ctx, dst);
1677bf215546Sopenharmony_ci}
1678bf215546Sopenharmony_ci
1679bf215546Sopenharmony_cistatic void
1680bf215546Sopenharmony_cihandle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr)
1681bf215546Sopenharmony_ci{
1682bf215546Sopenharmony_ci   struct ir3_merge_set *dst_set = instr->dsts[0]->merge_set;
1683bf215546Sopenharmony_ci   unsigned dst_offset = instr->dsts[0]->merge_set_offset;
1684bf215546Sopenharmony_ci
1685bf215546Sopenharmony_ci   if (!dst_set || dst_set->regs_count == 1) {
1686bf215546Sopenharmony_ci      handle_normal_instr(ctx, instr);
1687bf215546Sopenharmony_ci      return;
1688bf215546Sopenharmony_ci   }
1689bf215546Sopenharmony_ci
1690bf215546Sopenharmony_ci   /* We need to check if any of the sources are contained in an interval
1691bf215546Sopenharmony_ci    * that is at least as large as the vector. In this case, we should put
1692bf215546Sopenharmony_ci    * the vector inside that larger interval. (There should be one
1693bf215546Sopenharmony_ci    * unambiguous place to put it, because values sharing the same merge set
1694bf215546Sopenharmony_ci    * should be allocated together.) This can happen in a case like:
1695bf215546Sopenharmony_ci    *
1696bf215546Sopenharmony_ci    * ssa_1 (wrmask=0xf) = ...
1697bf215546Sopenharmony_ci    * ssa_2 = split ssa_1 off:0
1698bf215546Sopenharmony_ci    * ssa_3 = split ssa_1 off:1
1699bf215546Sopenharmony_ci    * ssa_4 (wrmask=0x3) = collect (kill)ssa_2, (kill)ssa_3
1700bf215546Sopenharmony_ci    * ... = (kill)ssa_1
1701bf215546Sopenharmony_ci    * ... = (kill)ssa_4
1702bf215546Sopenharmony_ci    *
1703bf215546Sopenharmony_ci    * ssa_4 will be coalesced with ssa_1 and needs to be allocated inside it.
1704bf215546Sopenharmony_ci    */
1705bf215546Sopenharmony_ci   physreg_t dst_fixed = (physreg_t)~0u;
1706bf215546Sopenharmony_ci
1707bf215546Sopenharmony_ci   ra_foreach_src (src, instr) {
1708bf215546Sopenharmony_ci      if (src->flags & IR3_REG_FIRST_KILL) {
1709bf215546Sopenharmony_ci         mark_src_killed(ctx, src);
1710bf215546Sopenharmony_ci      }
1711bf215546Sopenharmony_ci
1712bf215546Sopenharmony_ci      struct ra_interval *interval = &ctx->intervals[src->def->name];
1713bf215546Sopenharmony_ci
1714bf215546Sopenharmony_ci      if (src->def->merge_set != dst_set || interval->is_killed)
1715bf215546Sopenharmony_ci         continue;
1716bf215546Sopenharmony_ci      while (interval->interval.parent != NULL) {
1717bf215546Sopenharmony_ci         interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
1718bf215546Sopenharmony_ci      }
1719bf215546Sopenharmony_ci      if (reg_size(interval->interval.reg) >= reg_size(instr->dsts[0])) {
1720bf215546Sopenharmony_ci         dst_fixed = interval->physreg_start -
1721bf215546Sopenharmony_ci                     interval->interval.reg->merge_set_offset + dst_offset;
1722bf215546Sopenharmony_ci      } else {
1723bf215546Sopenharmony_ci         /* For sources whose root interval is smaller than the
1724bf215546Sopenharmony_ci          * destination (i.e. the normal case), we will shuffle them
1725bf215546Sopenharmony_ci          * around after allocating the destination. Mark them killed so
1726bf215546Sopenharmony_ci          * that the destination can be allocated over them, even if they
1727bf215546Sopenharmony_ci          * aren't actually killed.
1728bf215546Sopenharmony_ci          */
1729bf215546Sopenharmony_ci         ra_file_mark_killed(ra_get_file(ctx, src), interval);
1730bf215546Sopenharmony_ci      }
1731bf215546Sopenharmony_ci   }
1732bf215546Sopenharmony_ci
1733bf215546Sopenharmony_ci   if (dst_fixed != (physreg_t)~0u)
1734bf215546Sopenharmony_ci      allocate_dst_fixed(ctx, instr->dsts[0], dst_fixed);
1735bf215546Sopenharmony_ci   else
1736bf215546Sopenharmony_ci      allocate_dst(ctx, instr->dsts[0]);
1737bf215546Sopenharmony_ci
1738bf215546Sopenharmony_ci   /* Remove the temporary is_killed we added */
1739bf215546Sopenharmony_ci   ra_foreach_src (src, instr) {
1740bf215546Sopenharmony_ci      struct ra_interval *interval = &ctx->intervals[src->def->name];
1741bf215546Sopenharmony_ci      while (interval->interval.parent != NULL) {
1742bf215546Sopenharmony_ci         interval = ir3_reg_interval_to_ra_interval(interval->interval.parent);
1743bf215546Sopenharmony_ci      }
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_ci      /* Filter out cases where it actually should be killed */
1746bf215546Sopenharmony_ci      if (interval != &ctx->intervals[src->def->name] ||
1747bf215546Sopenharmony_ci          !(src->flags & IR3_REG_KILL)) {
1748bf215546Sopenharmony_ci         ra_file_unmark_killed(ra_get_file(ctx, src), interval);
1749bf215546Sopenharmony_ci      }
1750bf215546Sopenharmony_ci   }
1751bf215546Sopenharmony_ci
1752bf215546Sopenharmony_ci   ra_foreach_src_rev (src, instr) {
1753bf215546Sopenharmony_ci      assign_src(ctx, instr, src);
1754bf215546Sopenharmony_ci   }
1755bf215546Sopenharmony_ci
1756bf215546Sopenharmony_ci   /* We need to do this before insert_dst(), so that children of the
1757bf215546Sopenharmony_ci    * destination which got marked as killed and then shuffled around to make
1758bf215546Sopenharmony_ci    * space for the destination have the correct pcopy destination that
1759bf215546Sopenharmony_ci    * matches what we assign the source of the collect to in assign_src().
1760bf215546Sopenharmony_ci    *
1761bf215546Sopenharmony_ci    * TODO: In this case we'll wind up copying the value in the pcopy and
1762bf215546Sopenharmony_ci    * then again in the collect. We could avoid one of those by updating the
1763bf215546Sopenharmony_ci    * pcopy destination to match up with the final location of the source
1764bf215546Sopenharmony_ci    * after the collect and making the collect a no-op. However this doesn't
1765bf215546Sopenharmony_ci    * seem to happen often.
1766bf215546Sopenharmony_ci    */
1767bf215546Sopenharmony_ci   insert_parallel_copy_instr(ctx, instr);
1768bf215546Sopenharmony_ci
1769bf215546Sopenharmony_ci   /* Note: insert_dst will automatically shuffle around any intervals that
1770bf215546Sopenharmony_ci    * are a child of the collect by making them children of the collect.
1771bf215546Sopenharmony_ci    */
1772bf215546Sopenharmony_ci
1773bf215546Sopenharmony_ci   insert_dst(ctx, instr->dsts[0]);
1774bf215546Sopenharmony_ci}
1775bf215546Sopenharmony_ci
1776bf215546Sopenharmony_ci/* Parallel copies before RA should only be at the end of the block, for
1777bf215546Sopenharmony_ci * phi's. For these we only need to fill in the sources, and then we fill in
1778bf215546Sopenharmony_ci * the destinations in the successor block.
1779bf215546Sopenharmony_ci */
1780bf215546Sopenharmony_cistatic void
1781bf215546Sopenharmony_cihandle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *instr)
1782bf215546Sopenharmony_ci{
1783bf215546Sopenharmony_ci   ra_foreach_src_rev (src, instr) {
1784bf215546Sopenharmony_ci      assign_src(ctx, instr, src);
1785bf215546Sopenharmony_ci   }
1786bf215546Sopenharmony_ci}
1787bf215546Sopenharmony_ci
1788bf215546Sopenharmony_ci/* Some inputs may need to be precolored. We need to handle those first, so
1789bf215546Sopenharmony_ci * that other non-precolored inputs don't accidentally get allocated over
1790bf215546Sopenharmony_ci * them. Inputs are the very first thing in the shader, so it shouldn't be a
1791bf215546Sopenharmony_ci * problem to allocate them to a specific physreg.
1792bf215546Sopenharmony_ci */
1793bf215546Sopenharmony_ci
1794bf215546Sopenharmony_cistatic void
1795bf215546Sopenharmony_cihandle_precolored_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1796bf215546Sopenharmony_ci{
1797bf215546Sopenharmony_ci   if (instr->dsts[0]->num == INVALID_REG)
1798bf215546Sopenharmony_ci      return;
1799bf215546Sopenharmony_ci
1800bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1801bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1802bf215546Sopenharmony_ci   physreg_t physreg = ra_reg_get_physreg(instr->dsts[0]);
1803bf215546Sopenharmony_ci   allocate_dst_fixed(ctx, instr->dsts[0], physreg);
1804bf215546Sopenharmony_ci
1805bf215546Sopenharmony_ci   d("insert precolored dst %u physreg %u", instr->dsts[0]->name,
1806bf215546Sopenharmony_ci     ra_interval_get_physreg(interval));
1807bf215546Sopenharmony_ci
1808bf215546Sopenharmony_ci   ra_file_insert(file, interval);
1809bf215546Sopenharmony_ci   interval->frozen = true;
1810bf215546Sopenharmony_ci}
1811bf215546Sopenharmony_ci
1812bf215546Sopenharmony_cistatic void
1813bf215546Sopenharmony_cihandle_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1814bf215546Sopenharmony_ci{
1815bf215546Sopenharmony_ci   if (instr->dsts[0]->num != INVALID_REG)
1816bf215546Sopenharmony_ci      return;
1817bf215546Sopenharmony_ci
1818bf215546Sopenharmony_ci   allocate_dst(ctx, instr->dsts[0]);
1819bf215546Sopenharmony_ci
1820bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1821bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1822bf215546Sopenharmony_ci   ra_file_insert(file, interval);
1823bf215546Sopenharmony_ci}
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_cistatic void
1826bf215546Sopenharmony_ciassign_input(struct ra_ctx *ctx, struct ir3_instruction *instr)
1827bf215546Sopenharmony_ci{
1828bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name];
1829bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, instr->dsts[0]);
1830bf215546Sopenharmony_ci
1831bf215546Sopenharmony_ci   if (instr->dsts[0]->num == INVALID_REG) {
1832bf215546Sopenharmony_ci      assign_reg(instr, instr->dsts[0], ra_interval_get_num(interval));
1833bf215546Sopenharmony_ci   } else {
1834bf215546Sopenharmony_ci      interval->frozen = false;
1835bf215546Sopenharmony_ci   }
1836bf215546Sopenharmony_ci
1837bf215546Sopenharmony_ci   if (instr->dsts[0]->flags & IR3_REG_UNUSED)
1838bf215546Sopenharmony_ci      ra_file_remove(file, interval);
1839bf215546Sopenharmony_ci
1840bf215546Sopenharmony_ci   ra_foreach_src_rev (src, instr)
1841bf215546Sopenharmony_ci      assign_src(ctx, instr, src);
1842bf215546Sopenharmony_ci}
1843bf215546Sopenharmony_ci
1844bf215546Sopenharmony_ci/* chmask is a bit weird, because it has pre-colored sources due to the need
1845bf215546Sopenharmony_ci * to pass some registers to the next stage. Fortunately there are only at
1846bf215546Sopenharmony_ci * most two, and there should be no other live values by the time we get to
1847bf215546Sopenharmony_ci * this instruction, so we only have to do the minimum and don't need any
1848bf215546Sopenharmony_ci * fancy fallbacks.
1849bf215546Sopenharmony_ci *
1850bf215546Sopenharmony_ci * TODO: Add more complete handling of precolored sources, e.g. for function
1851bf215546Sopenharmony_ci * argument handling. We'd need a way to mark sources as fixed so that they
1852bf215546Sopenharmony_ci * don't get moved around when placing other sources in the fallback case, and
1853bf215546Sopenharmony_ci * a duplication of much of the logic in get_reg(). This also opens another
1854bf215546Sopenharmony_ci * can of worms, e.g. what if the precolored source is a split of a vector
1855bf215546Sopenharmony_ci * which is still live -- this breaks our assumption that splits don't incur
1856bf215546Sopenharmony_ci * any "extra" register requirements and we'd have to break it out of the
1857bf215546Sopenharmony_ci * parent ra_interval.
1858bf215546Sopenharmony_ci */
1859bf215546Sopenharmony_ci
1860bf215546Sopenharmony_cistatic void
1861bf215546Sopenharmony_cihandle_precolored_source(struct ra_ctx *ctx, struct ir3_register *src)
1862bf215546Sopenharmony_ci{
1863bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, src);
1864bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[src->def->name];
1865bf215546Sopenharmony_ci   physreg_t physreg = ra_reg_get_physreg(src);
1866bf215546Sopenharmony_ci
1867bf215546Sopenharmony_ci   if (ra_interval_get_num(interval) == src->num)
1868bf215546Sopenharmony_ci      return;
1869bf215546Sopenharmony_ci
1870bf215546Sopenharmony_ci   /* Try evicting stuff in our way if it isn't free. This won't move
1871bf215546Sopenharmony_ci    * anything unless it overlaps with our precolored physreg, so we don't
1872bf215546Sopenharmony_ci    * have to worry about evicting other precolored sources.
1873bf215546Sopenharmony_ci    */
1874bf215546Sopenharmony_ci   if (!get_reg_specified(ctx, file, src, physreg, true)) {
1875bf215546Sopenharmony_ci      unsigned eviction_count;
1876bf215546Sopenharmony_ci      if (!try_evict_regs(ctx, file, src, physreg, &eviction_count, true,
1877bf215546Sopenharmony_ci                          false)) {
1878bf215546Sopenharmony_ci         unreachable("failed to evict for precolored source!");
1879bf215546Sopenharmony_ci         return;
1880bf215546Sopenharmony_ci      }
1881bf215546Sopenharmony_ci   }
1882bf215546Sopenharmony_ci
1883bf215546Sopenharmony_ci   ra_move_interval(ctx, file, interval, physreg);
1884bf215546Sopenharmony_ci}
1885bf215546Sopenharmony_ci
1886bf215546Sopenharmony_cistatic void
1887bf215546Sopenharmony_cihandle_chmask(struct ra_ctx *ctx, struct ir3_instruction *instr)
1888bf215546Sopenharmony_ci{
1889bf215546Sopenharmony_ci   /* Note: we purposely don't mark sources as killed, so that we can reuse
1890bf215546Sopenharmony_ci    * some of the get_reg() machinery as-if the source is a destination.
1891bf215546Sopenharmony_ci    * Marking it as killed would make e.g. get_reg_specified() wouldn't work
1892bf215546Sopenharmony_ci    * correctly.
1893bf215546Sopenharmony_ci    */
1894bf215546Sopenharmony_ci   ra_foreach_src (src, instr) {
1895bf215546Sopenharmony_ci      assert(src->num != INVALID_REG);
1896bf215546Sopenharmony_ci      handle_precolored_source(ctx, src);
1897bf215546Sopenharmony_ci   }
1898bf215546Sopenharmony_ci
1899bf215546Sopenharmony_ci   ra_foreach_src (src, instr) {
1900bf215546Sopenharmony_ci      struct ra_file *file = ra_get_file(ctx, src);
1901bf215546Sopenharmony_ci      struct ra_interval *interval = &ctx->intervals[src->def->name];
1902bf215546Sopenharmony_ci      if (src->flags & IR3_REG_FIRST_KILL)
1903bf215546Sopenharmony_ci         ra_file_remove(file, interval);
1904bf215546Sopenharmony_ci   }
1905bf215546Sopenharmony_ci
1906bf215546Sopenharmony_ci   insert_parallel_copy_instr(ctx, instr);
1907bf215546Sopenharmony_ci}
1908bf215546Sopenharmony_ci
1909bf215546Sopenharmony_cistatic physreg_t
1910bf215546Sopenharmony_ciread_register(struct ra_ctx *ctx, struct ir3_block *block,
1911bf215546Sopenharmony_ci              struct ir3_register *def)
1912bf215546Sopenharmony_ci{
1913bf215546Sopenharmony_ci   struct ra_block_state *state = &ctx->blocks[block->index];
1914bf215546Sopenharmony_ci   if (state->renames) {
1915bf215546Sopenharmony_ci      struct hash_entry *entry = _mesa_hash_table_search(state->renames, def);
1916bf215546Sopenharmony_ci      if (entry) {
1917bf215546Sopenharmony_ci         return (physreg_t)(uintptr_t)entry->data;
1918bf215546Sopenharmony_ci      }
1919bf215546Sopenharmony_ci   }
1920bf215546Sopenharmony_ci
1921bf215546Sopenharmony_ci   return ra_reg_get_physreg(def);
1922bf215546Sopenharmony_ci}
1923bf215546Sopenharmony_ci
1924bf215546Sopenharmony_cistatic void
1925bf215546Sopenharmony_cihandle_live_in(struct ra_ctx *ctx, struct ir3_register *def)
1926bf215546Sopenharmony_ci{
1927bf215546Sopenharmony_ci   physreg_t physreg = ~0;
1928bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
1929bf215546Sopenharmony_ci      struct ir3_block *pred = ctx->block->predecessors[i];
1930bf215546Sopenharmony_ci      struct ra_block_state *pred_state = &ctx->blocks[pred->index];
1931bf215546Sopenharmony_ci
1932bf215546Sopenharmony_ci      if (!pred_state->visited)
1933bf215546Sopenharmony_ci         continue;
1934bf215546Sopenharmony_ci
1935bf215546Sopenharmony_ci      physreg = read_register(ctx, pred, def);
1936bf215546Sopenharmony_ci      break;
1937bf215546Sopenharmony_ci   }
1938bf215546Sopenharmony_ci
1939bf215546Sopenharmony_ci   assert(physreg != (physreg_t)~0);
1940bf215546Sopenharmony_ci
1941bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[def->name];
1942bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, def);
1943bf215546Sopenharmony_ci   ra_interval_init(interval, def);
1944bf215546Sopenharmony_ci   interval->physreg_start = physreg;
1945bf215546Sopenharmony_ci   interval->physreg_end = physreg + reg_size(def);
1946bf215546Sopenharmony_ci   ra_file_insert(file, interval);
1947bf215546Sopenharmony_ci}
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_cistatic void
1950bf215546Sopenharmony_cihandle_live_out(struct ra_ctx *ctx, struct ir3_register *def)
1951bf215546Sopenharmony_ci{
1952bf215546Sopenharmony_ci   /* Skip parallelcopy's which in the original program are only used as phi
1953bf215546Sopenharmony_ci    * arguments. Even though phi arguments are live out, they are only
1954bf215546Sopenharmony_ci    * assigned when the phi is.
1955bf215546Sopenharmony_ci    */
1956bf215546Sopenharmony_ci   if (def->instr->opc == OPC_META_PARALLEL_COPY)
1957bf215546Sopenharmony_ci      return;
1958bf215546Sopenharmony_ci
1959bf215546Sopenharmony_ci   struct ra_block_state *state = &ctx->blocks[ctx->block->index];
1960bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[def->name];
1961bf215546Sopenharmony_ci   physreg_t physreg = ra_interval_get_physreg(interval);
1962bf215546Sopenharmony_ci   if (physreg != ra_reg_get_physreg(def)) {
1963bf215546Sopenharmony_ci      if (!state->renames)
1964bf215546Sopenharmony_ci         state->renames = _mesa_pointer_hash_table_create(ctx);
1965bf215546Sopenharmony_ci      _mesa_hash_table_insert(state->renames, def, (void *)(uintptr_t)physreg);
1966bf215546Sopenharmony_ci   }
1967bf215546Sopenharmony_ci}
1968bf215546Sopenharmony_ci
1969bf215546Sopenharmony_cistatic void
1970bf215546Sopenharmony_cihandle_phi(struct ra_ctx *ctx, struct ir3_register *def)
1971bf215546Sopenharmony_ci{
1972bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, def);
1973bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[def->name];
1974bf215546Sopenharmony_ci
1975bf215546Sopenharmony_ci   /* phis are always scalar, so they should already be the smallest possible
1976bf215546Sopenharmony_ci    * size. However they may be coalesced with other live-in values/phi
1977bf215546Sopenharmony_ci    * nodes, so check for that here.
1978bf215546Sopenharmony_ci    */
1979bf215546Sopenharmony_ci   struct ir3_reg_interval *parent_ir3 =
1980bf215546Sopenharmony_ci      ir3_reg_interval_search(&file->reg_ctx.intervals, def->interval_start);
1981bf215546Sopenharmony_ci   physreg_t physreg;
1982bf215546Sopenharmony_ci   if (parent_ir3) {
1983bf215546Sopenharmony_ci      struct ra_interval *parent = ir3_reg_interval_to_ra_interval(parent_ir3);
1984bf215546Sopenharmony_ci      physreg = ra_interval_get_physreg(parent) +
1985bf215546Sopenharmony_ci                (def->interval_start - parent_ir3->reg->interval_start);
1986bf215546Sopenharmony_ci   } else {
1987bf215546Sopenharmony_ci      physreg = get_reg(ctx, file, def);
1988bf215546Sopenharmony_ci   }
1989bf215546Sopenharmony_ci
1990bf215546Sopenharmony_ci   allocate_dst_fixed(ctx, def, physreg);
1991bf215546Sopenharmony_ci
1992bf215546Sopenharmony_ci   ra_file_insert(file, interval);
1993bf215546Sopenharmony_ci}
1994bf215546Sopenharmony_ci
1995bf215546Sopenharmony_cistatic void
1996bf215546Sopenharmony_ciassign_phi(struct ra_ctx *ctx, struct ir3_instruction *phi)
1997bf215546Sopenharmony_ci{
1998bf215546Sopenharmony_ci   struct ra_file *file = ra_get_file(ctx, phi->dsts[0]);
1999bf215546Sopenharmony_ci   struct ra_interval *interval = &ctx->intervals[phi->dsts[0]->name];
2000bf215546Sopenharmony_ci   assert(!interval->interval.parent);
2001bf215546Sopenharmony_ci   unsigned num = ra_interval_get_num(interval);
2002bf215546Sopenharmony_ci   assign_reg(phi, phi->dsts[0], num);
2003bf215546Sopenharmony_ci
2004bf215546Sopenharmony_ci   /* Assign the parallelcopy sources of this phi */
2005bf215546Sopenharmony_ci   for (unsigned i = 0; i < phi->srcs_count; i++) {
2006bf215546Sopenharmony_ci      if (phi->srcs[i]->def) {
2007bf215546Sopenharmony_ci         assign_reg(phi, phi->srcs[i], num);
2008bf215546Sopenharmony_ci         assign_reg(phi, phi->srcs[i]->def, num);
2009bf215546Sopenharmony_ci      }
2010bf215546Sopenharmony_ci   }
2011bf215546Sopenharmony_ci
2012bf215546Sopenharmony_ci   if (phi->dsts[0]->flags & IR3_REG_UNUSED)
2013bf215546Sopenharmony_ci      ra_file_remove(file, interval);
2014bf215546Sopenharmony_ci}
2015bf215546Sopenharmony_ci
2016bf215546Sopenharmony_ci/* When we split a live range, we sometimes need to emit fixup code at the end
2017bf215546Sopenharmony_ci * of a block. For example, something like:
2018bf215546Sopenharmony_ci *
2019bf215546Sopenharmony_ci * a = ...
2020bf215546Sopenharmony_ci * if (...) {
2021bf215546Sopenharmony_ci *    ...
2022bf215546Sopenharmony_ci *    a' = a
2023bf215546Sopenharmony_ci *    b = ... // a evicted to make room for b
2024bf215546Sopenharmony_ci *    ...
2025bf215546Sopenharmony_ci * }
2026bf215546Sopenharmony_ci * ... = a
2027bf215546Sopenharmony_ci *
2028bf215546Sopenharmony_ci * When we insert the copy to a' in insert_parallel_copy_instr(), this forces
2029bf215546Sopenharmony_ci * to insert another copy "a = a'" at the end of the if. Normally this would
2030bf215546Sopenharmony_ci * also entail adding a phi node, but since we're about to go out of SSA
2031bf215546Sopenharmony_ci * anyway we just insert an extra move. Note, however, that "b" might be used
2032bf215546Sopenharmony_ci * in a phi node at the end of the if and share registers with "a", so we
2033bf215546Sopenharmony_ci * have to be careful to extend any preexisting parallelcopy instruction
2034bf215546Sopenharmony_ci * instead of creating our own in order to guarantee that they properly get
2035bf215546Sopenharmony_ci * swapped.
2036bf215546Sopenharmony_ci */
2037bf215546Sopenharmony_ci
2038bf215546Sopenharmony_cistatic void
2039bf215546Sopenharmony_ciinsert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src,
2040bf215546Sopenharmony_ci                    struct ir3_register *reg)
2041bf215546Sopenharmony_ci{
2042bf215546Sopenharmony_ci   struct ir3_instruction *old_pcopy = NULL;
2043bf215546Sopenharmony_ci   if (!list_is_empty(&block->instr_list)) {
2044bf215546Sopenharmony_ci      struct ir3_instruction *last =
2045bf215546Sopenharmony_ci         list_entry(block->instr_list.prev, struct ir3_instruction, node);
2046bf215546Sopenharmony_ci      if (last->opc == OPC_META_PARALLEL_COPY)
2047bf215546Sopenharmony_ci         old_pcopy = last;
2048bf215546Sopenharmony_ci   }
2049bf215546Sopenharmony_ci
2050bf215546Sopenharmony_ci   unsigned old_pcopy_srcs = old_pcopy ? old_pcopy->srcs_count : 0;
2051bf215546Sopenharmony_ci   struct ir3_instruction *pcopy = ir3_instr_create(
2052bf215546Sopenharmony_ci      block, OPC_META_PARALLEL_COPY, old_pcopy_srcs + 1, old_pcopy_srcs + 1);
2053bf215546Sopenharmony_ci
2054bf215546Sopenharmony_ci   for (unsigned i = 0; i < old_pcopy_srcs; i++) {
2055bf215546Sopenharmony_ci      old_pcopy->dsts[i]->instr = pcopy;
2056bf215546Sopenharmony_ci      pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i];
2057bf215546Sopenharmony_ci   }
2058bf215546Sopenharmony_ci
2059bf215546Sopenharmony_ci   unsigned flags = reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
2060bf215546Sopenharmony_ci
2061bf215546Sopenharmony_ci   struct ir3_register *dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags);
2062bf215546Sopenharmony_ci   dst_reg->wrmask = reg->wrmask;
2063bf215546Sopenharmony_ci   dst_reg->size = reg->size;
2064bf215546Sopenharmony_ci   assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
2065bf215546Sopenharmony_ci
2066bf215546Sopenharmony_ci   for (unsigned i = 0; i < old_pcopy_srcs; i++) {
2067bf215546Sopenharmony_ci      pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i];
2068bf215546Sopenharmony_ci   }
2069bf215546Sopenharmony_ci
2070bf215546Sopenharmony_ci   struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags);
2071bf215546Sopenharmony_ci   src_reg->wrmask = reg->wrmask;
2072bf215546Sopenharmony_ci   src_reg->size = reg->size;
2073bf215546Sopenharmony_ci   assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));
2074bf215546Sopenharmony_ci
2075bf215546Sopenharmony_ci   if (old_pcopy)
2076bf215546Sopenharmony_ci      list_del(&old_pcopy->node);
2077bf215546Sopenharmony_ci}
2078bf215546Sopenharmony_ci
2079bf215546Sopenharmony_cistatic void
2080bf215546Sopenharmony_ciinsert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
2081bf215546Sopenharmony_ci{
2082bf215546Sopenharmony_ci   physreg_t physreg = ra_interval_get_physreg(interval);
2083bf215546Sopenharmony_ci
2084bf215546Sopenharmony_ci   bool shared = interval->interval.reg->flags & IR3_REG_SHARED;
2085bf215546Sopenharmony_ci   struct ir3_block **predecessors =
2086bf215546Sopenharmony_ci      shared ? ctx->block->physical_predecessors : ctx->block->predecessors;
2087bf215546Sopenharmony_ci   unsigned predecessors_count = shared
2088bf215546Sopenharmony_ci                                    ? ctx->block->physical_predecessors_count
2089bf215546Sopenharmony_ci                                    : ctx->block->predecessors_count;
2090bf215546Sopenharmony_ci
2091bf215546Sopenharmony_ci   for (unsigned i = 0; i < predecessors_count; i++) {
2092bf215546Sopenharmony_ci      struct ir3_block *pred = predecessors[i];
2093bf215546Sopenharmony_ci      struct ra_block_state *pred_state = &ctx->blocks[pred->index];
2094bf215546Sopenharmony_ci
2095bf215546Sopenharmony_ci      if (!pred_state->visited)
2096bf215546Sopenharmony_ci         continue;
2097bf215546Sopenharmony_ci
2098bf215546Sopenharmony_ci      physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg);
2099bf215546Sopenharmony_ci      if (pred_reg != physreg) {
2100bf215546Sopenharmony_ci         insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg);
2101bf215546Sopenharmony_ci
2102bf215546Sopenharmony_ci         /* This is a bit tricky, but when visiting the destination of a
2103bf215546Sopenharmony_ci          * physical-only edge, we have two predecessors (the if and the
2104bf215546Sopenharmony_ci          * header block) and both have multiple successors. We pick the
2105bf215546Sopenharmony_ci          * register for all live-ins from the normal edge, which should
2106bf215546Sopenharmony_ci          * guarantee that there's no need for shuffling things around in
2107bf215546Sopenharmony_ci          * the normal predecessor as long as there are no phi nodes, but
2108bf215546Sopenharmony_ci          * we still may need to insert fixup code in the physical
2109bf215546Sopenharmony_ci          * predecessor (i.e. the last block of the if) and that has
2110bf215546Sopenharmony_ci          * another successor (the block after the if) so we need to update
2111bf215546Sopenharmony_ci          * the renames state for when we process the other successor. This
2112bf215546Sopenharmony_ci          * crucially depends on the other successor getting processed
2113bf215546Sopenharmony_ci          * after this.
2114bf215546Sopenharmony_ci          *
2115bf215546Sopenharmony_ci          * For normal (non-physical) edges we disallow critical edges so
2116bf215546Sopenharmony_ci          * that hacks like this aren't necessary.
2117bf215546Sopenharmony_ci          */
2118bf215546Sopenharmony_ci         if (!pred_state->renames)
2119bf215546Sopenharmony_ci            pred_state->renames = _mesa_pointer_hash_table_create(ctx);
2120bf215546Sopenharmony_ci         _mesa_hash_table_insert(pred_state->renames, interval->interval.reg,
2121bf215546Sopenharmony_ci                                 (void *)(uintptr_t)physreg);
2122bf215546Sopenharmony_ci      }
2123bf215546Sopenharmony_ci   }
2124bf215546Sopenharmony_ci}
2125bf215546Sopenharmony_ci
2126bf215546Sopenharmony_cistatic void
2127bf215546Sopenharmony_ciinsert_file_live_in_moves(struct ra_ctx *ctx, struct ra_file *file)
2128bf215546Sopenharmony_ci{
2129bf215546Sopenharmony_ci   BITSET_WORD *live_in = ctx->live->live_in[ctx->block->index];
2130bf215546Sopenharmony_ci   rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2131bf215546Sopenharmony_ci                    physreg_node) {
2132bf215546Sopenharmony_ci      /* Skip phi nodes. This needs to happen after phi nodes are allocated,
2133bf215546Sopenharmony_ci       * because we may have to move live-ins around to make space for phi
2134bf215546Sopenharmony_ci       * nodes, but we shouldn't be handling phi nodes here.
2135bf215546Sopenharmony_ci       */
2136bf215546Sopenharmony_ci      if (BITSET_TEST(live_in, interval->interval.reg->name))
2137bf215546Sopenharmony_ci         insert_live_in_move(ctx, interval);
2138bf215546Sopenharmony_ci   }
2139bf215546Sopenharmony_ci}
2140bf215546Sopenharmony_ci
2141bf215546Sopenharmony_cistatic void
2142bf215546Sopenharmony_ciinsert_entry_regs(struct ra_block_state *state, struct ra_file *file)
2143bf215546Sopenharmony_ci{
2144bf215546Sopenharmony_ci   rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2145bf215546Sopenharmony_ci                    physreg_node) {
2146bf215546Sopenharmony_ci      _mesa_hash_table_insert(state->entry_regs, interval->interval.reg,
2147bf215546Sopenharmony_ci                              (void *)(uintptr_t)interval->physreg_start);
2148bf215546Sopenharmony_ci   }
2149bf215546Sopenharmony_ci}
2150bf215546Sopenharmony_ci
2151bf215546Sopenharmony_cistatic void
2152bf215546Sopenharmony_ciinsert_live_in_moves(struct ra_ctx *ctx)
2153bf215546Sopenharmony_ci{
2154bf215546Sopenharmony_ci   insert_file_live_in_moves(ctx, &ctx->full);
2155bf215546Sopenharmony_ci   insert_file_live_in_moves(ctx, &ctx->half);
2156bf215546Sopenharmony_ci   insert_file_live_in_moves(ctx, &ctx->shared);
2157bf215546Sopenharmony_ci
2158bf215546Sopenharmony_ci   /* If not all predecessors are visited, insert live-in regs so that
2159bf215546Sopenharmony_ci    * insert_live_out_moves() will work.
2160bf215546Sopenharmony_ci    */
2161bf215546Sopenharmony_ci   bool all_preds_visited = true;
2162bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
2163bf215546Sopenharmony_ci      if (!ctx->blocks[ctx->block->predecessors[i]->index].visited) {
2164bf215546Sopenharmony_ci         all_preds_visited = false;
2165bf215546Sopenharmony_ci         break;
2166bf215546Sopenharmony_ci      }
2167bf215546Sopenharmony_ci   }
2168bf215546Sopenharmony_ci
2169bf215546Sopenharmony_ci   if (!all_preds_visited) {
2170bf215546Sopenharmony_ci      struct ra_block_state *state = &ctx->blocks[ctx->block->index];
2171bf215546Sopenharmony_ci      state->entry_regs = _mesa_pointer_hash_table_create(ctx);
2172bf215546Sopenharmony_ci
2173bf215546Sopenharmony_ci      insert_entry_regs(state, &ctx->full);
2174bf215546Sopenharmony_ci      insert_entry_regs(state, &ctx->half);
2175bf215546Sopenharmony_ci      insert_entry_regs(state, &ctx->shared);
2176bf215546Sopenharmony_ci   }
2177bf215546Sopenharmony_ci}
2178bf215546Sopenharmony_ci
2179bf215546Sopenharmony_cistatic void
2180bf215546Sopenharmony_ciinsert_live_out_move(struct ra_ctx *ctx, struct ra_interval *interval)
2181bf215546Sopenharmony_ci{
2182bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++) {
2183bf215546Sopenharmony_ci      if (!ctx->block->successors[i])
2184bf215546Sopenharmony_ci         continue;
2185bf215546Sopenharmony_ci
2186bf215546Sopenharmony_ci      struct ir3_block *succ = ctx->block->successors[i];
2187bf215546Sopenharmony_ci      struct ra_block_state *succ_state = &ctx->blocks[succ->index];
2188bf215546Sopenharmony_ci
2189bf215546Sopenharmony_ci      if (!succ_state->visited)
2190bf215546Sopenharmony_ci         continue;
2191bf215546Sopenharmony_ci
2192bf215546Sopenharmony_ci      struct hash_entry *entry = _mesa_hash_table_search(
2193bf215546Sopenharmony_ci         succ_state->entry_regs, interval->interval.reg);
2194bf215546Sopenharmony_ci      if (!entry)
2195bf215546Sopenharmony_ci         continue;
2196bf215546Sopenharmony_ci
2197bf215546Sopenharmony_ci      physreg_t new_reg = (physreg_t)(uintptr_t)entry->data;
2198bf215546Sopenharmony_ci      if (new_reg != interval->physreg_start) {
2199bf215546Sopenharmony_ci         insert_liveout_copy(ctx->block, new_reg, interval->physreg_start,
2200bf215546Sopenharmony_ci                             interval->interval.reg);
2201bf215546Sopenharmony_ci      }
2202bf215546Sopenharmony_ci   }
2203bf215546Sopenharmony_ci}
2204bf215546Sopenharmony_ci
2205bf215546Sopenharmony_cistatic void
2206bf215546Sopenharmony_ciinsert_file_live_out_moves(struct ra_ctx *ctx, struct ra_file *file)
2207bf215546Sopenharmony_ci{
2208bf215546Sopenharmony_ci   rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals,
2209bf215546Sopenharmony_ci                    physreg_node) {
2210bf215546Sopenharmony_ci      insert_live_out_move(ctx, interval);
2211bf215546Sopenharmony_ci   }
2212bf215546Sopenharmony_ci}
2213bf215546Sopenharmony_ci
2214bf215546Sopenharmony_cistatic void
2215bf215546Sopenharmony_ciinsert_live_out_moves(struct ra_ctx *ctx)
2216bf215546Sopenharmony_ci{
2217bf215546Sopenharmony_ci   insert_file_live_out_moves(ctx, &ctx->full);
2218bf215546Sopenharmony_ci   insert_file_live_out_moves(ctx, &ctx->half);
2219bf215546Sopenharmony_ci   insert_file_live_out_moves(ctx, &ctx->shared);
2220bf215546Sopenharmony_ci}
2221bf215546Sopenharmony_ci
2222bf215546Sopenharmony_cistatic void
2223bf215546Sopenharmony_cihandle_block(struct ra_ctx *ctx, struct ir3_block *block)
2224bf215546Sopenharmony_ci{
2225bf215546Sopenharmony_ci   ctx->block = block;
2226bf215546Sopenharmony_ci
2227bf215546Sopenharmony_ci   /* Reset the register files from the last block */
2228bf215546Sopenharmony_ci   ra_file_init(&ctx->full);
2229bf215546Sopenharmony_ci   ra_file_init(&ctx->half);
2230bf215546Sopenharmony_ci   ra_file_init(&ctx->shared);
2231bf215546Sopenharmony_ci
2232bf215546Sopenharmony_ci   /* Handle live-ins, phis, and input meta-instructions. These all appear
2233bf215546Sopenharmony_ci    * live at the beginning of the block, and interfere with each other
2234bf215546Sopenharmony_ci    * therefore need to be allocated "in parallel". This means that we
2235bf215546Sopenharmony_ci    * have to allocate all of them, inserting them into the file, and then
2236bf215546Sopenharmony_ci    * delay updating the IR until all of them are allocated.
2237bf215546Sopenharmony_ci    *
2238bf215546Sopenharmony_ci    * Handle precolored inputs first, because we need to make sure that other
2239bf215546Sopenharmony_ci    * inputs don't overwrite them. We shouldn't have both live-ins/phi nodes
2240bf215546Sopenharmony_ci    * and inputs at the same time, because the first block doesn't have
2241bf215546Sopenharmony_ci    * predecessors. Therefore handle_live_in doesn't have to worry about
2242bf215546Sopenharmony_ci    * them.
2243bf215546Sopenharmony_ci    */
2244bf215546Sopenharmony_ci
2245bf215546Sopenharmony_ci   foreach_instr (instr, &block->instr_list) {
2246bf215546Sopenharmony_ci      if (instr->opc == OPC_META_INPUT)
2247bf215546Sopenharmony_ci         handle_precolored_input(ctx, instr);
2248bf215546Sopenharmony_ci      else
2249bf215546Sopenharmony_ci         break;
2250bf215546Sopenharmony_ci   }
2251bf215546Sopenharmony_ci
2252bf215546Sopenharmony_ci   unsigned name;
2253bf215546Sopenharmony_ci   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
2254bf215546Sopenharmony_ci                       ctx->live->definitions_count) {
2255bf215546Sopenharmony_ci      struct ir3_register *reg = ctx->live->definitions[name];
2256bf215546Sopenharmony_ci      handle_live_in(ctx, reg);
2257bf215546Sopenharmony_ci   }
2258bf215546Sopenharmony_ci
2259bf215546Sopenharmony_ci   foreach_instr (instr, &block->instr_list) {
2260bf215546Sopenharmony_ci      if (instr->opc == OPC_META_PHI)
2261bf215546Sopenharmony_ci         handle_phi(ctx, instr->dsts[0]);
2262bf215546Sopenharmony_ci      else if (instr->opc == OPC_META_INPUT ||
2263bf215546Sopenharmony_ci               instr->opc == OPC_META_TEX_PREFETCH)
2264bf215546Sopenharmony_ci         handle_input(ctx, instr);
2265bf215546Sopenharmony_ci      else
2266bf215546Sopenharmony_ci         break;
2267bf215546Sopenharmony_ci   }
2268bf215546Sopenharmony_ci
2269bf215546Sopenharmony_ci   /* After this point, every live-in/phi/input has an interval assigned to
2270bf215546Sopenharmony_ci    * it. We delay actually assigning values until everything has been
2271bf215546Sopenharmony_ci    * allocated, so we can simply ignore any parallel copy entries created
2272bf215546Sopenharmony_ci    * when shuffling them around.
2273bf215546Sopenharmony_ci    */
2274bf215546Sopenharmony_ci   ctx->parallel_copies_count = 0;
2275bf215546Sopenharmony_ci
2276bf215546Sopenharmony_ci   insert_live_in_moves(ctx);
2277bf215546Sopenharmony_ci
2278bf215546Sopenharmony_ci   if (RA_DEBUG) {
2279bf215546Sopenharmony_ci      d("after live-in block %u:\n", block->index);
2280bf215546Sopenharmony_ci      ra_ctx_dump(ctx);
2281bf215546Sopenharmony_ci   }
2282bf215546Sopenharmony_ci
2283bf215546Sopenharmony_ci   /* Now we're done with processing live-ins, and can handle the body of the
2284bf215546Sopenharmony_ci    * block.
2285bf215546Sopenharmony_ci    */
2286bf215546Sopenharmony_ci   foreach_instr (instr, &block->instr_list) {
2287bf215546Sopenharmony_ci      di(instr, "processing");
2288bf215546Sopenharmony_ci
2289bf215546Sopenharmony_ci      if (instr->opc == OPC_META_PHI)
2290bf215546Sopenharmony_ci         assign_phi(ctx, instr);
2291bf215546Sopenharmony_ci      else if (instr->opc == OPC_META_INPUT ||
2292bf215546Sopenharmony_ci               instr->opc == OPC_META_TEX_PREFETCH)
2293bf215546Sopenharmony_ci         assign_input(ctx, instr);
2294bf215546Sopenharmony_ci      else if (instr->opc == OPC_META_SPLIT)
2295bf215546Sopenharmony_ci         handle_split(ctx, instr);
2296bf215546Sopenharmony_ci      else if (instr->opc == OPC_META_COLLECT)
2297bf215546Sopenharmony_ci         handle_collect(ctx, instr);
2298bf215546Sopenharmony_ci      else if (instr->opc == OPC_META_PARALLEL_COPY)
2299bf215546Sopenharmony_ci         handle_pcopy(ctx, instr);
2300bf215546Sopenharmony_ci      else if (instr->opc == OPC_CHMASK)
2301bf215546Sopenharmony_ci         handle_chmask(ctx, instr);
2302bf215546Sopenharmony_ci      else
2303bf215546Sopenharmony_ci         handle_normal_instr(ctx, instr);
2304bf215546Sopenharmony_ci
2305bf215546Sopenharmony_ci      if (RA_DEBUG)
2306bf215546Sopenharmony_ci         ra_ctx_dump(ctx);
2307bf215546Sopenharmony_ci   }
2308bf215546Sopenharmony_ci
2309bf215546Sopenharmony_ci   insert_live_out_moves(ctx);
2310bf215546Sopenharmony_ci
2311bf215546Sopenharmony_ci   BITSET_FOREACH_SET (name, ctx->live->live_out[block->index],
2312bf215546Sopenharmony_ci                       ctx->live->definitions_count) {
2313bf215546Sopenharmony_ci      struct ir3_register *reg = ctx->live->definitions[name];
2314bf215546Sopenharmony_ci      handle_live_out(ctx, reg);
2315bf215546Sopenharmony_ci   }
2316bf215546Sopenharmony_ci
2317bf215546Sopenharmony_ci   ctx->blocks[block->index].visited = true;
2318bf215546Sopenharmony_ci}
2319bf215546Sopenharmony_ci
2320bf215546Sopenharmony_cistatic unsigned
2321bf215546Sopenharmony_cicalc_target_full_pressure(struct ir3_shader_variant *v, unsigned pressure)
2322bf215546Sopenharmony_ci{
2323bf215546Sopenharmony_ci   /* Registers are allocated in units of vec4, so switch from units of
2324bf215546Sopenharmony_ci    * half-regs to vec4.
2325bf215546Sopenharmony_ci    */
2326bf215546Sopenharmony_ci   unsigned reg_count = DIV_ROUND_UP(pressure, 2 * 4);
2327bf215546Sopenharmony_ci
2328bf215546Sopenharmony_ci   bool double_threadsize = ir3_should_double_threadsize(v, reg_count);
2329bf215546Sopenharmony_ci
2330bf215546Sopenharmony_ci   unsigned target = reg_count;
2331bf215546Sopenharmony_ci   unsigned reg_independent_max_waves =
2332bf215546Sopenharmony_ci      ir3_get_reg_independent_max_waves(v, double_threadsize);
2333bf215546Sopenharmony_ci   unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves(
2334bf215546Sopenharmony_ci      v->compiler, reg_count, double_threadsize);
2335bf215546Sopenharmony_ci   unsigned target_waves =
2336bf215546Sopenharmony_ci      MIN2(reg_independent_max_waves, reg_dependent_max_waves);
2337bf215546Sopenharmony_ci
2338bf215546Sopenharmony_ci   while (target <= RA_FULL_SIZE / (2 * 4) &&
2339bf215546Sopenharmony_ci          ir3_should_double_threadsize(v, target) == double_threadsize &&
2340bf215546Sopenharmony_ci          ir3_get_reg_dependent_max_waves(v->compiler, target,
2341bf215546Sopenharmony_ci                                          double_threadsize) >= target_waves)
2342bf215546Sopenharmony_ci      target++;
2343bf215546Sopenharmony_ci
2344bf215546Sopenharmony_ci   return (target - 1) * 2 * 4;
2345bf215546Sopenharmony_ci}
2346bf215546Sopenharmony_ci
2347bf215546Sopenharmony_cistatic void
2348bf215546Sopenharmony_ciadd_pressure(struct ir3_pressure *pressure, struct ir3_register *reg,
2349bf215546Sopenharmony_ci             bool merged_regs)
2350bf215546Sopenharmony_ci{
2351bf215546Sopenharmony_ci   unsigned size = reg_size(reg);
2352bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_HALF)
2353bf215546Sopenharmony_ci      pressure->half += size;
2354bf215546Sopenharmony_ci   if (!(reg->flags & IR3_REG_HALF) || merged_regs)
2355bf215546Sopenharmony_ci      pressure->full += size;
2356bf215546Sopenharmony_ci}
2357bf215546Sopenharmony_ci
2358bf215546Sopenharmony_cistatic void
2359bf215546Sopenharmony_cidummy_interval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
2360bf215546Sopenharmony_ci{
2361bf215546Sopenharmony_ci}
2362bf215546Sopenharmony_ci
2363bf215546Sopenharmony_cistatic void
2364bf215546Sopenharmony_cidummy_interval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
2365bf215546Sopenharmony_ci{
2366bf215546Sopenharmony_ci}
2367bf215546Sopenharmony_ci
2368bf215546Sopenharmony_cistatic void
2369bf215546Sopenharmony_cidummy_interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *parent,
2370bf215546Sopenharmony_ci                     struct ir3_reg_interval *child)
2371bf215546Sopenharmony_ci{
2372bf215546Sopenharmony_ci}
2373bf215546Sopenharmony_ci
2374bf215546Sopenharmony_ci/* Calculate the minimum possible limit on register pressure so that spilling
2375bf215546Sopenharmony_ci * still succeeds. Used to implement IR3_SHADER_DEBUG=spillall.
2376bf215546Sopenharmony_ci */
2377bf215546Sopenharmony_ci
2378bf215546Sopenharmony_cistatic void
2379bf215546Sopenharmony_cicalc_min_limit_pressure(struct ir3_shader_variant *v,
2380bf215546Sopenharmony_ci                        struct ir3_liveness *live,
2381bf215546Sopenharmony_ci                        struct ir3_pressure *limit)
2382bf215546Sopenharmony_ci{
2383bf215546Sopenharmony_ci   struct ir3_block *start = ir3_start_block(v->ir);
2384bf215546Sopenharmony_ci   struct ir3_reg_ctx *ctx = ralloc(NULL, struct ir3_reg_ctx);
2385bf215546Sopenharmony_ci   struct ir3_reg_interval *intervals =
2386bf215546Sopenharmony_ci      rzalloc_array(ctx, struct ir3_reg_interval, live->definitions_count);
2387bf215546Sopenharmony_ci
2388bf215546Sopenharmony_ci   ctx->interval_add = dummy_interval_add;
2389bf215546Sopenharmony_ci   ctx->interval_delete = dummy_interval_delete;
2390bf215546Sopenharmony_ci   ctx->interval_readd = dummy_interval_readd;
2391bf215546Sopenharmony_ci
2392bf215546Sopenharmony_ci   limit->full = limit->half = 0;
2393bf215546Sopenharmony_ci
2394bf215546Sopenharmony_ci   struct ir3_pressure cur_pressure = {0};
2395bf215546Sopenharmony_ci   foreach_instr (input, &start->instr_list) {
2396bf215546Sopenharmony_ci      if (input->opc != OPC_META_INPUT &&
2397bf215546Sopenharmony_ci          input->opc != OPC_META_TEX_PREFETCH)
2398bf215546Sopenharmony_ci         break;
2399bf215546Sopenharmony_ci
2400bf215546Sopenharmony_ci      add_pressure(&cur_pressure, input->dsts[0], v->mergedregs);
2401bf215546Sopenharmony_ci   }
2402bf215546Sopenharmony_ci
2403bf215546Sopenharmony_ci   limit->full = MAX2(limit->full, cur_pressure.full);
2404bf215546Sopenharmony_ci   limit->half = MAX2(limit->half, cur_pressure.half);
2405bf215546Sopenharmony_ci
2406bf215546Sopenharmony_ci   foreach_instr (input, &start->instr_list) {
2407bf215546Sopenharmony_ci      if (input->opc != OPC_META_INPUT &&
2408bf215546Sopenharmony_ci          input->opc != OPC_META_TEX_PREFETCH)
2409bf215546Sopenharmony_ci         break;
2410bf215546Sopenharmony_ci
2411bf215546Sopenharmony_ci      /* pre-colored inputs may have holes, which increases the pressure. */
2412bf215546Sopenharmony_ci      struct ir3_register *dst = input->dsts[0];
2413bf215546Sopenharmony_ci      if (dst->num != INVALID_REG) {
2414bf215546Sopenharmony_ci         unsigned physreg = ra_reg_get_physreg(dst) + reg_size(dst);
2415bf215546Sopenharmony_ci         if (dst->flags & IR3_REG_HALF)
2416bf215546Sopenharmony_ci            limit->half = MAX2(limit->half, physreg);
2417bf215546Sopenharmony_ci         if (!(dst->flags & IR3_REG_HALF) || v->mergedregs)
2418bf215546Sopenharmony_ci            limit->full = MAX2(limit->full, physreg);
2419bf215546Sopenharmony_ci      }
2420bf215546Sopenharmony_ci   }
2421bf215546Sopenharmony_ci
2422bf215546Sopenharmony_ci   foreach_block (block, &v->ir->block_list) {
2423bf215546Sopenharmony_ci      rb_tree_init(&ctx->intervals);
2424bf215546Sopenharmony_ci
2425bf215546Sopenharmony_ci      unsigned name;
2426bf215546Sopenharmony_ci      BITSET_FOREACH_SET (name, live->live_in[block->index],
2427bf215546Sopenharmony_ci                          live->definitions_count) {
2428bf215546Sopenharmony_ci         struct ir3_register *reg = live->definitions[name];
2429bf215546Sopenharmony_ci         ir3_reg_interval_init(&intervals[reg->name], reg);
2430bf215546Sopenharmony_ci         ir3_reg_interval_insert(ctx, &intervals[reg->name]);
2431bf215546Sopenharmony_ci      }
2432bf215546Sopenharmony_ci
2433bf215546Sopenharmony_ci      foreach_instr (instr, &block->instr_list) {
2434bf215546Sopenharmony_ci         ra_foreach_dst (dst, instr) {
2435bf215546Sopenharmony_ci            ir3_reg_interval_init(&intervals[dst->name], dst);
2436bf215546Sopenharmony_ci         }
2437bf215546Sopenharmony_ci         /* phis and parallel copies can be deleted via spilling */
2438bf215546Sopenharmony_ci
2439bf215546Sopenharmony_ci         if (instr->opc == OPC_META_PHI) {
2440bf215546Sopenharmony_ci            ir3_reg_interval_insert(ctx, &intervals[instr->dsts[0]->name]);
2441bf215546Sopenharmony_ci            continue;
2442bf215546Sopenharmony_ci         }
2443bf215546Sopenharmony_ci
2444bf215546Sopenharmony_ci         if (instr->opc == OPC_META_PARALLEL_COPY)
2445bf215546Sopenharmony_ci            continue;
2446bf215546Sopenharmony_ci
2447bf215546Sopenharmony_ci         cur_pressure = (struct ir3_pressure) {0};
2448bf215546Sopenharmony_ci
2449bf215546Sopenharmony_ci         ra_foreach_dst (dst, instr) {
2450bf215546Sopenharmony_ci            if (dst->tied && !(dst->tied->flags & IR3_REG_KILL))
2451bf215546Sopenharmony_ci               add_pressure(&cur_pressure, dst, v->mergedregs);
2452bf215546Sopenharmony_ci         }
2453bf215546Sopenharmony_ci
2454bf215546Sopenharmony_ci         ra_foreach_src_rev (src, instr) {
2455bf215546Sopenharmony_ci            /* We currently don't support spilling the parent of a source when
2456bf215546Sopenharmony_ci             * making space for sources, so we have to keep track of the
2457bf215546Sopenharmony_ci             * intervals and figure out the root of the tree to figure out how
2458bf215546Sopenharmony_ci             * much space we need.
2459bf215546Sopenharmony_ci             *
2460bf215546Sopenharmony_ci             * TODO: We should probably support this in the spiller.
2461bf215546Sopenharmony_ci             */
2462bf215546Sopenharmony_ci            struct ir3_reg_interval *interval = &intervals[src->def->name];
2463bf215546Sopenharmony_ci            while (interval->parent)
2464bf215546Sopenharmony_ci               interval = interval->parent;
2465bf215546Sopenharmony_ci            add_pressure(&cur_pressure, interval->reg, v->mergedregs);
2466bf215546Sopenharmony_ci
2467bf215546Sopenharmony_ci            if (src->flags & IR3_REG_FIRST_KILL)
2468bf215546Sopenharmony_ci               ir3_reg_interval_remove(ctx, &intervals[src->def->name]);
2469bf215546Sopenharmony_ci         }
2470bf215546Sopenharmony_ci
2471bf215546Sopenharmony_ci         limit->full = MAX2(limit->full, cur_pressure.full);
2472bf215546Sopenharmony_ci         limit->half = MAX2(limit->half, cur_pressure.half);
2473bf215546Sopenharmony_ci
2474bf215546Sopenharmony_ci         cur_pressure = (struct ir3_pressure) {0};
2475bf215546Sopenharmony_ci
2476bf215546Sopenharmony_ci         ra_foreach_dst (dst, instr) {
2477bf215546Sopenharmony_ci            ir3_reg_interval_init(&intervals[dst->name], dst);
2478bf215546Sopenharmony_ci            ir3_reg_interval_insert(ctx, &intervals[dst->name]);
2479bf215546Sopenharmony_ci            add_pressure(&cur_pressure, dst, v->mergedregs);
2480bf215546Sopenharmony_ci         }
2481bf215546Sopenharmony_ci
2482bf215546Sopenharmony_ci         limit->full = MAX2(limit->full, cur_pressure.full);
2483bf215546Sopenharmony_ci         limit->half = MAX2(limit->half, cur_pressure.half);
2484bf215546Sopenharmony_ci      }
2485bf215546Sopenharmony_ci   }
2486bf215546Sopenharmony_ci
2487bf215546Sopenharmony_ci   /* Account for the base register, which needs to be available everywhere. */
2488bf215546Sopenharmony_ci   limit->full += 2;
2489bf215546Sopenharmony_ci
2490bf215546Sopenharmony_ci   ralloc_free(ctx);
2491bf215546Sopenharmony_ci}
2492bf215546Sopenharmony_ci
2493bf215546Sopenharmony_ci/*
2494bf215546Sopenharmony_ci * If barriers are used, it must be possible for all waves in the workgroup
2495bf215546Sopenharmony_ci * to execute concurrently. Thus we may have to reduce the registers limit.
2496bf215546Sopenharmony_ci */
2497bf215546Sopenharmony_cistatic void
2498bf215546Sopenharmony_cicalc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v,
2499bf215546Sopenharmony_ci                                        struct ir3_pressure *limit_pressure)
2500bf215546Sopenharmony_ci{
2501bf215546Sopenharmony_ci   const struct ir3_compiler *compiler = v->compiler;
2502bf215546Sopenharmony_ci
2503bf215546Sopenharmony_ci   unsigned threads_per_wg;
2504bf215546Sopenharmony_ci   if (v->local_size_variable) {
2505bf215546Sopenharmony_ci      /* We have to expect the worst case. */
2506bf215546Sopenharmony_ci      threads_per_wg = compiler->max_variable_workgroup_size;
2507bf215546Sopenharmony_ci   } else {
2508bf215546Sopenharmony_ci      threads_per_wg = v->local_size[0] * v->local_size[1] * v->local_size[2];
2509bf215546Sopenharmony_ci   }
2510bf215546Sopenharmony_ci
2511bf215546Sopenharmony_ci   /* The register file is grouped into reg_size_vec4 number of parts.
2512bf215546Sopenharmony_ci    * Each part has enough registers to add a single vec4 register to
2513bf215546Sopenharmony_ci    * each thread of a single-sized wave-pair. With double threadsize
2514bf215546Sopenharmony_ci    * each wave-pair would consume two parts of the register file to get
2515bf215546Sopenharmony_ci    * a single vec4 for a thread. The more active wave-pairs the less
2516bf215546Sopenharmony_ci    * parts each could get.
2517bf215546Sopenharmony_ci    */
2518bf215546Sopenharmony_ci
2519bf215546Sopenharmony_ci   bool double_threadsize = ir3_should_double_threadsize(v, 0);
2520bf215546Sopenharmony_ci   unsigned waves_per_wg = DIV_ROUND_UP(
2521bf215546Sopenharmony_ci      threads_per_wg, compiler->threadsize_base * (double_threadsize ? 2 : 1) *
2522bf215546Sopenharmony_ci                         compiler->wave_granularity);
2523bf215546Sopenharmony_ci
2524bf215546Sopenharmony_ci   uint32_t vec4_regs_per_thread =
2525bf215546Sopenharmony_ci      compiler->reg_size_vec4 / (waves_per_wg * (double_threadsize ? 2 : 1));
2526bf215546Sopenharmony_ci   assert(vec4_regs_per_thread > 0);
2527bf215546Sopenharmony_ci
2528bf215546Sopenharmony_ci   uint32_t half_regs_per_thread = vec4_regs_per_thread * 4 * 2;
2529bf215546Sopenharmony_ci
2530bf215546Sopenharmony_ci   if (limit_pressure->full > half_regs_per_thread) {
2531bf215546Sopenharmony_ci      if (v->mergedregs) {
2532bf215546Sopenharmony_ci         limit_pressure->full = half_regs_per_thread;
2533bf215546Sopenharmony_ci      } else {
2534bf215546Sopenharmony_ci         /* TODO: Handle !mergedregs case, probably we would have to do this
2535bf215546Sopenharmony_ci          * after the first register pressure pass.
2536bf215546Sopenharmony_ci          */
2537bf215546Sopenharmony_ci      }
2538bf215546Sopenharmony_ci   }
2539bf215546Sopenharmony_ci}
2540bf215546Sopenharmony_ci
2541bf215546Sopenharmony_ciint
2542bf215546Sopenharmony_ciir3_ra(struct ir3_shader_variant *v)
2543bf215546Sopenharmony_ci{
2544bf215546Sopenharmony_ci   ir3_calc_dominance(v->ir);
2545bf215546Sopenharmony_ci
2546bf215546Sopenharmony_ci   ir3_create_parallel_copies(v->ir);
2547bf215546Sopenharmony_ci
2548bf215546Sopenharmony_ci   struct ra_ctx *ctx = rzalloc(NULL, struct ra_ctx);
2549bf215546Sopenharmony_ci
2550bf215546Sopenharmony_ci   ctx->merged_regs = v->mergedregs;
2551bf215546Sopenharmony_ci   ctx->compiler = v->compiler;
2552bf215546Sopenharmony_ci   ctx->stage = v->type;
2553bf215546Sopenharmony_ci
2554bf215546Sopenharmony_ci   struct ir3_liveness *live = ir3_calc_liveness(ctx, v->ir);
2555bf215546Sopenharmony_ci
2556bf215546Sopenharmony_ci   ir3_debug_print(v->ir, "AFTER: create_parallel_copies");
2557bf215546Sopenharmony_ci
2558bf215546Sopenharmony_ci   ir3_merge_regs(live, v->ir);
2559bf215546Sopenharmony_ci
2560bf215546Sopenharmony_ci   struct ir3_pressure max_pressure;
2561bf215546Sopenharmony_ci   ir3_calc_pressure(v, live, &max_pressure);
2562bf215546Sopenharmony_ci   d("max pressure:");
2563bf215546Sopenharmony_ci   d("\tfull: %u", max_pressure.full);
2564bf215546Sopenharmony_ci   d("\thalf: %u", max_pressure.half);
2565bf215546Sopenharmony_ci   d("\tshared: %u", max_pressure.shared);
2566bf215546Sopenharmony_ci
2567bf215546Sopenharmony_ci   struct ir3_pressure limit_pressure;
2568bf215546Sopenharmony_ci   limit_pressure.full = RA_FULL_SIZE;
2569bf215546Sopenharmony_ci   limit_pressure.half = RA_HALF_SIZE;
2570bf215546Sopenharmony_ci   limit_pressure.shared = RA_SHARED_SIZE;
2571bf215546Sopenharmony_ci
2572bf215546Sopenharmony_ci   if (gl_shader_stage_is_compute(v->type) && v->has_barrier) {
2573bf215546Sopenharmony_ci      calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
2574bf215546Sopenharmony_ci   }
2575bf215546Sopenharmony_ci
2576bf215546Sopenharmony_ci   /* If the user forces a doubled threadsize, we may have to lower the limit
2577bf215546Sopenharmony_ci    * because on some gens the register file is not big enough to hold a
2578bf215546Sopenharmony_ci    * double-size wave with all 48 registers in use.
2579bf215546Sopenharmony_ci    */
2580bf215546Sopenharmony_ci   if (v->real_wavesize == IR3_DOUBLE_ONLY) {
2581bf215546Sopenharmony_ci      limit_pressure.full =
2582bf215546Sopenharmony_ci         MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
2583bf215546Sopenharmony_ci   }
2584bf215546Sopenharmony_ci
2585bf215546Sopenharmony_ci   /* If requested, lower the limit so that spilling happens more often. */
2586bf215546Sopenharmony_ci   if (ir3_shader_debug & IR3_DBG_SPILLALL)
2587bf215546Sopenharmony_ci      calc_min_limit_pressure(v, live, &limit_pressure);
2588bf215546Sopenharmony_ci
2589bf215546Sopenharmony_ci   if (max_pressure.shared > limit_pressure.shared) {
2590bf215546Sopenharmony_ci      /* TODO shared reg -> normal reg spilling */
2591bf215546Sopenharmony_ci      d("shared max pressure exceeded!");
2592bf215546Sopenharmony_ci      goto fail;
2593bf215546Sopenharmony_ci   }
2594bf215546Sopenharmony_ci
2595bf215546Sopenharmony_ci   bool spilled = false;
2596bf215546Sopenharmony_ci   if (max_pressure.full > limit_pressure.full ||
2597bf215546Sopenharmony_ci       max_pressure.half > limit_pressure.half) {
2598bf215546Sopenharmony_ci      if (!v->compiler->has_pvtmem) {
2599bf215546Sopenharmony_ci         d("max pressure exceeded!");
2600bf215546Sopenharmony_ci         goto fail;
2601bf215546Sopenharmony_ci      }
2602bf215546Sopenharmony_ci      d("max pressure exceeded, spilling!");
2603bf215546Sopenharmony_ci      IR3_PASS(v->ir, ir3_spill, v, &live, &limit_pressure);
2604bf215546Sopenharmony_ci      ir3_calc_pressure(v, live, &max_pressure);
2605bf215546Sopenharmony_ci      assert(max_pressure.full <= limit_pressure.full &&
2606bf215546Sopenharmony_ci             max_pressure.half <= limit_pressure.half);
2607bf215546Sopenharmony_ci      spilled = true;
2608bf215546Sopenharmony_ci   }
2609bf215546Sopenharmony_ci
2610bf215546Sopenharmony_ci   ctx->live = live;
2611bf215546Sopenharmony_ci   ctx->intervals =
2612bf215546Sopenharmony_ci      rzalloc_array(ctx, struct ra_interval, live->definitions_count);
2613bf215546Sopenharmony_ci   ctx->blocks = rzalloc_array(ctx, struct ra_block_state, live->block_count);
2614bf215546Sopenharmony_ci
2615bf215546Sopenharmony_ci   ctx->full.size = calc_target_full_pressure(v, max_pressure.full);
2616bf215546Sopenharmony_ci   d("full size: %u", ctx->full.size);
2617bf215546Sopenharmony_ci
2618bf215546Sopenharmony_ci   if (!v->mergedregs)
2619bf215546Sopenharmony_ci      ctx->half.size = RA_HALF_SIZE;
2620bf215546Sopenharmony_ci
2621bf215546Sopenharmony_ci   ctx->shared.size = RA_SHARED_SIZE;
2622bf215546Sopenharmony_ci
2623bf215546Sopenharmony_ci   ctx->full.start = ctx->half.start = ctx->shared.start = 0;
2624bf215546Sopenharmony_ci
2625bf215546Sopenharmony_ci   foreach_block (block, &v->ir->block_list)
2626bf215546Sopenharmony_ci      handle_block(ctx, block);
2627bf215546Sopenharmony_ci
2628bf215546Sopenharmony_ci   ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count);
2629bf215546Sopenharmony_ci
2630bf215546Sopenharmony_ci   /* Strip array-ness and SSA-ness at the end, because various helpers still
2631bf215546Sopenharmony_ci    * need to work even on definitions that have already been assigned. For
2632bf215546Sopenharmony_ci    * example, we need to preserve array-ness so that array live-ins have the
2633bf215546Sopenharmony_ci    * right size.
2634bf215546Sopenharmony_ci    */
2635bf215546Sopenharmony_ci   foreach_block (block, &v->ir->block_list) {
2636bf215546Sopenharmony_ci      foreach_instr (instr, &block->instr_list) {
2637bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->dsts_count; i++) {
2638bf215546Sopenharmony_ci            instr->dsts[i]->flags &= ~IR3_REG_SSA;
2639bf215546Sopenharmony_ci
2640bf215546Sopenharmony_ci            /* Parallel copies of array registers copy the whole register, and
2641bf215546Sopenharmony_ci             * we need some way to let the parallel copy code know that this was
2642bf215546Sopenharmony_ci             * an array whose size is determined by reg->size. So keep the array
2643bf215546Sopenharmony_ci             * flag on those. spill/reload also need to work on the entire
2644bf215546Sopenharmony_ci             * array.
2645bf215546Sopenharmony_ci             */
2646bf215546Sopenharmony_ci            if (!is_meta(instr) && instr->opc != OPC_RELOAD_MACRO)
2647bf215546Sopenharmony_ci               instr->dsts[i]->flags &= ~IR3_REG_ARRAY;
2648bf215546Sopenharmony_ci         }
2649bf215546Sopenharmony_ci
2650bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->srcs_count; i++) {
2651bf215546Sopenharmony_ci            instr->srcs[i]->flags &= ~IR3_REG_SSA;
2652bf215546Sopenharmony_ci
2653bf215546Sopenharmony_ci            if (!is_meta(instr) && instr->opc != OPC_SPILL_MACRO)
2654bf215546Sopenharmony_ci               instr->srcs[i]->flags &= ~IR3_REG_ARRAY;
2655bf215546Sopenharmony_ci         }
2656bf215546Sopenharmony_ci      }
2657bf215546Sopenharmony_ci   }
2658bf215546Sopenharmony_ci
2659bf215546Sopenharmony_ci   ir3_debug_print(v->ir, "AFTER: register allocation");
2660bf215546Sopenharmony_ci
2661bf215546Sopenharmony_ci   if (spilled) {
2662bf215546Sopenharmony_ci      IR3_PASS(v->ir, ir3_lower_spill);
2663bf215546Sopenharmony_ci   }
2664bf215546Sopenharmony_ci
2665bf215546Sopenharmony_ci   ir3_lower_copies(v);
2666bf215546Sopenharmony_ci
2667bf215546Sopenharmony_ci   ir3_debug_print(v->ir, "AFTER: ir3_lower_copies");
2668bf215546Sopenharmony_ci
2669bf215546Sopenharmony_ci   ralloc_free(ctx);
2670bf215546Sopenharmony_ci
2671bf215546Sopenharmony_ci   return 0;
2672bf215546Sopenharmony_cifail:
2673bf215546Sopenharmony_ci   ralloc_free(ctx);
2674bf215546Sopenharmony_ci   return -1;
2675bf215546Sopenharmony_ci}
2676