1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#ifndef _IR3_RA_H
25bf215546Sopenharmony_ci#define _IR3_RA_H
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "util/rb_tree.h"
28bf215546Sopenharmony_ci#include "ir3.h"
29bf215546Sopenharmony_ci#include "ir3_compiler.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#ifdef DEBUG
32bf215546Sopenharmony_ci#define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS)
33bf215546Sopenharmony_ci#else
34bf215546Sopenharmony_ci#define RA_DEBUG 0
35bf215546Sopenharmony_ci#endif
36bf215546Sopenharmony_ci#define d(fmt, ...)                                                            \
37bf215546Sopenharmony_ci   do {                                                                        \
38bf215546Sopenharmony_ci      if (RA_DEBUG) {                                                          \
39bf215546Sopenharmony_ci         mesa_logi("RA: " fmt, ##__VA_ARGS__);                                 \
40bf215546Sopenharmony_ci      }                                                                        \
41bf215546Sopenharmony_ci   } while (0)
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci#define di(instr, fmt, ...)                                                    \
44bf215546Sopenharmony_ci   do {                                                                        \
45bf215546Sopenharmony_ci      if (RA_DEBUG) {                                                          \
46bf215546Sopenharmony_ci         struct log_stream *stream = mesa_log_streami();                       \
47bf215546Sopenharmony_ci         mesa_log_stream_printf(stream, "RA: " fmt ": ", ##__VA_ARGS__);       \
48bf215546Sopenharmony_ci         ir3_print_instr_stream(stream, instr);                                \
49bf215546Sopenharmony_ci         mesa_log_stream_destroy(stream);                                      \
50bf215546Sopenharmony_ci      }                                                                        \
51bf215546Sopenharmony_ci   } while (0)
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_citypedef uint16_t physreg_t;
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_cistatic inline unsigned
56bf215546Sopenharmony_cira_physreg_to_num(physreg_t physreg, unsigned flags)
57bf215546Sopenharmony_ci{
58bf215546Sopenharmony_ci   if (!(flags & IR3_REG_HALF))
59bf215546Sopenharmony_ci      physreg /= 2;
60bf215546Sopenharmony_ci   if (flags & IR3_REG_SHARED)
61bf215546Sopenharmony_ci      physreg += 48 * 4;
62bf215546Sopenharmony_ci   return physreg;
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistatic inline physreg_t
66bf215546Sopenharmony_cira_num_to_physreg(unsigned num, unsigned flags)
67bf215546Sopenharmony_ci{
68bf215546Sopenharmony_ci   if (flags & IR3_REG_SHARED)
69bf215546Sopenharmony_ci      num -= 48 * 4;
70bf215546Sopenharmony_ci   if (!(flags & IR3_REG_HALF))
71bf215546Sopenharmony_ci      num *= 2;
72bf215546Sopenharmony_ci   return num;
73bf215546Sopenharmony_ci}
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_cistatic inline unsigned
76bf215546Sopenharmony_cira_reg_get_num(const struct ir3_register *reg)
77bf215546Sopenharmony_ci{
78bf215546Sopenharmony_ci   return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num;
79bf215546Sopenharmony_ci}
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_cistatic inline physreg_t
82bf215546Sopenharmony_cira_reg_get_physreg(const struct ir3_register *reg)
83bf215546Sopenharmony_ci{
84bf215546Sopenharmony_ci   return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags);
85bf215546Sopenharmony_ci}
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_cistatic inline bool
88bf215546Sopenharmony_cidef_is_gpr(const struct ir3_register *reg)
89bf215546Sopenharmony_ci{
90bf215546Sopenharmony_ci   return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0;
91bf215546Sopenharmony_ci}
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci/* Note: don't count undef as a source.
94bf215546Sopenharmony_ci */
95bf215546Sopenharmony_cistatic inline bool
96bf215546Sopenharmony_cira_reg_is_src(const struct ir3_register *reg)
97bf215546Sopenharmony_ci{
98bf215546Sopenharmony_ci   return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def);
99bf215546Sopenharmony_ci}
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_cistatic inline bool
102bf215546Sopenharmony_cira_reg_is_dst(const struct ir3_register *reg)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) &&
105bf215546Sopenharmony_ci          ((reg->flags & IR3_REG_ARRAY) || reg->wrmask);
106bf215546Sopenharmony_ci}
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci/* Iterators for sources and destinations which:
109bf215546Sopenharmony_ci * - Don't include fake sources (irrelevant for RA)
110bf215546Sopenharmony_ci * - Don't include non-SSA sources (immediates and constants, also irrelevant)
111bf215546Sopenharmony_ci */
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci#define ra_foreach_src_n(__srcreg, __n, __instr)                               \
114bf215546Sopenharmony_ci   foreach_src_n(__srcreg, __n, __instr)                                       \
115bf215546Sopenharmony_ci      if (ra_reg_is_src(__srcreg))
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci#define ra_foreach_src(__srcreg, __instr)                                      \
118bf215546Sopenharmony_ci   ra_foreach_src_n(__srcreg, __i, __instr)
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci#define ra_foreach_src_rev(__srcreg, __instr)                                  \
121bf215546Sopenharmony_ci   for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
122bf215546Sopenharmony_ci      for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0;       \
123bf215546Sopenharmony_ci           __i--)                                                              \
124bf215546Sopenharmony_ci         if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci#define ra_foreach_dst_n(__dstreg, __n, __instr)                               \
127bf215546Sopenharmony_ci   foreach_dst_n(__dstreg, __n, __instr)                                         \
128bf215546Sopenharmony_ci      if (ra_reg_is_dst(__dstreg))
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci#define ra_foreach_dst(__dstreg, __instr)                                      \
131bf215546Sopenharmony_ci   ra_foreach_dst_n(__dstreg, __i, __instr)
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci#define RA_HALF_SIZE     (4 * 48)
134bf215546Sopenharmony_ci#define RA_FULL_SIZE     (4 * 48 * 2)
135bf215546Sopenharmony_ci#define RA_SHARED_SIZE   (2 * 4 * 8)
136bf215546Sopenharmony_ci#define RA_MAX_FILE_SIZE RA_FULL_SIZE
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_cistruct ir3_liveness {
139bf215546Sopenharmony_ci   unsigned block_count;
140bf215546Sopenharmony_ci   unsigned interval_offset;
141bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_register *, definitions);
142bf215546Sopenharmony_ci   DECLARE_ARRAY(BITSET_WORD *, live_out);
143bf215546Sopenharmony_ci   DECLARE_ARRAY(BITSET_WORD *, live_in);
144bf215546Sopenharmony_ci};
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_cistruct ir3_liveness *ir3_calc_liveness(void *mem_ctx, struct ir3 *ir);
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_cibool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def,
149bf215546Sopenharmony_ci                        struct ir3_instruction *instr);
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_civoid ir3_create_parallel_copies(struct ir3 *ir);
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_civoid ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir);
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_civoid ir3_force_merge(struct ir3_register *a, struct ir3_register *b,
156bf215546Sopenharmony_ci                     int b_offset);
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_cistruct ir3_pressure {
159bf215546Sopenharmony_ci   unsigned full, half, shared;
160bf215546Sopenharmony_ci};
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_civoid ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
163bf215546Sopenharmony_ci                       struct ir3_pressure *max_pressure);
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_cibool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
166bf215546Sopenharmony_ci               struct ir3_liveness **live,
167bf215546Sopenharmony_ci               const struct ir3_pressure *limit_pressure);
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_cibool ir3_lower_spill(struct ir3 *ir);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_civoid ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
172bf215546Sopenharmony_ci                     unsigned half_size, unsigned block_count);
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_civoid ir3_lower_copies(struct ir3_shader_variant *v);
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci/* Register interval datastructure
177bf215546Sopenharmony_ci *
178bf215546Sopenharmony_ci * ir3_reg_ctx is used to track which registers are live. The tricky part is
179bf215546Sopenharmony_ci * that some registers may overlap each other, when registers with overlapping
180bf215546Sopenharmony_ci * live ranges get coalesced. For example, splits will overlap with their
181bf215546Sopenharmony_ci * parent vector and sometimes collect sources will also overlap with the
182bf215546Sopenharmony_ci * collect'ed vector. ir3_merge_regs guarantees for us that none of the
183bf215546Sopenharmony_ci * registers in a merge set that are live at any given point partially
184bf215546Sopenharmony_ci * overlap, which means that we can organize them into a forest. While each
185bf215546Sopenharmony_ci * register has a per-merge-set offset, ir3_merge_regs also computes a
186bf215546Sopenharmony_ci * "global" offset which allows us to throw away the original merge sets and
187bf215546Sopenharmony_ci * think of registers as just intervals in a forest of live intervals. When a
188bf215546Sopenharmony_ci * register becomes live, we insert it into the forest, and when it dies we
189bf215546Sopenharmony_ci * remove it from the forest (and then its children get moved up a level). We
190bf215546Sopenharmony_ci * use red-black trees to keep track of each level of the forest, so insertion
191bf215546Sopenharmony_ci * and deletion should be fast operations. ir3_reg_ctx handles all the
192bf215546Sopenharmony_ci * internal bookkeeping for this, so that it can be shared between RA,
193bf215546Sopenharmony_ci * spilling, and register pressure tracking.
194bf215546Sopenharmony_ci */
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_cistruct ir3_reg_interval {
197bf215546Sopenharmony_ci   struct rb_node node;
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   struct rb_tree children;
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   struct ir3_reg_interval *parent;
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   struct ir3_register *reg;
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   bool inserted;
206bf215546Sopenharmony_ci};
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_cistruct ir3_reg_ctx {
209bf215546Sopenharmony_ci   /* The tree of top-level intervals in the forest. */
210bf215546Sopenharmony_ci   struct rb_tree intervals;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   /* Users of ir3_reg_ctx need to keep around additional state that is
213bf215546Sopenharmony_ci    * modified when top-level intervals are added or removed. For register
214bf215546Sopenharmony_ci    * pressure tracking, this is just the register pressure, but for RA we
215bf215546Sopenharmony_ci    * need to keep track of the physreg of each top-level interval. These
216bf215546Sopenharmony_ci    * callbacks provide a place to let users deriving from ir3_reg_ctx update
217bf215546Sopenharmony_ci    * their state when top-level intervals are inserted/removed.
218bf215546Sopenharmony_ci    */
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   /* Called when an interval is added and it turns out to be at the top
221bf215546Sopenharmony_ci    * level.
222bf215546Sopenharmony_ci    */
223bf215546Sopenharmony_ci   void (*interval_add)(struct ir3_reg_ctx *ctx,
224bf215546Sopenharmony_ci                        struct ir3_reg_interval *interval);
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci   /* Called when an interval is deleted from the top level. */
227bf215546Sopenharmony_ci   void (*interval_delete)(struct ir3_reg_ctx *ctx,
228bf215546Sopenharmony_ci                           struct ir3_reg_interval *interval);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci   /* Called when an interval is deleted and its child becomes top-level.
231bf215546Sopenharmony_ci    */
232bf215546Sopenharmony_ci   void (*interval_readd)(struct ir3_reg_ctx *ctx,
233bf215546Sopenharmony_ci                          struct ir3_reg_interval *parent,
234bf215546Sopenharmony_ci                          struct ir3_reg_interval *child);
235bf215546Sopenharmony_ci};
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_cistatic inline struct ir3_reg_interval *
238bf215546Sopenharmony_ciir3_rb_node_to_interval(struct rb_node *node)
239bf215546Sopenharmony_ci{
240bf215546Sopenharmony_ci   return rb_node_data(struct ir3_reg_interval, node, node);
241bf215546Sopenharmony_ci}
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_cistatic inline const struct ir3_reg_interval *
244bf215546Sopenharmony_ciir3_rb_node_to_interval_const(const struct rb_node *node)
245bf215546Sopenharmony_ci{
246bf215546Sopenharmony_ci   return rb_node_data(struct ir3_reg_interval, node, node);
247bf215546Sopenharmony_ci}
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_cistatic inline struct ir3_reg_interval *
250bf215546Sopenharmony_ciir3_reg_interval_next(struct ir3_reg_interval *interval)
251bf215546Sopenharmony_ci{
252bf215546Sopenharmony_ci   struct rb_node *next = rb_node_next(&interval->node);
253bf215546Sopenharmony_ci   return next ? ir3_rb_node_to_interval(next) : NULL;
254bf215546Sopenharmony_ci}
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_cistatic inline struct ir3_reg_interval *
257bf215546Sopenharmony_ciir3_reg_interval_next_or_null(struct ir3_reg_interval *interval)
258bf215546Sopenharmony_ci{
259bf215546Sopenharmony_ci   return interval ? ir3_reg_interval_next(interval) : NULL;
260bf215546Sopenharmony_ci}
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_cistatic inline void
263bf215546Sopenharmony_ciir3_reg_interval_init(struct ir3_reg_interval *interval,
264bf215546Sopenharmony_ci                      struct ir3_register *reg)
265bf215546Sopenharmony_ci{
266bf215546Sopenharmony_ci   rb_tree_init(&interval->children);
267bf215546Sopenharmony_ci   interval->reg = reg;
268bf215546Sopenharmony_ci   interval->parent = NULL;
269bf215546Sopenharmony_ci   interval->inserted = false;
270bf215546Sopenharmony_ci}
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_civoid ir3_reg_interval_dump(struct log_stream *stream,
273bf215546Sopenharmony_ci                           struct ir3_reg_interval *interval);
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_civoid ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
276bf215546Sopenharmony_ci                             struct ir3_reg_interval *interval);
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_civoid ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
279bf215546Sopenharmony_ci                             struct ir3_reg_interval *interval);
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_civoid ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
282bf215546Sopenharmony_ci                                 struct ir3_reg_interval *interval);
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci#endif
285