1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#ifndef _IR3_RA_H 25bf215546Sopenharmony_ci#define _IR3_RA_H 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/rb_tree.h" 28bf215546Sopenharmony_ci#include "ir3.h" 29bf215546Sopenharmony_ci#include "ir3_compiler.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#ifdef DEBUG 32bf215546Sopenharmony_ci#define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS) 33bf215546Sopenharmony_ci#else 34bf215546Sopenharmony_ci#define RA_DEBUG 0 35bf215546Sopenharmony_ci#endif 36bf215546Sopenharmony_ci#define d(fmt, ...) \ 37bf215546Sopenharmony_ci do { \ 38bf215546Sopenharmony_ci if (RA_DEBUG) { \ 39bf215546Sopenharmony_ci mesa_logi("RA: " fmt, ##__VA_ARGS__); \ 40bf215546Sopenharmony_ci } \ 41bf215546Sopenharmony_ci } while (0) 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#define di(instr, fmt, ...) \ 44bf215546Sopenharmony_ci do { \ 45bf215546Sopenharmony_ci if (RA_DEBUG) { \ 46bf215546Sopenharmony_ci struct log_stream *stream = mesa_log_streami(); \ 47bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "RA: " fmt ": ", ##__VA_ARGS__); \ 48bf215546Sopenharmony_ci ir3_print_instr_stream(stream, instr); \ 49bf215546Sopenharmony_ci mesa_log_stream_destroy(stream); \ 50bf215546Sopenharmony_ci } \ 51bf215546Sopenharmony_ci } while (0) 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_citypedef uint16_t physreg_t; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_cistatic inline unsigned 56bf215546Sopenharmony_cira_physreg_to_num(physreg_t physreg, unsigned flags) 57bf215546Sopenharmony_ci{ 58bf215546Sopenharmony_ci if (!(flags & IR3_REG_HALF)) 59bf215546Sopenharmony_ci physreg /= 2; 60bf215546Sopenharmony_ci if (flags & IR3_REG_SHARED) 61bf215546Sopenharmony_ci physreg += 48 * 4; 62bf215546Sopenharmony_ci return physreg; 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_cistatic inline physreg_t 66bf215546Sopenharmony_cira_num_to_physreg(unsigned num, unsigned flags) 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci if (flags & IR3_REG_SHARED) 69bf215546Sopenharmony_ci num -= 48 * 4; 70bf215546Sopenharmony_ci if (!(flags & IR3_REG_HALF)) 71bf215546Sopenharmony_ci num *= 2; 72bf215546Sopenharmony_ci return num; 73bf215546Sopenharmony_ci} 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_cistatic inline unsigned 76bf215546Sopenharmony_cira_reg_get_num(const struct ir3_register *reg) 77bf215546Sopenharmony_ci{ 78bf215546Sopenharmony_ci return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num; 79bf215546Sopenharmony_ci} 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_cistatic inline physreg_t 82bf215546Sopenharmony_cira_reg_get_physreg(const struct ir3_register *reg) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags); 85bf215546Sopenharmony_ci} 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_cistatic inline bool 88bf215546Sopenharmony_cidef_is_gpr(const struct ir3_register *reg) 89bf215546Sopenharmony_ci{ 90bf215546Sopenharmony_ci return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0; 91bf215546Sopenharmony_ci} 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci/* Note: don't count undef as a source. 94bf215546Sopenharmony_ci */ 95bf215546Sopenharmony_cistatic inline bool 96bf215546Sopenharmony_cira_reg_is_src(const struct ir3_register *reg) 97bf215546Sopenharmony_ci{ 98bf215546Sopenharmony_ci return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def); 99bf215546Sopenharmony_ci} 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_cistatic inline bool 102bf215546Sopenharmony_cira_reg_is_dst(const struct ir3_register *reg) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) && 105bf215546Sopenharmony_ci ((reg->flags & IR3_REG_ARRAY) || reg->wrmask); 106bf215546Sopenharmony_ci} 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci/* Iterators for sources and destinations which: 109bf215546Sopenharmony_ci * - Don't include fake sources (irrelevant for RA) 110bf215546Sopenharmony_ci * - Don't include non-SSA sources (immediates and constants, also irrelevant) 111bf215546Sopenharmony_ci */ 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci#define ra_foreach_src_n(__srcreg, __n, __instr) \ 114bf215546Sopenharmony_ci foreach_src_n(__srcreg, __n, __instr) \ 115bf215546Sopenharmony_ci if (ra_reg_is_src(__srcreg)) 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci#define ra_foreach_src(__srcreg, __instr) \ 118bf215546Sopenharmony_ci ra_foreach_src_n(__srcreg, __i, __instr) 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci#define ra_foreach_src_rev(__srcreg, __instr) \ 121bf215546Sopenharmony_ci for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \ 122bf215546Sopenharmony_ci for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0; \ 123bf215546Sopenharmony_ci __i--) \ 124bf215546Sopenharmony_ci if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i]))) 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci#define ra_foreach_dst_n(__dstreg, __n, __instr) \ 127bf215546Sopenharmony_ci foreach_dst_n(__dstreg, __n, __instr) \ 128bf215546Sopenharmony_ci if (ra_reg_is_dst(__dstreg)) 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci#define ra_foreach_dst(__dstreg, __instr) \ 131bf215546Sopenharmony_ci ra_foreach_dst_n(__dstreg, __i, __instr) 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci#define RA_HALF_SIZE (4 * 48) 134bf215546Sopenharmony_ci#define RA_FULL_SIZE (4 * 48 * 2) 135bf215546Sopenharmony_ci#define RA_SHARED_SIZE (2 * 4 * 8) 136bf215546Sopenharmony_ci#define RA_MAX_FILE_SIZE RA_FULL_SIZE 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_cistruct ir3_liveness { 139bf215546Sopenharmony_ci unsigned block_count; 140bf215546Sopenharmony_ci unsigned interval_offset; 141bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_register *, definitions); 142bf215546Sopenharmony_ci DECLARE_ARRAY(BITSET_WORD *, live_out); 143bf215546Sopenharmony_ci DECLARE_ARRAY(BITSET_WORD *, live_in); 144bf215546Sopenharmony_ci}; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_cistruct ir3_liveness *ir3_calc_liveness(void *mem_ctx, struct ir3 *ir); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_cibool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def, 149bf215546Sopenharmony_ci struct ir3_instruction *instr); 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_civoid ir3_create_parallel_copies(struct ir3 *ir); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_civoid ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_civoid ir3_force_merge(struct ir3_register *a, struct ir3_register *b, 156bf215546Sopenharmony_ci int b_offset); 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_cistruct ir3_pressure { 159bf215546Sopenharmony_ci unsigned full, half, shared; 160bf215546Sopenharmony_ci}; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_civoid ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live, 163bf215546Sopenharmony_ci struct ir3_pressure *max_pressure); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_cibool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v, 166bf215546Sopenharmony_ci struct ir3_liveness **live, 167bf215546Sopenharmony_ci const struct ir3_pressure *limit_pressure); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_cibool ir3_lower_spill(struct ir3 *ir); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_civoid ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, 172bf215546Sopenharmony_ci unsigned half_size, unsigned block_count); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_civoid ir3_lower_copies(struct ir3_shader_variant *v); 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci/* Register interval datastructure 177bf215546Sopenharmony_ci * 178bf215546Sopenharmony_ci * ir3_reg_ctx is used to track which registers are live. The tricky part is 179bf215546Sopenharmony_ci * that some registers may overlap each other, when registers with overlapping 180bf215546Sopenharmony_ci * live ranges get coalesced. For example, splits will overlap with their 181bf215546Sopenharmony_ci * parent vector and sometimes collect sources will also overlap with the 182bf215546Sopenharmony_ci * collect'ed vector. ir3_merge_regs guarantees for us that none of the 183bf215546Sopenharmony_ci * registers in a merge set that are live at any given point partially 184bf215546Sopenharmony_ci * overlap, which means that we can organize them into a forest. While each 185bf215546Sopenharmony_ci * register has a per-merge-set offset, ir3_merge_regs also computes a 186bf215546Sopenharmony_ci * "global" offset which allows us to throw away the original merge sets and 187bf215546Sopenharmony_ci * think of registers as just intervals in a forest of live intervals. When a 188bf215546Sopenharmony_ci * register becomes live, we insert it into the forest, and when it dies we 189bf215546Sopenharmony_ci * remove it from the forest (and then its children get moved up a level). We 190bf215546Sopenharmony_ci * use red-black trees to keep track of each level of the forest, so insertion 191bf215546Sopenharmony_ci * and deletion should be fast operations. ir3_reg_ctx handles all the 192bf215546Sopenharmony_ci * internal bookkeeping for this, so that it can be shared between RA, 193bf215546Sopenharmony_ci * spilling, and register pressure tracking. 194bf215546Sopenharmony_ci */ 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_cistruct ir3_reg_interval { 197bf215546Sopenharmony_ci struct rb_node node; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci struct rb_tree children; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci struct ir3_reg_interval *parent; 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci struct ir3_register *reg; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci bool inserted; 206bf215546Sopenharmony_ci}; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistruct ir3_reg_ctx { 209bf215546Sopenharmony_ci /* The tree of top-level intervals in the forest. */ 210bf215546Sopenharmony_ci struct rb_tree intervals; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci /* Users of ir3_reg_ctx need to keep around additional state that is 213bf215546Sopenharmony_ci * modified when top-level intervals are added or removed. For register 214bf215546Sopenharmony_ci * pressure tracking, this is just the register pressure, but for RA we 215bf215546Sopenharmony_ci * need to keep track of the physreg of each top-level interval. These 216bf215546Sopenharmony_ci * callbacks provide a place to let users deriving from ir3_reg_ctx update 217bf215546Sopenharmony_ci * their state when top-level intervals are inserted/removed. 218bf215546Sopenharmony_ci */ 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci /* Called when an interval is added and it turns out to be at the top 221bf215546Sopenharmony_ci * level. 222bf215546Sopenharmony_ci */ 223bf215546Sopenharmony_ci void (*interval_add)(struct ir3_reg_ctx *ctx, 224bf215546Sopenharmony_ci struct ir3_reg_interval *interval); 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci /* Called when an interval is deleted from the top level. */ 227bf215546Sopenharmony_ci void (*interval_delete)(struct ir3_reg_ctx *ctx, 228bf215546Sopenharmony_ci struct ir3_reg_interval *interval); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci /* Called when an interval is deleted and its child becomes top-level. 231bf215546Sopenharmony_ci */ 232bf215546Sopenharmony_ci void (*interval_readd)(struct ir3_reg_ctx *ctx, 233bf215546Sopenharmony_ci struct ir3_reg_interval *parent, 234bf215546Sopenharmony_ci struct ir3_reg_interval *child); 235bf215546Sopenharmony_ci}; 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_cistatic inline struct ir3_reg_interval * 238bf215546Sopenharmony_ciir3_rb_node_to_interval(struct rb_node *node) 239bf215546Sopenharmony_ci{ 240bf215546Sopenharmony_ci return rb_node_data(struct ir3_reg_interval, node, node); 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_cistatic inline const struct ir3_reg_interval * 244bf215546Sopenharmony_ciir3_rb_node_to_interval_const(const struct rb_node *node) 245bf215546Sopenharmony_ci{ 246bf215546Sopenharmony_ci return rb_node_data(struct ir3_reg_interval, node, node); 247bf215546Sopenharmony_ci} 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_cistatic inline struct ir3_reg_interval * 250bf215546Sopenharmony_ciir3_reg_interval_next(struct ir3_reg_interval *interval) 251bf215546Sopenharmony_ci{ 252bf215546Sopenharmony_ci struct rb_node *next = rb_node_next(&interval->node); 253bf215546Sopenharmony_ci return next ? ir3_rb_node_to_interval(next) : NULL; 254bf215546Sopenharmony_ci} 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_cistatic inline struct ir3_reg_interval * 257bf215546Sopenharmony_ciir3_reg_interval_next_or_null(struct ir3_reg_interval *interval) 258bf215546Sopenharmony_ci{ 259bf215546Sopenharmony_ci return interval ? ir3_reg_interval_next(interval) : NULL; 260bf215546Sopenharmony_ci} 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_cistatic inline void 263bf215546Sopenharmony_ciir3_reg_interval_init(struct ir3_reg_interval *interval, 264bf215546Sopenharmony_ci struct ir3_register *reg) 265bf215546Sopenharmony_ci{ 266bf215546Sopenharmony_ci rb_tree_init(&interval->children); 267bf215546Sopenharmony_ci interval->reg = reg; 268bf215546Sopenharmony_ci interval->parent = NULL; 269bf215546Sopenharmony_ci interval->inserted = false; 270bf215546Sopenharmony_ci} 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_civoid ir3_reg_interval_dump(struct log_stream *stream, 273bf215546Sopenharmony_ci struct ir3_reg_interval *interval); 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_civoid ir3_reg_interval_insert(struct ir3_reg_ctx *ctx, 276bf215546Sopenharmony_ci struct ir3_reg_interval *interval); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_civoid ir3_reg_interval_remove(struct ir3_reg_ctx *ctx, 279bf215546Sopenharmony_ci struct ir3_reg_interval *interval); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_civoid ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx, 282bf215546Sopenharmony_ci struct ir3_reg_interval *interval); 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci#endif 285