1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation 3bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22bf215546Sopenharmony_ci * SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "ir3_ra.h" 26bf215546Sopenharmony_ci#include "util/rb_tree.h" 27bf215546Sopenharmony_ci#include "util/u_math.h" 28bf215546Sopenharmony_ci#include "ir3_shader.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci/* This file implements an SSA-based register allocator. Unlike other 31bf215546Sopenharmony_ci * SSA-based allocators, it handles vector split/collect "smartly," meaning 32bf215546Sopenharmony_ci * that multiple values may share the same register interval. From the 33bf215546Sopenharmony_ci * perspective of the allocator itself, only the top-level intervals matter, 34bf215546Sopenharmony_ci * and the allocator is only concerned with allocating top-level intervals, 35bf215546Sopenharmony_ci * which may mean moving other top-level intervals around. Other intervals, 36bf215546Sopenharmony_ci * like the destination of a split instruction or the source of a collect 37bf215546Sopenharmony_ci * instruction, are "locked" to their parent interval. The details of this are 38bf215546Sopenharmony_ci * mostly handled by ir3_merge_regs and ir3_reg_ctx. 39bf215546Sopenharmony_ci * 40bf215546Sopenharmony_ci * We currently don't do any backtracking, but we do use the merge sets as a 41bf215546Sopenharmony_ci * form of affinity to try to avoid moves from phis/splits/collects. Each 42bf215546Sopenharmony_ci * merge set is what a more "classic" graph-coloring or live-range based 43bf215546Sopenharmony_ci * allocator would consider a single register, but here we use it as merely a 44bf215546Sopenharmony_ci * hint, except when multiple overlapping values are live at the same time. 45bf215546Sopenharmony_ci * Each merge set has a "preferred" register, and we try to honor that when 46bf215546Sopenharmony_ci * allocating values in the merge set. 47bf215546Sopenharmony_ci */ 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci/* ir3_reg_ctx implementation. */ 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_cistatic int 52bf215546Sopenharmony_ciir3_reg_interval_cmp(const struct rb_node *node, const void *data) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci unsigned reg = *(const unsigned *)data; 55bf215546Sopenharmony_ci const struct ir3_reg_interval *interval = 56bf215546Sopenharmony_ci ir3_rb_node_to_interval_const(node); 57bf215546Sopenharmony_ci if (interval->reg->interval_start > reg) 58bf215546Sopenharmony_ci return -1; 59bf215546Sopenharmony_ci else if (interval->reg->interval_end <= reg) 60bf215546Sopenharmony_ci return 1; 61bf215546Sopenharmony_ci else 62bf215546Sopenharmony_ci return 0; 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_cistatic struct ir3_reg_interval * 66bf215546Sopenharmony_ciir3_reg_interval_search(struct rb_tree *tree, unsigned offset) 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci struct rb_node *node = rb_tree_search(tree, &offset, ir3_reg_interval_cmp); 69bf215546Sopenharmony_ci return node ? ir3_rb_node_to_interval(node) : NULL; 70bf215546Sopenharmony_ci} 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_cistatic struct ir3_reg_interval * 73bf215546Sopenharmony_ciir3_reg_interval_search_sloppy(struct rb_tree *tree, unsigned offset) 74bf215546Sopenharmony_ci{ 75bf215546Sopenharmony_ci struct rb_node *node = 76bf215546Sopenharmony_ci rb_tree_search_sloppy(tree, &offset, ir3_reg_interval_cmp); 77bf215546Sopenharmony_ci return node ? ir3_rb_node_to_interval(node) : NULL; 78bf215546Sopenharmony_ci} 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci/* Get the interval covering the reg, or the closest to the right if it 81bf215546Sopenharmony_ci * doesn't exist. 82bf215546Sopenharmony_ci */ 83bf215546Sopenharmony_cistatic struct ir3_reg_interval * 84bf215546Sopenharmony_ciir3_reg_interval_search_right(struct rb_tree *tree, unsigned offset) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci struct ir3_reg_interval *interval = 87bf215546Sopenharmony_ci ir3_reg_interval_search_sloppy(tree, offset); 88bf215546Sopenharmony_ci if (!interval) { 89bf215546Sopenharmony_ci return NULL; 90bf215546Sopenharmony_ci } else if (interval->reg->interval_end > offset) { 91bf215546Sopenharmony_ci return interval; 92bf215546Sopenharmony_ci } else { 93bf215546Sopenharmony_ci /* There is no interval covering reg, and ra_file_search_sloppy() 94bf215546Sopenharmony_ci * returned the closest range to the left, so the next interval to the 95bf215546Sopenharmony_ci * right should be the closest to the right. 96bf215546Sopenharmony_ci */ 97bf215546Sopenharmony_ci return ir3_reg_interval_next_or_null(interval); 98bf215546Sopenharmony_ci } 99bf215546Sopenharmony_ci} 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_cistatic int 102bf215546Sopenharmony_ciir3_reg_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci const struct ir3_reg_interval *a = ir3_rb_node_to_interval_const(_a); 105bf215546Sopenharmony_ci const struct ir3_reg_interval *b = ir3_rb_node_to_interval_const(_b); 106bf215546Sopenharmony_ci return b->reg->interval_start - a->reg->interval_start; 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cistatic void 110bf215546Sopenharmony_ciinterval_insert(struct ir3_reg_ctx *ctx, struct rb_tree *tree, 111bf215546Sopenharmony_ci struct ir3_reg_interval *interval) 112bf215546Sopenharmony_ci{ 113bf215546Sopenharmony_ci struct ir3_reg_interval *right = 114bf215546Sopenharmony_ci ir3_reg_interval_search_right(tree, interval->reg->interval_start); 115bf215546Sopenharmony_ci if (right && right->reg->interval_start < interval->reg->interval_end) { 116bf215546Sopenharmony_ci /* We disallow trees where different members have different half-ness. 117bf215546Sopenharmony_ci * This means that we can't treat bitcasts as copies like normal 118bf215546Sopenharmony_ci * split/collect, so something like this would require an extra copy 119bf215546Sopenharmony_ci * in mergedregs mode, and count as 4 half-units of register pressure 120bf215546Sopenharmony_ci * instead of 2: 121bf215546Sopenharmony_ci * 122bf215546Sopenharmony_ci * f16vec2 foo = unpackFloat2x16(bar) 123bf215546Sopenharmony_ci * ... = foo.x 124bf215546Sopenharmony_ci * ... = bar 125bf215546Sopenharmony_ci * 126bf215546Sopenharmony_ci * However, relaxing this rule would open a huge can of worms. What 127bf215546Sopenharmony_ci * happens when there's a vector of 16 things, and the fifth element 128bf215546Sopenharmony_ci * has been bitcasted as a half-reg? Would that element alone have to 129bf215546Sopenharmony_ci * be small enough to be used as a half-reg source? Let's keep that 130bf215546Sopenharmony_ci * can of worms firmly shut for now. 131bf215546Sopenharmony_ci */ 132bf215546Sopenharmony_ci assert((interval->reg->flags & IR3_REG_HALF) == 133bf215546Sopenharmony_ci (right->reg->flags & IR3_REG_HALF)); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci if (right->reg->interval_end <= interval->reg->interval_end && 136bf215546Sopenharmony_ci right->reg->interval_start >= interval->reg->interval_start) { 137bf215546Sopenharmony_ci /* Check if we're inserting something that's already inserted */ 138bf215546Sopenharmony_ci assert(interval != right); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci /* "right" is contained in "interval" and must become a child of 141bf215546Sopenharmony_ci * it. There may be further children too. 142bf215546Sopenharmony_ci */ 143bf215546Sopenharmony_ci for (struct ir3_reg_interval *next = ir3_reg_interval_next(right); 144bf215546Sopenharmony_ci right && right->reg->interval_start < interval->reg->interval_end; 145bf215546Sopenharmony_ci right = next, next = ir3_reg_interval_next_or_null(next)) { 146bf215546Sopenharmony_ci /* "right" must be contained in "interval." */ 147bf215546Sopenharmony_ci assert(right->reg->interval_end <= interval->reg->interval_end); 148bf215546Sopenharmony_ci assert((interval->reg->flags & IR3_REG_HALF) == 149bf215546Sopenharmony_ci (right->reg->flags & IR3_REG_HALF)); 150bf215546Sopenharmony_ci if (!right->parent) 151bf215546Sopenharmony_ci ctx->interval_delete(ctx, right); 152bf215546Sopenharmony_ci right->parent = interval; 153bf215546Sopenharmony_ci rb_tree_remove(tree, &right->node); 154bf215546Sopenharmony_ci rb_tree_insert(&interval->children, &right->node, 155bf215546Sopenharmony_ci ir3_reg_interval_insert_cmp); 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci } else { 158bf215546Sopenharmony_ci /* "right" must contain "interval," since intervals must form a 159bf215546Sopenharmony_ci * tree. 160bf215546Sopenharmony_ci */ 161bf215546Sopenharmony_ci assert(right->reg->interval_start <= interval->reg->interval_start); 162bf215546Sopenharmony_ci interval->parent = right; 163bf215546Sopenharmony_ci interval_insert(ctx, &right->children, interval); 164bf215546Sopenharmony_ci return; 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci } 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci if (!interval->parent) 169bf215546Sopenharmony_ci ctx->interval_add(ctx, interval); 170bf215546Sopenharmony_ci rb_tree_insert(tree, &interval->node, ir3_reg_interval_insert_cmp); 171bf215546Sopenharmony_ci interval->inserted = true; 172bf215546Sopenharmony_ci} 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_civoid 175bf215546Sopenharmony_ciir3_reg_interval_insert(struct ir3_reg_ctx *ctx, 176bf215546Sopenharmony_ci struct ir3_reg_interval *interval) 177bf215546Sopenharmony_ci{ 178bf215546Sopenharmony_ci rb_tree_init(&interval->children); 179bf215546Sopenharmony_ci interval->parent = NULL; 180bf215546Sopenharmony_ci interval_insert(ctx, &ctx->intervals, interval); 181bf215546Sopenharmony_ci} 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci/* Call after ir3_reg_interval_remove_temp() to reinsert the interval */ 184bf215546Sopenharmony_cistatic void 185bf215546Sopenharmony_ciir3_reg_interval_reinsert(struct ir3_reg_ctx *ctx, 186bf215546Sopenharmony_ci struct ir3_reg_interval *interval) 187bf215546Sopenharmony_ci{ 188bf215546Sopenharmony_ci interval->parent = NULL; 189bf215546Sopenharmony_ci interval_insert(ctx, &ctx->intervals, interval); 190bf215546Sopenharmony_ci} 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_civoid 193bf215546Sopenharmony_ciir3_reg_interval_remove(struct ir3_reg_ctx *ctx, 194bf215546Sopenharmony_ci struct ir3_reg_interval *interval) 195bf215546Sopenharmony_ci{ 196bf215546Sopenharmony_ci if (interval->parent) { 197bf215546Sopenharmony_ci rb_tree_remove(&interval->parent->children, &interval->node); 198bf215546Sopenharmony_ci } else { 199bf215546Sopenharmony_ci ctx->interval_delete(ctx, interval); 200bf215546Sopenharmony_ci rb_tree_remove(&ctx->intervals, &interval->node); 201bf215546Sopenharmony_ci } 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci rb_tree_foreach_safe (struct ir3_reg_interval, child, &interval->children, 204bf215546Sopenharmony_ci node) { 205bf215546Sopenharmony_ci rb_tree_remove(&interval->children, &child->node); 206bf215546Sopenharmony_ci child->parent = interval->parent; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci if (interval->parent) { 209bf215546Sopenharmony_ci rb_tree_insert(&child->parent->children, &child->node, 210bf215546Sopenharmony_ci ir3_reg_interval_insert_cmp); 211bf215546Sopenharmony_ci } else { 212bf215546Sopenharmony_ci ctx->interval_readd(ctx, interval, child); 213bf215546Sopenharmony_ci rb_tree_insert(&ctx->intervals, &child->node, 214bf215546Sopenharmony_ci ir3_reg_interval_insert_cmp); 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci interval->inserted = false; 219bf215546Sopenharmony_ci} 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_cistatic void 222bf215546Sopenharmony_ci_mark_free(struct ir3_reg_interval *interval) 223bf215546Sopenharmony_ci{ 224bf215546Sopenharmony_ci interval->inserted = false; 225bf215546Sopenharmony_ci rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) { 226bf215546Sopenharmony_ci _mark_free(child); 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci} 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci/* Remove an interval and all its children from the tree. */ 231bf215546Sopenharmony_civoid 232bf215546Sopenharmony_ciir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx, 233bf215546Sopenharmony_ci struct ir3_reg_interval *interval) 234bf215546Sopenharmony_ci{ 235bf215546Sopenharmony_ci assert(!interval->parent); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci ctx->interval_delete(ctx, interval); 238bf215546Sopenharmony_ci rb_tree_remove(&ctx->intervals, &interval->node); 239bf215546Sopenharmony_ci _mark_free(interval); 240bf215546Sopenharmony_ci} 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci/* Used when popping an interval to be shuffled around. Don't disturb children 243bf215546Sopenharmony_ci * so that it can be later reinserted. 244bf215546Sopenharmony_ci */ 245bf215546Sopenharmony_cistatic void 246bf215546Sopenharmony_ciir3_reg_interval_remove_temp(struct ir3_reg_ctx *ctx, 247bf215546Sopenharmony_ci struct ir3_reg_interval *interval) 248bf215546Sopenharmony_ci{ 249bf215546Sopenharmony_ci assert(!interval->parent); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci ctx->interval_delete(ctx, interval); 252bf215546Sopenharmony_ci rb_tree_remove(&ctx->intervals, &interval->node); 253bf215546Sopenharmony_ci} 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_cistatic void 256bf215546Sopenharmony_ciinterval_dump(struct log_stream *stream, struct ir3_reg_interval *interval, 257bf215546Sopenharmony_ci unsigned indent) 258bf215546Sopenharmony_ci{ 259bf215546Sopenharmony_ci for (unsigned i = 0; i < indent; i++) 260bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "\t"); 261bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "reg %u start %u\n", interval->reg->name, 262bf215546Sopenharmony_ci interval->reg->interval_start); 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci rb_tree_foreach (struct ir3_reg_interval, child, &interval->children, node) { 265bf215546Sopenharmony_ci interval_dump(stream, child, indent + 1); 266bf215546Sopenharmony_ci } 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci for (unsigned i = 0; i < indent; i++) 269bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "\t"); 270bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "reg %u end %u\n", interval->reg->name, 271bf215546Sopenharmony_ci interval->reg->interval_end); 272bf215546Sopenharmony_ci} 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_civoid 275bf215546Sopenharmony_ciir3_reg_interval_dump(struct log_stream *stream, struct ir3_reg_interval *interval) 276bf215546Sopenharmony_ci{ 277bf215546Sopenharmony_ci interval_dump(stream, interval, 0); 278bf215546Sopenharmony_ci} 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci/* These are the core datastructures used by the register allocator. First 281bf215546Sopenharmony_ci * ra_interval and ra_file, which are used for intra-block tracking and use 282bf215546Sopenharmony_ci * the ir3_reg_ctx infrastructure: 283bf215546Sopenharmony_ci */ 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_cistruct ra_interval { 286bf215546Sopenharmony_ci struct ir3_reg_interval interval; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci struct rb_node physreg_node; 289bf215546Sopenharmony_ci physreg_t physreg_start, physreg_end; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci /* True if this is a source of the current instruction which is entirely 292bf215546Sopenharmony_ci * killed. This means we can allocate the dest over it, but we can't break 293bf215546Sopenharmony_ci * it up. 294bf215546Sopenharmony_ci */ 295bf215546Sopenharmony_ci bool is_killed; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci /* True if this interval cannot be moved from its position. This is only 298bf215546Sopenharmony_ci * used for precolored inputs to ensure that other inputs don't get 299bf215546Sopenharmony_ci * allocated on top of them. 300bf215546Sopenharmony_ci */ 301bf215546Sopenharmony_ci bool frozen; 302bf215546Sopenharmony_ci}; 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_cistruct ra_file { 305bf215546Sopenharmony_ci struct ir3_reg_ctx reg_ctx; 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci BITSET_DECLARE(available, RA_MAX_FILE_SIZE); 308bf215546Sopenharmony_ci BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci struct rb_tree physreg_intervals; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci unsigned size; 313bf215546Sopenharmony_ci unsigned start; 314bf215546Sopenharmony_ci}; 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci/* State for inter-block tracking. When we split a live range to make space 317bf215546Sopenharmony_ci * for a vector, we may need to insert fixup code when a block has multiple 318bf215546Sopenharmony_ci * predecessors that have moved the same live value to different registers. 319bf215546Sopenharmony_ci * This keeps track of state required to do that. 320bf215546Sopenharmony_ci */ 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_cistruct ra_block_state { 323bf215546Sopenharmony_ci /* Map of defining ir3_register -> physreg it was allocated to at the end 324bf215546Sopenharmony_ci * of the block. 325bf215546Sopenharmony_ci */ 326bf215546Sopenharmony_ci struct hash_table *renames; 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci /* For loops, we need to process a block before all its predecessors have 329bf215546Sopenharmony_ci * been processed. In particular, we need to pick registers for values 330bf215546Sopenharmony_ci * without knowing if all the predecessors have been renamed. This keeps 331bf215546Sopenharmony_ci * track of the registers we chose so that when we visit the back-edge we 332bf215546Sopenharmony_ci * can move them appropriately. If all predecessors have been visited 333bf215546Sopenharmony_ci * before this block is visited then we don't need to fill this out. This 334bf215546Sopenharmony_ci * is a map from ir3_register -> physreg. 335bf215546Sopenharmony_ci */ 336bf215546Sopenharmony_ci struct hash_table *entry_regs; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci /* True if the block has been visited and "renames" is complete. 339bf215546Sopenharmony_ci */ 340bf215546Sopenharmony_ci bool visited; 341bf215546Sopenharmony_ci}; 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_cistruct ra_parallel_copy { 344bf215546Sopenharmony_ci struct ra_interval *interval; 345bf215546Sopenharmony_ci physreg_t src; 346bf215546Sopenharmony_ci}; 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci/* The main context: */ 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_cistruct ra_ctx { 351bf215546Sopenharmony_ci /* r0.x - r47.w. On a6xx with merged-regs, hr0.x-hr47.w go into the bottom 352bf215546Sopenharmony_ci * half of this file too. 353bf215546Sopenharmony_ci */ 354bf215546Sopenharmony_ci struct ra_file full; 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci /* hr0.x - hr63.w, only used without merged-regs. */ 357bf215546Sopenharmony_ci struct ra_file half; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci /* Shared regs. */ 360bf215546Sopenharmony_ci struct ra_file shared; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci struct ir3_liveness *live; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci struct ir3_block *block; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci const struct ir3_compiler *compiler; 367bf215546Sopenharmony_ci gl_shader_stage stage; 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci /* Pending moves of top-level intervals that will be emitted once we're 370bf215546Sopenharmony_ci * finished: 371bf215546Sopenharmony_ci */ 372bf215546Sopenharmony_ci DECLARE_ARRAY(struct ra_parallel_copy, parallel_copies); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci struct ra_interval *intervals; 375bf215546Sopenharmony_ci struct ra_block_state *blocks; 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci bool merged_regs; 378bf215546Sopenharmony_ci}; 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci#define foreach_interval(interval, file) \ 381bf215546Sopenharmony_ci rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals, \ 382bf215546Sopenharmony_ci physreg_node) 383bf215546Sopenharmony_ci#define foreach_interval_rev(interval, file) \ 384bf215546Sopenharmony_ci rb_tree_foreach (struct ra_interval, interval, &(file)->physreg_intervals, \ 385bf215546Sopenharmony_ci physreg_node) 386bf215546Sopenharmony_ci#define foreach_interval_safe(interval, file) \ 387bf215546Sopenharmony_ci rb_tree_foreach_safe (struct ra_interval, interval, \ 388bf215546Sopenharmony_ci &(file)->physreg_intervals, physreg_node) 389bf215546Sopenharmony_ci#define foreach_interval_rev_safe(interval, file) \ 390bf215546Sopenharmony_ci rb_tree_foreach_rev_safe(struct ra_interval, interval, \ 391bf215546Sopenharmony_ci &(file)->physreg_intervals, physreg_node) 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_cistatic struct ra_interval * 394bf215546Sopenharmony_cirb_node_to_interval(struct rb_node *node) 395bf215546Sopenharmony_ci{ 396bf215546Sopenharmony_ci return rb_node_data(struct ra_interval, node, physreg_node); 397bf215546Sopenharmony_ci} 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_cistatic const struct ra_interval * 400bf215546Sopenharmony_cirb_node_to_interval_const(const struct rb_node *node) 401bf215546Sopenharmony_ci{ 402bf215546Sopenharmony_ci return rb_node_data(struct ra_interval, node, physreg_node); 403bf215546Sopenharmony_ci} 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_cistatic struct ra_interval * 406bf215546Sopenharmony_cira_interval_next(struct ra_interval *interval) 407bf215546Sopenharmony_ci{ 408bf215546Sopenharmony_ci struct rb_node *next = rb_node_next(&interval->physreg_node); 409bf215546Sopenharmony_ci return next ? rb_node_to_interval(next) : NULL; 410bf215546Sopenharmony_ci} 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_cistatic struct ra_interval * 413bf215546Sopenharmony_cira_interval_next_or_null(struct ra_interval *interval) 414bf215546Sopenharmony_ci{ 415bf215546Sopenharmony_ci return interval ? ra_interval_next(interval) : NULL; 416bf215546Sopenharmony_ci} 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_cistatic int 419bf215546Sopenharmony_cira_interval_cmp(const struct rb_node *node, const void *data) 420bf215546Sopenharmony_ci{ 421bf215546Sopenharmony_ci physreg_t reg = *(const physreg_t *)data; 422bf215546Sopenharmony_ci const struct ra_interval *interval = rb_node_to_interval_const(node); 423bf215546Sopenharmony_ci if (interval->physreg_start > reg) 424bf215546Sopenharmony_ci return -1; 425bf215546Sopenharmony_ci else if (interval->physreg_end <= reg) 426bf215546Sopenharmony_ci return 1; 427bf215546Sopenharmony_ci else 428bf215546Sopenharmony_ci return 0; 429bf215546Sopenharmony_ci} 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_cistatic struct ra_interval * 432bf215546Sopenharmony_cira_interval_search_sloppy(struct rb_tree *tree, physreg_t reg) 433bf215546Sopenharmony_ci{ 434bf215546Sopenharmony_ci struct rb_node *node = rb_tree_search_sloppy(tree, ®, ra_interval_cmp); 435bf215546Sopenharmony_ci return node ? rb_node_to_interval(node) : NULL; 436bf215546Sopenharmony_ci} 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci/* Get the interval covering the reg, or the closest to the right if it 439bf215546Sopenharmony_ci * doesn't exist. 440bf215546Sopenharmony_ci */ 441bf215546Sopenharmony_cistatic struct ra_interval * 442bf215546Sopenharmony_cira_interval_search_right(struct rb_tree *tree, physreg_t reg) 443bf215546Sopenharmony_ci{ 444bf215546Sopenharmony_ci struct ra_interval *interval = ra_interval_search_sloppy(tree, reg); 445bf215546Sopenharmony_ci if (!interval) { 446bf215546Sopenharmony_ci return NULL; 447bf215546Sopenharmony_ci } else if (interval->physreg_end > reg) { 448bf215546Sopenharmony_ci return interval; 449bf215546Sopenharmony_ci } else { 450bf215546Sopenharmony_ci /* There is no interval covering reg, and ra_file_search_sloppy() 451bf215546Sopenharmony_ci * returned the closest range to the left, so the next interval to the 452bf215546Sopenharmony_ci * right should be the closest to the right. 453bf215546Sopenharmony_ci */ 454bf215546Sopenharmony_ci return ra_interval_next_or_null(interval); 455bf215546Sopenharmony_ci } 456bf215546Sopenharmony_ci} 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_cistatic struct ra_interval * 459bf215546Sopenharmony_cira_file_search_right(struct ra_file *file, physreg_t reg) 460bf215546Sopenharmony_ci{ 461bf215546Sopenharmony_ci return ra_interval_search_right(&file->physreg_intervals, reg); 462bf215546Sopenharmony_ci} 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_cistatic int 465bf215546Sopenharmony_cira_interval_insert_cmp(const struct rb_node *_a, const struct rb_node *_b) 466bf215546Sopenharmony_ci{ 467bf215546Sopenharmony_ci const struct ra_interval *a = rb_node_to_interval_const(_a); 468bf215546Sopenharmony_ci const struct ra_interval *b = rb_node_to_interval_const(_b); 469bf215546Sopenharmony_ci return b->physreg_start - a->physreg_start; 470bf215546Sopenharmony_ci} 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_cistatic struct ra_interval * 473bf215546Sopenharmony_ciir3_reg_interval_to_ra_interval(struct ir3_reg_interval *interval) 474bf215546Sopenharmony_ci{ 475bf215546Sopenharmony_ci return rb_node_data(struct ra_interval, interval, interval); 476bf215546Sopenharmony_ci} 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_cistatic struct ra_file * 479bf215546Sopenharmony_ciir3_reg_ctx_to_file(struct ir3_reg_ctx *ctx) 480bf215546Sopenharmony_ci{ 481bf215546Sopenharmony_ci return rb_node_data(struct ra_file, ctx, reg_ctx); 482bf215546Sopenharmony_ci} 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_cistatic void 485bf215546Sopenharmony_ciinterval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval) 486bf215546Sopenharmony_ci{ 487bf215546Sopenharmony_ci struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval); 488bf215546Sopenharmony_ci struct ra_file *file = ir3_reg_ctx_to_file(ctx); 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci /* We can assume in this case that physreg_start/physreg_end is already 491bf215546Sopenharmony_ci * initialized. 492bf215546Sopenharmony_ci */ 493bf215546Sopenharmony_ci for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) { 494bf215546Sopenharmony_ci BITSET_CLEAR(file->available, i); 495bf215546Sopenharmony_ci BITSET_CLEAR(file->available_to_evict, i); 496bf215546Sopenharmony_ci } 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci rb_tree_insert(&file->physreg_intervals, &interval->physreg_node, 499bf215546Sopenharmony_ci ra_interval_insert_cmp); 500bf215546Sopenharmony_ci} 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_cistatic void 503bf215546Sopenharmony_ciinterval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_interval) 504bf215546Sopenharmony_ci{ 505bf215546Sopenharmony_ci struct ra_interval *interval = ir3_reg_interval_to_ra_interval(_interval); 506bf215546Sopenharmony_ci struct ra_file *file = ir3_reg_ctx_to_file(ctx); 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) { 509bf215546Sopenharmony_ci BITSET_SET(file->available, i); 510bf215546Sopenharmony_ci BITSET_SET(file->available_to_evict, i); 511bf215546Sopenharmony_ci } 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci rb_tree_remove(&file->physreg_intervals, &interval->physreg_node); 514bf215546Sopenharmony_ci} 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_cistatic void 517bf215546Sopenharmony_ciinterval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *_parent, 518bf215546Sopenharmony_ci struct ir3_reg_interval *_child) 519bf215546Sopenharmony_ci{ 520bf215546Sopenharmony_ci struct ra_interval *parent = ir3_reg_interval_to_ra_interval(_parent); 521bf215546Sopenharmony_ci struct ra_interval *child = ir3_reg_interval_to_ra_interval(_child); 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci child->physreg_start = 524bf215546Sopenharmony_ci parent->physreg_start + (child->interval.reg->interval_start - 525bf215546Sopenharmony_ci parent->interval.reg->interval_start); 526bf215546Sopenharmony_ci child->physreg_end = 527bf215546Sopenharmony_ci child->physreg_start + 528bf215546Sopenharmony_ci (child->interval.reg->interval_end - child->interval.reg->interval_start); 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci interval_add(ctx, _child); 531bf215546Sopenharmony_ci} 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_cistatic void 534bf215546Sopenharmony_cira_file_init(struct ra_file *file) 535bf215546Sopenharmony_ci{ 536bf215546Sopenharmony_ci for (unsigned i = 0; i < file->size; i++) { 537bf215546Sopenharmony_ci BITSET_SET(file->available, i); 538bf215546Sopenharmony_ci BITSET_SET(file->available_to_evict, i); 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci rb_tree_init(&file->reg_ctx.intervals); 542bf215546Sopenharmony_ci rb_tree_init(&file->physreg_intervals); 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci file->reg_ctx.interval_add = interval_add; 545bf215546Sopenharmony_ci file->reg_ctx.interval_delete = interval_delete; 546bf215546Sopenharmony_ci file->reg_ctx.interval_readd = interval_readd; 547bf215546Sopenharmony_ci} 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_cistatic void 550bf215546Sopenharmony_cira_file_insert(struct ra_file *file, struct ra_interval *interval) 551bf215546Sopenharmony_ci{ 552bf215546Sopenharmony_ci assert(interval->physreg_start < interval->physreg_end); 553bf215546Sopenharmony_ci assert(interval->physreg_end <= file->size); 554bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_HALF) 555bf215546Sopenharmony_ci assert(interval->physreg_end <= RA_HALF_SIZE); 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci ir3_reg_interval_insert(&file->reg_ctx, &interval->interval); 558bf215546Sopenharmony_ci} 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_cistatic void 561bf215546Sopenharmony_cira_file_remove(struct ra_file *file, struct ra_interval *interval) 562bf215546Sopenharmony_ci{ 563bf215546Sopenharmony_ci ir3_reg_interval_remove(&file->reg_ctx, &interval->interval); 564bf215546Sopenharmony_ci} 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_cistatic void 567bf215546Sopenharmony_cira_file_mark_killed(struct ra_file *file, struct ra_interval *interval) 568bf215546Sopenharmony_ci{ 569bf215546Sopenharmony_ci assert(!interval->interval.parent); 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) { 572bf215546Sopenharmony_ci BITSET_SET(file->available, i); 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci interval->is_killed = true; 576bf215546Sopenharmony_ci} 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_cistatic void 579bf215546Sopenharmony_cira_file_unmark_killed(struct ra_file *file, struct ra_interval *interval) 580bf215546Sopenharmony_ci{ 581bf215546Sopenharmony_ci assert(!interval->interval.parent); 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci for (physreg_t i = interval->physreg_start; i < interval->physreg_end; i++) { 584bf215546Sopenharmony_ci BITSET_CLEAR(file->available, i); 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci interval->is_killed = false; 588bf215546Sopenharmony_ci} 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_cistatic physreg_t 591bf215546Sopenharmony_cira_interval_get_physreg(const struct ra_interval *interval) 592bf215546Sopenharmony_ci{ 593bf215546Sopenharmony_ci unsigned child_start = interval->interval.reg->interval_start; 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci while (interval->interval.parent) { 596bf215546Sopenharmony_ci interval = ir3_reg_interval_to_ra_interval(interval->interval.parent); 597bf215546Sopenharmony_ci } 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci return interval->physreg_start + 600bf215546Sopenharmony_ci (child_start - interval->interval.reg->interval_start); 601bf215546Sopenharmony_ci} 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_cistatic unsigned 604bf215546Sopenharmony_cira_interval_get_num(const struct ra_interval *interval) 605bf215546Sopenharmony_ci{ 606bf215546Sopenharmony_ci return ra_physreg_to_num(ra_interval_get_physreg(interval), 607bf215546Sopenharmony_ci interval->interval.reg->flags); 608bf215546Sopenharmony_ci} 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_cistatic void 611bf215546Sopenharmony_cira_interval_init(struct ra_interval *interval, struct ir3_register *reg) 612bf215546Sopenharmony_ci{ 613bf215546Sopenharmony_ci ir3_reg_interval_init(&interval->interval, reg); 614bf215546Sopenharmony_ci interval->is_killed = false; 615bf215546Sopenharmony_ci interval->frozen = false; 616bf215546Sopenharmony_ci} 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_cistatic void 619bf215546Sopenharmony_cira_interval_dump(struct log_stream *stream, struct ra_interval *interval) 620bf215546Sopenharmony_ci{ 621bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "physreg %u ", interval->physreg_start); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci ir3_reg_interval_dump(stream, &interval->interval); 624bf215546Sopenharmony_ci} 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_cistatic void 627bf215546Sopenharmony_cira_file_dump(struct log_stream *stream, struct ra_file *file) 628bf215546Sopenharmony_ci{ 629bf215546Sopenharmony_ci rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals, 630bf215546Sopenharmony_ci physreg_node) { 631bf215546Sopenharmony_ci ra_interval_dump(stream, interval); 632bf215546Sopenharmony_ci } 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci unsigned start, end; 635bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "available:\n"); 636bf215546Sopenharmony_ci BITSET_FOREACH_RANGE (start, end, file->available, file->size) { 637bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "%u-%u ", start, end); 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "\n"); 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "available to evict:\n"); 642bf215546Sopenharmony_ci BITSET_FOREACH_RANGE (start, end, file->available_to_evict, file->size) { 643bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "%u-%u ", start, end); 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "\n"); 646bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "start: %u\n", file->start); 647bf215546Sopenharmony_ci} 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_cistatic void 650bf215546Sopenharmony_cira_ctx_dump(struct ra_ctx *ctx) 651bf215546Sopenharmony_ci{ 652bf215546Sopenharmony_ci struct log_stream *stream = mesa_log_streami(); 653bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "full:\n"); 654bf215546Sopenharmony_ci ra_file_dump(stream, &ctx->full); 655bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "half:\n"); 656bf215546Sopenharmony_ci ra_file_dump(stream, &ctx->half); 657bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "shared:"); 658bf215546Sopenharmony_ci ra_file_dump(stream, &ctx->shared); 659bf215546Sopenharmony_ci mesa_log_stream_destroy(stream); 660bf215546Sopenharmony_ci} 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_cistatic unsigned 663bf215546Sopenharmony_cireg_file_size(struct ra_file *file, struct ir3_register *reg) 664bf215546Sopenharmony_ci{ 665bf215546Sopenharmony_ci /* Half-regs can only take up the first half of the combined regfile */ 666bf215546Sopenharmony_ci if (reg->flags & IR3_REG_HALF) 667bf215546Sopenharmony_ci return MIN2(file->size, RA_HALF_SIZE); 668bf215546Sopenharmony_ci else 669bf215546Sopenharmony_ci return file->size; 670bf215546Sopenharmony_ci} 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci/* ra_pop_interval/ra_push_interval provide an API to shuffle around multiple 673bf215546Sopenharmony_ci * top-level intervals at once. Pop multiple intervals, then push them back in 674bf215546Sopenharmony_ci * any order. 675bf215546Sopenharmony_ci */ 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_cistruct ra_removed_interval { 678bf215546Sopenharmony_ci struct ra_interval *interval; 679bf215546Sopenharmony_ci unsigned size; 680bf215546Sopenharmony_ci}; 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_cistatic struct ra_removed_interval 683bf215546Sopenharmony_cira_pop_interval(struct ra_ctx *ctx, struct ra_file *file, 684bf215546Sopenharmony_ci struct ra_interval *interval) 685bf215546Sopenharmony_ci{ 686bf215546Sopenharmony_ci assert(!interval->interval.parent); 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci /* Check if we've already moved this reg before */ 689bf215546Sopenharmony_ci unsigned pcopy_index; 690bf215546Sopenharmony_ci for (pcopy_index = 0; pcopy_index < ctx->parallel_copies_count; 691bf215546Sopenharmony_ci pcopy_index++) { 692bf215546Sopenharmony_ci if (ctx->parallel_copies[pcopy_index].interval == interval) 693bf215546Sopenharmony_ci break; 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci if (pcopy_index == ctx->parallel_copies_count) { 697bf215546Sopenharmony_ci array_insert(ctx, ctx->parallel_copies, 698bf215546Sopenharmony_ci (struct ra_parallel_copy){ 699bf215546Sopenharmony_ci .interval = interval, 700bf215546Sopenharmony_ci .src = interval->physreg_start, 701bf215546Sopenharmony_ci }); 702bf215546Sopenharmony_ci } 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci ir3_reg_interval_remove_temp(&file->reg_ctx, &interval->interval); 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci return (struct ra_removed_interval){ 707bf215546Sopenharmony_ci .interval = interval, 708bf215546Sopenharmony_ci .size = interval->physreg_end - interval->physreg_start, 709bf215546Sopenharmony_ci }; 710bf215546Sopenharmony_ci} 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_cistatic void 713bf215546Sopenharmony_cira_push_interval(struct ra_ctx *ctx, struct ra_file *file, 714bf215546Sopenharmony_ci const struct ra_removed_interval *removed, physreg_t dst) 715bf215546Sopenharmony_ci{ 716bf215546Sopenharmony_ci struct ra_interval *interval = removed->interval; 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci interval->physreg_start = dst; 719bf215546Sopenharmony_ci interval->physreg_end = dst + removed->size; 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci assert(interval->physreg_end <= file->size); 722bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_HALF) 723bf215546Sopenharmony_ci assert(interval->physreg_end <= RA_HALF_SIZE); 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci ir3_reg_interval_reinsert(&file->reg_ctx, &interval->interval); 726bf215546Sopenharmony_ci} 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci/* Pick up the interval and place it at "dst". */ 729bf215546Sopenharmony_cistatic void 730bf215546Sopenharmony_cira_move_interval(struct ra_ctx *ctx, struct ra_file *file, 731bf215546Sopenharmony_ci struct ra_interval *interval, physreg_t dst) 732bf215546Sopenharmony_ci{ 733bf215546Sopenharmony_ci struct ra_removed_interval temp = ra_pop_interval(ctx, file, interval); 734bf215546Sopenharmony_ci ra_push_interval(ctx, file, &temp, dst); 735bf215546Sopenharmony_ci} 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_cistatic struct ra_file * 738bf215546Sopenharmony_cira_get_file(struct ra_ctx *ctx, struct ir3_register *reg) 739bf215546Sopenharmony_ci{ 740bf215546Sopenharmony_ci if (reg->flags & IR3_REG_SHARED) 741bf215546Sopenharmony_ci return &ctx->shared; 742bf215546Sopenharmony_ci else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) 743bf215546Sopenharmony_ci return &ctx->full; 744bf215546Sopenharmony_ci else 745bf215546Sopenharmony_ci return &ctx->half; 746bf215546Sopenharmony_ci} 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci/* Returns true if the proposed spot for "dst" or a killed source overlaps a 750bf215546Sopenharmony_ci * destination that's been allocated. 751bf215546Sopenharmony_ci */ 752bf215546Sopenharmony_cistatic bool 753bf215546Sopenharmony_cicheck_dst_overlap(struct ra_ctx *ctx, struct ra_file *file, 754bf215546Sopenharmony_ci struct ir3_register *dst, physreg_t start, 755bf215546Sopenharmony_ci physreg_t end) 756bf215546Sopenharmony_ci{ 757bf215546Sopenharmony_ci struct ir3_instruction *instr = dst->instr; 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci ra_foreach_dst (other_dst, instr) { 760bf215546Sopenharmony_ci /* We assume only destinations before the current one have been allocated. 761bf215546Sopenharmony_ci */ 762bf215546Sopenharmony_ci if (other_dst == dst) 763bf215546Sopenharmony_ci break; 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci if (ra_get_file(ctx, other_dst) != file) 766bf215546Sopenharmony_ci continue; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci struct ra_interval *other_interval = &ctx->intervals[other_dst->name]; 769bf215546Sopenharmony_ci assert(!other_interval->interval.parent); 770bf215546Sopenharmony_ci physreg_t other_start = other_interval->physreg_start; 771bf215546Sopenharmony_ci physreg_t other_end = other_interval->physreg_end; 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci if (other_end > start && end > other_start) 774bf215546Sopenharmony_ci return true; 775bf215546Sopenharmony_ci } 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci return false; 778bf215546Sopenharmony_ci} 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci/* True if the destination is "early-clobber," meaning that it cannot be 781bf215546Sopenharmony_ci * allocated over killed sources. Some destinations always require it, but it 782bf215546Sopenharmony_ci * also is implicitly true for tied destinations whose source is live-through. 783bf215546Sopenharmony_ci * If the source is killed, then we skip allocating a register for the 784bf215546Sopenharmony_ci * destination altogether so we don't need to worry about that case here. 785bf215546Sopenharmony_ci */ 786bf215546Sopenharmony_cistatic bool 787bf215546Sopenharmony_ciis_early_clobber(struct ir3_register *reg) 788bf215546Sopenharmony_ci{ 789bf215546Sopenharmony_ci return (reg->flags & IR3_REG_EARLY_CLOBBER) || reg->tied; 790bf215546Sopenharmony_ci} 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_cistatic bool 793bf215546Sopenharmony_ciget_reg_specified(struct ra_ctx *ctx, struct ra_file *file, 794bf215546Sopenharmony_ci struct ir3_register *reg, physreg_t physreg, bool is_source) 795bf215546Sopenharmony_ci{ 796bf215546Sopenharmony_ci for (unsigned i = 0; i < reg_size(reg); i++) { 797bf215546Sopenharmony_ci if (!BITSET_TEST(is_early_clobber(reg) || is_source ? 798bf215546Sopenharmony_ci file->available_to_evict : file->available, 799bf215546Sopenharmony_ci physreg + i)) 800bf215546Sopenharmony_ci return false; 801bf215546Sopenharmony_ci } 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci if (!is_source && 804bf215546Sopenharmony_ci check_dst_overlap(ctx, file, reg, physreg, physreg + reg_size(reg))) 805bf215546Sopenharmony_ci return false; 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci return true; 808bf215546Sopenharmony_ci} 809bf215546Sopenharmony_ci 810bf215546Sopenharmony_ci/* Try to evict any registers conflicting with the proposed spot "physreg" for 811bf215546Sopenharmony_ci * "reg". That is, move them to other places so that we can allocate "physreg" 812bf215546Sopenharmony_ci * here. 813bf215546Sopenharmony_ci */ 814bf215546Sopenharmony_ci 815bf215546Sopenharmony_cistatic bool 816bf215546Sopenharmony_citry_evict_regs(struct ra_ctx *ctx, struct ra_file *file, 817bf215546Sopenharmony_ci struct ir3_register *reg, physreg_t physreg, 818bf215546Sopenharmony_ci unsigned *_eviction_count, bool is_source, bool speculative) 819bf215546Sopenharmony_ci{ 820bf215546Sopenharmony_ci BITSET_DECLARE(available_to_evict, RA_MAX_FILE_SIZE); 821bf215546Sopenharmony_ci memcpy(available_to_evict, file->available_to_evict, 822bf215546Sopenharmony_ci sizeof(available_to_evict)); 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci BITSET_DECLARE(available, RA_MAX_FILE_SIZE); 825bf215546Sopenharmony_ci memcpy(available, file->available, sizeof(available)); 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci for (unsigned i = 0; i < reg_size(reg); i++) { 828bf215546Sopenharmony_ci BITSET_CLEAR(available_to_evict, physreg + i); 829bf215546Sopenharmony_ci BITSET_CLEAR(available, physreg + i); 830bf215546Sopenharmony_ci } 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci unsigned eviction_count = 0; 833bf215546Sopenharmony_ci /* Iterate over each range conflicting with physreg */ 834bf215546Sopenharmony_ci for (struct ra_interval *conflicting = ra_file_search_right(file, physreg), 835bf215546Sopenharmony_ci *next = ra_interval_next_or_null(conflicting); 836bf215546Sopenharmony_ci conflicting != NULL && 837bf215546Sopenharmony_ci conflicting->physreg_start < physreg + reg_size(reg); 838bf215546Sopenharmony_ci conflicting = next, next = ra_interval_next_or_null(next)) { 839bf215546Sopenharmony_ci if (!is_early_clobber(reg) && !is_source && conflicting->is_killed) 840bf215546Sopenharmony_ci continue; 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci if (conflicting->frozen) { 843bf215546Sopenharmony_ci assert(speculative); 844bf215546Sopenharmony_ci return false; 845bf215546Sopenharmony_ci } 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci unsigned conflicting_file_size = 848bf215546Sopenharmony_ci reg_file_size(file, conflicting->interval.reg); 849bf215546Sopenharmony_ci unsigned avail_start, avail_end; 850bf215546Sopenharmony_ci bool evicted = false; 851bf215546Sopenharmony_ci BITSET_FOREACH_RANGE (avail_start, avail_end, available_to_evict, 852bf215546Sopenharmony_ci conflicting_file_size) { 853bf215546Sopenharmony_ci unsigned size = avail_end - avail_start; 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci /* non-half registers must be aligned */ 856bf215546Sopenharmony_ci if (!(conflicting->interval.reg->flags & IR3_REG_HALF) && 857bf215546Sopenharmony_ci avail_start % 2 == 1) { 858bf215546Sopenharmony_ci avail_start++; 859bf215546Sopenharmony_ci size--; 860bf215546Sopenharmony_ci } 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci unsigned conflicting_size = 863bf215546Sopenharmony_ci conflicting->physreg_end - conflicting->physreg_start; 864bf215546Sopenharmony_ci if (size >= conflicting_size && 865bf215546Sopenharmony_ci !check_dst_overlap(ctx, file, reg, avail_start, avail_start + 866bf215546Sopenharmony_ci conflicting_size)) { 867bf215546Sopenharmony_ci for (unsigned i = 0; 868bf215546Sopenharmony_ci i < conflicting->physreg_end - conflicting->physreg_start; i++) 869bf215546Sopenharmony_ci BITSET_CLEAR(available_to_evict, avail_start + i); 870bf215546Sopenharmony_ci eviction_count += 871bf215546Sopenharmony_ci conflicting->physreg_end - conflicting->physreg_start; 872bf215546Sopenharmony_ci if (!speculative) 873bf215546Sopenharmony_ci ra_move_interval(ctx, file, conflicting, avail_start); 874bf215546Sopenharmony_ci evicted = true; 875bf215546Sopenharmony_ci break; 876bf215546Sopenharmony_ci } 877bf215546Sopenharmony_ci } 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci if (evicted) 880bf215546Sopenharmony_ci continue; 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci /* If we couldn't evict this range, we may be able to swap it with a 883bf215546Sopenharmony_ci * killed range to acheive the same effect. 884bf215546Sopenharmony_ci */ 885bf215546Sopenharmony_ci foreach_interval (killed, file) { 886bf215546Sopenharmony_ci if (!killed->is_killed) 887bf215546Sopenharmony_ci continue; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci if (killed->physreg_end - killed->physreg_start != 890bf215546Sopenharmony_ci conflicting->physreg_end - conflicting->physreg_start) 891bf215546Sopenharmony_ci continue; 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci if (killed->physreg_end > conflicting_file_size || 894bf215546Sopenharmony_ci conflicting->physreg_end > reg_file_size(file, killed->interval.reg)) 895bf215546Sopenharmony_ci continue; 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci /* We can't swap the killed range if it partially/fully overlaps the 898bf215546Sopenharmony_ci * space we're trying to allocate or (in speculative mode) if it's 899bf215546Sopenharmony_ci * already been swapped and will overlap when we actually evict. 900bf215546Sopenharmony_ci */ 901bf215546Sopenharmony_ci bool killed_available = true; 902bf215546Sopenharmony_ci for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) { 903bf215546Sopenharmony_ci if (!BITSET_TEST(available, i)) { 904bf215546Sopenharmony_ci killed_available = false; 905bf215546Sopenharmony_ci break; 906bf215546Sopenharmony_ci } 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci if (!killed_available) 910bf215546Sopenharmony_ci continue; 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci if (check_dst_overlap(ctx, file, reg, killed->physreg_start, 913bf215546Sopenharmony_ci killed->physreg_end)) 914bf215546Sopenharmony_ci continue; 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci /* Check for alignment if one is a full reg */ 917bf215546Sopenharmony_ci if ((!(killed->interval.reg->flags & IR3_REG_HALF) || 918bf215546Sopenharmony_ci !(conflicting->interval.reg->flags & IR3_REG_HALF)) && 919bf215546Sopenharmony_ci (killed->physreg_start % 2 != 0 || 920bf215546Sopenharmony_ci conflicting->physreg_start % 2 != 0)) 921bf215546Sopenharmony_ci continue; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci for (unsigned i = killed->physreg_start; i < killed->physreg_end; i++) { 924bf215546Sopenharmony_ci BITSET_CLEAR(available, i); 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci /* Because this will generate swaps instead of moves, multiply the 927bf215546Sopenharmony_ci * cost by 2. 928bf215546Sopenharmony_ci */ 929bf215546Sopenharmony_ci eviction_count += (killed->physreg_end - killed->physreg_start) * 2; 930bf215546Sopenharmony_ci if (!speculative) { 931bf215546Sopenharmony_ci physreg_t killed_start = killed->physreg_start, 932bf215546Sopenharmony_ci conflicting_start = conflicting->physreg_start; 933bf215546Sopenharmony_ci struct ra_removed_interval killed_removed = 934bf215546Sopenharmony_ci ra_pop_interval(ctx, file, killed); 935bf215546Sopenharmony_ci struct ra_removed_interval conflicting_removed = 936bf215546Sopenharmony_ci ra_pop_interval(ctx, file, conflicting); 937bf215546Sopenharmony_ci ra_push_interval(ctx, file, &killed_removed, conflicting_start); 938bf215546Sopenharmony_ci ra_push_interval(ctx, file, &conflicting_removed, killed_start); 939bf215546Sopenharmony_ci } 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci evicted = true; 942bf215546Sopenharmony_ci break; 943bf215546Sopenharmony_ci } 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci if (!evicted) 946bf215546Sopenharmony_ci return false; 947bf215546Sopenharmony_ci } 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci *_eviction_count = eviction_count; 950bf215546Sopenharmony_ci return true; 951bf215546Sopenharmony_ci} 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_cistatic int 954bf215546Sopenharmony_ciremoved_interval_cmp(const void *_i1, const void *_i2) 955bf215546Sopenharmony_ci{ 956bf215546Sopenharmony_ci const struct ra_removed_interval *i1 = _i1; 957bf215546Sopenharmony_ci const struct ra_removed_interval *i2 = _i2; 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci /* We sort the registers as follows: 960bf215546Sopenharmony_ci * 961bf215546Sopenharmony_ci * |------------------------------------------------------------------------------------------| 962bf215546Sopenharmony_ci * | | | | | | | 963bf215546Sopenharmony_ci * | Half | Half early-clobber | Half | Full | Full early-clobber | Full | 964bf215546Sopenharmony_ci * | live-through | destination | killed | killed | destination | live-through | 965bf215546Sopenharmony_ci * | | | | | | | 966bf215546Sopenharmony_ci * |------------------------------------------------------------------------------------------| 967bf215546Sopenharmony_ci * | | 968bf215546Sopenharmony_ci * | Destination | 969bf215546Sopenharmony_ci * | | 970bf215546Sopenharmony_ci * |-----------------| 971bf215546Sopenharmony_ci * 972bf215546Sopenharmony_ci * Half-registers have to be first so that they stay in the low half of 973bf215546Sopenharmony_ci * the register file. Then half and full killed must stay together so that 974bf215546Sopenharmony_ci * there's a contiguous range where we can put the register. With this 975bf215546Sopenharmony_ci * structure we should be able to accomodate any collection of intervals 976bf215546Sopenharmony_ci * such that the total number of half components is within the half limit 977bf215546Sopenharmony_ci * and the combined components are within the full limit. 978bf215546Sopenharmony_ci */ 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci unsigned i1_align = reg_elem_size(i1->interval->interval.reg); 981bf215546Sopenharmony_ci unsigned i2_align = reg_elem_size(i2->interval->interval.reg); 982bf215546Sopenharmony_ci if (i1_align > i2_align) 983bf215546Sopenharmony_ci return 1; 984bf215546Sopenharmony_ci if (i1_align < i2_align) 985bf215546Sopenharmony_ci return -1; 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_ci if (i1_align == 1) { 988bf215546Sopenharmony_ci if (i2->interval->is_killed) 989bf215546Sopenharmony_ci return -1; 990bf215546Sopenharmony_ci if (i1->interval->is_killed) 991bf215546Sopenharmony_ci return 1; 992bf215546Sopenharmony_ci } else { 993bf215546Sopenharmony_ci if (i2->interval->is_killed) 994bf215546Sopenharmony_ci return 1; 995bf215546Sopenharmony_ci if (i1->interval->is_killed) 996bf215546Sopenharmony_ci return -1; 997bf215546Sopenharmony_ci } 998bf215546Sopenharmony_ci 999bf215546Sopenharmony_ci return 0; 1000bf215546Sopenharmony_ci} 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_cistatic int 1003bf215546Sopenharmony_cidsts_cmp(const void *_i1, const void *_i2) 1004bf215546Sopenharmony_ci{ 1005bf215546Sopenharmony_ci struct ir3_register *i1 = *(struct ir3_register *const *) _i1; 1006bf215546Sopenharmony_ci struct ir3_register *i2 = *(struct ir3_register *const *) _i2; 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci /* Treat tied destinations as-if they are live-through sources, and normal 1009bf215546Sopenharmony_ci * destinations as killed sources. 1010bf215546Sopenharmony_ci */ 1011bf215546Sopenharmony_ci unsigned i1_align = reg_elem_size(i1); 1012bf215546Sopenharmony_ci unsigned i2_align = reg_elem_size(i2); 1013bf215546Sopenharmony_ci if (i1_align > i2_align) 1014bf215546Sopenharmony_ci return 1; 1015bf215546Sopenharmony_ci if (i1_align < i2_align) 1016bf215546Sopenharmony_ci return -1; 1017bf215546Sopenharmony_ci 1018bf215546Sopenharmony_ci if (i1_align == 1) { 1019bf215546Sopenharmony_ci if (!is_early_clobber(i2)) 1020bf215546Sopenharmony_ci return -1; 1021bf215546Sopenharmony_ci if (!is_early_clobber(i1)) 1022bf215546Sopenharmony_ci return 1; 1023bf215546Sopenharmony_ci } else { 1024bf215546Sopenharmony_ci if (!is_early_clobber(i2)) 1025bf215546Sopenharmony_ci return 1; 1026bf215546Sopenharmony_ci if (!is_early_clobber(i1)) 1027bf215546Sopenharmony_ci return -1; 1028bf215546Sopenharmony_ci } 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci return 0; 1031bf215546Sopenharmony_ci} 1032bf215546Sopenharmony_ci 1033bf215546Sopenharmony_ci/* "Compress" all the live intervals so that there is enough space for the 1034bf215546Sopenharmony_ci * destination register. As there can be gaps when a more-aligned interval 1035bf215546Sopenharmony_ci * follows a less-aligned interval, this also sorts them to remove such 1036bf215546Sopenharmony_ci * "padding", which may be required when space is very tight. This isn't 1037bf215546Sopenharmony_ci * amazing, but should be used only as a last resort in case the register file 1038bf215546Sopenharmony_ci * is almost full and badly fragmented. 1039bf215546Sopenharmony_ci * 1040bf215546Sopenharmony_ci * Return the physreg to use. 1041bf215546Sopenharmony_ci */ 1042bf215546Sopenharmony_cistatic physreg_t 1043bf215546Sopenharmony_cicompress_regs_left(struct ra_ctx *ctx, struct ra_file *file, 1044bf215546Sopenharmony_ci struct ir3_register *reg) 1045bf215546Sopenharmony_ci{ 1046bf215546Sopenharmony_ci unsigned align = reg_elem_size(reg); 1047bf215546Sopenharmony_ci DECLARE_ARRAY(struct ra_removed_interval, intervals); 1048bf215546Sopenharmony_ci intervals_count = intervals_sz = 0; 1049bf215546Sopenharmony_ci intervals = NULL; 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_register *, dsts); 1052bf215546Sopenharmony_ci dsts_count = dsts_sz = 0; 1053bf215546Sopenharmony_ci dsts = NULL; 1054bf215546Sopenharmony_ci array_insert(ctx, dsts, reg); 1055bf215546Sopenharmony_ci bool dst_inserted[reg->instr->dsts_count]; 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_ci unsigned dst_size = reg->tied ? 0 : reg_size(reg); 1058bf215546Sopenharmony_ci unsigned ec_dst_size = is_early_clobber(reg) ? reg_size(reg) : 0; 1059bf215546Sopenharmony_ci unsigned half_dst_size = 0, ec_half_dst_size = 0; 1060bf215546Sopenharmony_ci if (align == 1) { 1061bf215546Sopenharmony_ci half_dst_size = dst_size; 1062bf215546Sopenharmony_ci ec_half_dst_size = ec_dst_size; 1063bf215546Sopenharmony_ci } 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci unsigned removed_size = 0, removed_half_size = 0; 1066bf215546Sopenharmony_ci unsigned removed_killed_size = 0, removed_killed_half_size = 0; 1067bf215546Sopenharmony_ci unsigned file_size = 1068bf215546Sopenharmony_ci align == 1 ? MIN2(file->size, RA_HALF_SIZE) : file->size; 1069bf215546Sopenharmony_ci physreg_t start_reg = 0; 1070bf215546Sopenharmony_ci 1071bf215546Sopenharmony_ci foreach_interval_rev_safe (interval, file) { 1072bf215546Sopenharmony_ci /* We'll check if we can compact the intervals starting here. */ 1073bf215546Sopenharmony_ci physreg_t candidate_start = interval->physreg_end; 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_ci /* Check if there are any other destinations we need to compact. */ 1076bf215546Sopenharmony_ci ra_foreach_dst_n (other_dst, n, reg->instr) { 1077bf215546Sopenharmony_ci if (other_dst == reg) 1078bf215546Sopenharmony_ci break; 1079bf215546Sopenharmony_ci if (ra_get_file(ctx, other_dst) != file) 1080bf215546Sopenharmony_ci continue; 1081bf215546Sopenharmony_ci if (dst_inserted[n]) 1082bf215546Sopenharmony_ci continue; 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci struct ra_interval *other_interval = &ctx->intervals[other_dst->name]; 1085bf215546Sopenharmony_ci /* if the destination partially overlaps this interval, we need to 1086bf215546Sopenharmony_ci * extend candidate_start to the end. 1087bf215546Sopenharmony_ci */ 1088bf215546Sopenharmony_ci if (other_interval->physreg_start < candidate_start) { 1089bf215546Sopenharmony_ci candidate_start = MAX2(candidate_start, 1090bf215546Sopenharmony_ci other_interval->physreg_end); 1091bf215546Sopenharmony_ci continue; 1092bf215546Sopenharmony_ci } 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci dst_inserted[n] = true; 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci /* dst intervals with a tied killed source are considered attached to 1097bf215546Sopenharmony_ci * that source. Don't actually insert them. This means we have to 1098bf215546Sopenharmony_ci * update them below if their tied source moves. 1099bf215546Sopenharmony_ci */ 1100bf215546Sopenharmony_ci if (other_dst->tied) { 1101bf215546Sopenharmony_ci struct ra_interval *tied_interval = 1102bf215546Sopenharmony_ci &ctx->intervals[other_dst->tied->def->name]; 1103bf215546Sopenharmony_ci if (tied_interval->is_killed) 1104bf215546Sopenharmony_ci continue; 1105bf215546Sopenharmony_ci } 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci d("popping destination %u physreg %u\n", 1108bf215546Sopenharmony_ci other_interval->interval.reg->name, 1109bf215546Sopenharmony_ci other_interval->physreg_start); 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_ci array_insert(ctx, dsts, other_dst); 1112bf215546Sopenharmony_ci unsigned interval_size = reg_size(other_dst); 1113bf215546Sopenharmony_ci if (is_early_clobber(other_dst)) { 1114bf215546Sopenharmony_ci ec_dst_size += interval_size; 1115bf215546Sopenharmony_ci if (other_interval->interval.reg->flags & IR3_REG_HALF) 1116bf215546Sopenharmony_ci ec_half_dst_size += interval_size; 1117bf215546Sopenharmony_ci } else { 1118bf215546Sopenharmony_ci dst_size += interval_size; 1119bf215546Sopenharmony_ci if (other_interval->interval.reg->flags & IR3_REG_HALF) 1120bf215546Sopenharmony_ci half_dst_size += interval_size; 1121bf215546Sopenharmony_ci } 1122bf215546Sopenharmony_ci } 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci /* Check if we can sort the intervals *after* this one and have enough 1125bf215546Sopenharmony_ci * space leftover to accomodate all intervals, keeping in mind that killed 1126bf215546Sopenharmony_ci * sources overlap non-tied destinations. Also check that we have enough 1127bf215546Sopenharmony_ci * space leftover for half-registers, if we're inserting a half-register 1128bf215546Sopenharmony_ci * (otherwise we only shift any half-registers down so they should be 1129bf215546Sopenharmony_ci * safe). 1130bf215546Sopenharmony_ci */ 1131bf215546Sopenharmony_ci if (candidate_start + removed_size + ec_dst_size + 1132bf215546Sopenharmony_ci MAX2(removed_killed_size, dst_size) <= file->size && 1133bf215546Sopenharmony_ci (align != 1 || 1134bf215546Sopenharmony_ci candidate_start + removed_half_size + ec_half_dst_size + 1135bf215546Sopenharmony_ci MAX2(removed_killed_half_size, half_dst_size) <= file_size)) { 1136bf215546Sopenharmony_ci start_reg = candidate_start; 1137bf215546Sopenharmony_ci break; 1138bf215546Sopenharmony_ci } 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci /* We assume that all frozen intervals are at the start and that we 1141bf215546Sopenharmony_ci * can avoid popping them. 1142bf215546Sopenharmony_ci */ 1143bf215546Sopenharmony_ci assert(!interval->frozen); 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_ci /* Killed sources are different because they go at the end and can 1146bf215546Sopenharmony_ci * overlap the register we're trying to add. 1147bf215546Sopenharmony_ci */ 1148bf215546Sopenharmony_ci unsigned interval_size = interval->physreg_end - interval->physreg_start; 1149bf215546Sopenharmony_ci if (interval->is_killed) { 1150bf215546Sopenharmony_ci removed_killed_size += interval_size; 1151bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_HALF) 1152bf215546Sopenharmony_ci removed_killed_half_size += interval_size; 1153bf215546Sopenharmony_ci } else { 1154bf215546Sopenharmony_ci removed_size += interval_size; 1155bf215546Sopenharmony_ci if (interval->interval.reg->flags & IR3_REG_HALF) 1156bf215546Sopenharmony_ci removed_half_size += interval_size; 1157bf215546Sopenharmony_ci } 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci /* Now that we've done the accounting, pop this off */ 1160bf215546Sopenharmony_ci d("popping interval %u physreg %u%s\n", interval->interval.reg->name, 1161bf215546Sopenharmony_ci interval->physreg_start, interval->is_killed ? ", killed" : ""); 1162bf215546Sopenharmony_ci array_insert(ctx, intervals, ra_pop_interval(ctx, file, interval)); 1163bf215546Sopenharmony_ci } 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_ci /* TODO: In addition to skipping registers at the beginning that are 1166bf215546Sopenharmony_ci * well-packed, we should try to skip registers at the end. 1167bf215546Sopenharmony_ci */ 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci qsort(intervals, intervals_count, sizeof(*intervals), removed_interval_cmp); 1170bf215546Sopenharmony_ci qsort(dsts, dsts_count, sizeof(*dsts), dsts_cmp); 1171bf215546Sopenharmony_ci 1172bf215546Sopenharmony_ci physreg_t live_reg = start_reg; 1173bf215546Sopenharmony_ci physreg_t dst_reg = (physreg_t)~0; 1174bf215546Sopenharmony_ci physreg_t ret_reg = (physreg_t)~0; 1175bf215546Sopenharmony_ci unsigned dst_index = 0; 1176bf215546Sopenharmony_ci unsigned live_index = 0; 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci /* We have two lists of intervals to process, live intervals and destination 1179bf215546Sopenharmony_ci * intervals. Process them in the order of the disgram in insert_cmp(). 1180bf215546Sopenharmony_ci */ 1181bf215546Sopenharmony_ci while (live_index < intervals_count || dst_index < dsts_count) { 1182bf215546Sopenharmony_ci bool process_dst; 1183bf215546Sopenharmony_ci if (live_index == intervals_count) { 1184bf215546Sopenharmony_ci process_dst = true; 1185bf215546Sopenharmony_ci } else if (dst_index == dsts_count) { 1186bf215546Sopenharmony_ci process_dst = false; 1187bf215546Sopenharmony_ci } else { 1188bf215546Sopenharmony_ci struct ir3_register *dst = dsts[dst_index]; 1189bf215546Sopenharmony_ci struct ra_interval *live_interval = intervals[live_index].interval; 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_ci bool live_half = live_interval->interval.reg->flags & IR3_REG_HALF; 1192bf215546Sopenharmony_ci bool live_killed = live_interval->is_killed; 1193bf215546Sopenharmony_ci bool dst_half = dst->flags & IR3_REG_HALF; 1194bf215546Sopenharmony_ci bool dst_early_clobber = is_early_clobber(dst); 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_ci if (live_half && !live_killed) { 1197bf215546Sopenharmony_ci /* far-left of diagram. */ 1198bf215546Sopenharmony_ci process_dst = false; 1199bf215546Sopenharmony_ci } else if (dst_half && dst_early_clobber) { 1200bf215546Sopenharmony_ci /* mid-left of diagram. */ 1201bf215546Sopenharmony_ci process_dst = true; 1202bf215546Sopenharmony_ci } else if (!dst_early_clobber) { 1203bf215546Sopenharmony_ci /* bottom of disagram. */ 1204bf215546Sopenharmony_ci process_dst = true; 1205bf215546Sopenharmony_ci } else if (live_killed) { 1206bf215546Sopenharmony_ci /* middle of diagram. */ 1207bf215546Sopenharmony_ci process_dst = false; 1208bf215546Sopenharmony_ci } else if (!dst_half && dst_early_clobber) { 1209bf215546Sopenharmony_ci /* mid-right of diagram. */ 1210bf215546Sopenharmony_ci process_dst = true; 1211bf215546Sopenharmony_ci } else { 1212bf215546Sopenharmony_ci /* far right of diagram. */ 1213bf215546Sopenharmony_ci assert(!live_killed && !live_half); 1214bf215546Sopenharmony_ci process_dst = false; 1215bf215546Sopenharmony_ci } 1216bf215546Sopenharmony_ci } 1217bf215546Sopenharmony_ci 1218bf215546Sopenharmony_ci struct ir3_register *cur_reg = 1219bf215546Sopenharmony_ci process_dst ? dsts[dst_index] : 1220bf215546Sopenharmony_ci intervals[live_index].interval->interval.reg; 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_ci physreg_t physreg; 1223bf215546Sopenharmony_ci if (process_dst && !is_early_clobber(cur_reg)) { 1224bf215546Sopenharmony_ci if (dst_reg == (physreg_t)~0) 1225bf215546Sopenharmony_ci dst_reg = live_reg; 1226bf215546Sopenharmony_ci physreg = dst_reg; 1227bf215546Sopenharmony_ci } else { 1228bf215546Sopenharmony_ci physreg = live_reg; 1229bf215546Sopenharmony_ci struct ra_interval *live_interval = intervals[live_index].interval; 1230bf215546Sopenharmony_ci bool live_killed = live_interval->is_killed; 1231bf215546Sopenharmony_ci /* If this is live-through and we've processed the destinations, we 1232bf215546Sopenharmony_ci * need to make sure we take into account any overlapping destinations. 1233bf215546Sopenharmony_ci */ 1234bf215546Sopenharmony_ci if (!live_killed && dst_reg != (physreg_t)~0) 1235bf215546Sopenharmony_ci physreg = MAX2(physreg, dst_reg); 1236bf215546Sopenharmony_ci } 1237bf215546Sopenharmony_ci 1238bf215546Sopenharmony_ci if (!(cur_reg->flags & IR3_REG_HALF)) 1239bf215546Sopenharmony_ci physreg = ALIGN(physreg, 2); 1240bf215546Sopenharmony_ci 1241bf215546Sopenharmony_ci d("pushing reg %u physreg %u\n", cur_reg->name, physreg); 1242bf215546Sopenharmony_ci 1243bf215546Sopenharmony_ci unsigned interval_size = reg_size(cur_reg); 1244bf215546Sopenharmony_ci if (physreg + interval_size > 1245bf215546Sopenharmony_ci reg_file_size(file, cur_reg)) { 1246bf215546Sopenharmony_ci d("ran out of room for interval %u!\n", 1247bf215546Sopenharmony_ci cur_reg->name); 1248bf215546Sopenharmony_ci unreachable("reg pressure calculation was wrong!"); 1249bf215546Sopenharmony_ci return 0; 1250bf215546Sopenharmony_ci } 1251bf215546Sopenharmony_ci 1252bf215546Sopenharmony_ci if (process_dst) { 1253bf215546Sopenharmony_ci if (cur_reg == reg) { 1254bf215546Sopenharmony_ci ret_reg = physreg; 1255bf215546Sopenharmony_ci } else { 1256bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[cur_reg->name]; 1257bf215546Sopenharmony_ci interval->physreg_start = physreg; 1258bf215546Sopenharmony_ci interval->physreg_end = physreg + interval_size; 1259bf215546Sopenharmony_ci } 1260bf215546Sopenharmony_ci dst_index++; 1261bf215546Sopenharmony_ci } else { 1262bf215546Sopenharmony_ci ra_push_interval(ctx, file, &intervals[live_index], physreg); 1263bf215546Sopenharmony_ci live_index++; 1264bf215546Sopenharmony_ci } 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_ci physreg += interval_size; 1267bf215546Sopenharmony_ci 1268bf215546Sopenharmony_ci if (process_dst && !is_early_clobber(cur_reg)) { 1269bf215546Sopenharmony_ci dst_reg = physreg; 1270bf215546Sopenharmony_ci } else { 1271bf215546Sopenharmony_ci live_reg = physreg; 1272bf215546Sopenharmony_ci } 1273bf215546Sopenharmony_ci } 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci /* If we shuffled around a tied source that is killed, we may have to update 1276bf215546Sopenharmony_ci * its corresponding destination since we didn't insert it above. 1277bf215546Sopenharmony_ci */ 1278bf215546Sopenharmony_ci ra_foreach_dst (dst, reg->instr) { 1279bf215546Sopenharmony_ci if (dst == reg) 1280bf215546Sopenharmony_ci break; 1281bf215546Sopenharmony_ci 1282bf215546Sopenharmony_ci struct ir3_register *tied = dst->tied; 1283bf215546Sopenharmony_ci if (!tied) 1284bf215546Sopenharmony_ci continue; 1285bf215546Sopenharmony_ci 1286bf215546Sopenharmony_ci struct ra_interval *tied_interval = &ctx->intervals[tied->def->name]; 1287bf215546Sopenharmony_ci if (!tied_interval->is_killed) 1288bf215546Sopenharmony_ci continue; 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci struct ra_interval *dst_interval = &ctx->intervals[dst->name]; 1291bf215546Sopenharmony_ci unsigned dst_size = reg_size(dst); 1292bf215546Sopenharmony_ci dst_interval->physreg_start = ra_interval_get_physreg(tied_interval); 1293bf215546Sopenharmony_ci dst_interval->physreg_end = dst_interval->physreg_start + dst_size; 1294bf215546Sopenharmony_ci } 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_ci return ret_reg; 1297bf215546Sopenharmony_ci} 1298bf215546Sopenharmony_ci 1299bf215546Sopenharmony_cistatic void 1300bf215546Sopenharmony_ciupdate_affinity(struct ra_file *file, struct ir3_register *reg, 1301bf215546Sopenharmony_ci physreg_t physreg) 1302bf215546Sopenharmony_ci{ 1303bf215546Sopenharmony_ci if (!reg->merge_set || reg->merge_set->preferred_reg != (physreg_t)~0) 1304bf215546Sopenharmony_ci return; 1305bf215546Sopenharmony_ci 1306bf215546Sopenharmony_ci if (physreg < reg->merge_set_offset) 1307bf215546Sopenharmony_ci return; 1308bf215546Sopenharmony_ci 1309bf215546Sopenharmony_ci if ((physreg - reg->merge_set_offset + reg->merge_set->size) > file->size) 1310bf215546Sopenharmony_ci return; 1311bf215546Sopenharmony_ci 1312bf215546Sopenharmony_ci reg->merge_set->preferred_reg = physreg - reg->merge_set_offset; 1313bf215546Sopenharmony_ci} 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci/* Try to find free space for a register without shuffling anything. This uses 1316bf215546Sopenharmony_ci * a round-robin algorithm to reduce false dependencies. 1317bf215546Sopenharmony_ci */ 1318bf215546Sopenharmony_cistatic physreg_t 1319bf215546Sopenharmony_cifind_best_gap(struct ra_ctx *ctx, struct ra_file *file, 1320bf215546Sopenharmony_ci struct ir3_register *dst, unsigned file_size, unsigned size, 1321bf215546Sopenharmony_ci unsigned align) 1322bf215546Sopenharmony_ci{ 1323bf215546Sopenharmony_ci /* This can happen if we create a very large merge set. Just bail out in that 1324bf215546Sopenharmony_ci * case. 1325bf215546Sopenharmony_ci */ 1326bf215546Sopenharmony_ci if (size > file_size) 1327bf215546Sopenharmony_ci return (physreg_t) ~0; 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_ci BITSET_WORD *available = 1330bf215546Sopenharmony_ci is_early_clobber(dst) ? file->available_to_evict : file->available; 1331bf215546Sopenharmony_ci 1332bf215546Sopenharmony_ci unsigned start = ALIGN(file->start, align) % (file_size - size + align); 1333bf215546Sopenharmony_ci unsigned candidate = start; 1334bf215546Sopenharmony_ci do { 1335bf215546Sopenharmony_ci bool is_available = true; 1336bf215546Sopenharmony_ci for (unsigned i = 0; i < size; i++) { 1337bf215546Sopenharmony_ci if (!BITSET_TEST(available, candidate + i)) { 1338bf215546Sopenharmony_ci is_available = false; 1339bf215546Sopenharmony_ci break; 1340bf215546Sopenharmony_ci } 1341bf215546Sopenharmony_ci } 1342bf215546Sopenharmony_ci 1343bf215546Sopenharmony_ci if (is_available) { 1344bf215546Sopenharmony_ci is_available = 1345bf215546Sopenharmony_ci !check_dst_overlap(ctx, file, dst, candidate, candidate + size); 1346bf215546Sopenharmony_ci } 1347bf215546Sopenharmony_ci 1348bf215546Sopenharmony_ci if (is_available) { 1349bf215546Sopenharmony_ci file->start = (candidate + size) % file_size; 1350bf215546Sopenharmony_ci return candidate; 1351bf215546Sopenharmony_ci } 1352bf215546Sopenharmony_ci 1353bf215546Sopenharmony_ci candidate += align; 1354bf215546Sopenharmony_ci if (candidate + size > file_size) 1355bf215546Sopenharmony_ci candidate = 0; 1356bf215546Sopenharmony_ci } while (candidate != start); 1357bf215546Sopenharmony_ci 1358bf215546Sopenharmony_ci return (physreg_t)~0; 1359bf215546Sopenharmony_ci} 1360bf215546Sopenharmony_ci 1361bf215546Sopenharmony_ci/* This is the main entrypoint for picking a register. Pick a free register 1362bf215546Sopenharmony_ci * for "reg", shuffling around sources if necessary. In the normal case where 1363bf215546Sopenharmony_ci * "is_source" is false, this register can overlap with killed sources 1364bf215546Sopenharmony_ci * (intervals with "is_killed == true"). If "is_source" is true, then 1365bf215546Sopenharmony_ci * is_killed is ignored and the register returned must not overlap with killed 1366bf215546Sopenharmony_ci * sources. This must be used for tied registers, because we're actually 1367bf215546Sopenharmony_ci * allocating the destination and the tied source at the same time. 1368bf215546Sopenharmony_ci */ 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_cistatic physreg_t 1371bf215546Sopenharmony_ciget_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg) 1372bf215546Sopenharmony_ci{ 1373bf215546Sopenharmony_ci unsigned file_size = reg_file_size(file, reg); 1374bf215546Sopenharmony_ci if (reg->merge_set && reg->merge_set->preferred_reg != (physreg_t)~0) { 1375bf215546Sopenharmony_ci physreg_t preferred_reg = 1376bf215546Sopenharmony_ci reg->merge_set->preferred_reg + reg->merge_set_offset; 1377bf215546Sopenharmony_ci if (preferred_reg + reg_size(reg) <= file_size && 1378bf215546Sopenharmony_ci preferred_reg % reg_elem_size(reg) == 0 && 1379bf215546Sopenharmony_ci get_reg_specified(ctx, file, reg, preferred_reg, false)) 1380bf215546Sopenharmony_ci return preferred_reg; 1381bf215546Sopenharmony_ci } 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci /* If this register is a subset of a merge set which we have not picked a 1384bf215546Sopenharmony_ci * register for, first try to allocate enough space for the entire merge 1385bf215546Sopenharmony_ci * set. 1386bf215546Sopenharmony_ci */ 1387bf215546Sopenharmony_ci unsigned size = reg_size(reg); 1388bf215546Sopenharmony_ci if (reg->merge_set && reg->merge_set->preferred_reg == (physreg_t)~0 && 1389bf215546Sopenharmony_ci size < reg->merge_set->size) { 1390bf215546Sopenharmony_ci physreg_t best_reg = find_best_gap(ctx, file, reg, file_size, 1391bf215546Sopenharmony_ci reg->merge_set->size, 1392bf215546Sopenharmony_ci reg->merge_set->alignment); 1393bf215546Sopenharmony_ci if (best_reg != (physreg_t)~0u) { 1394bf215546Sopenharmony_ci best_reg += reg->merge_set_offset; 1395bf215546Sopenharmony_ci return best_reg; 1396bf215546Sopenharmony_ci } 1397bf215546Sopenharmony_ci } 1398bf215546Sopenharmony_ci 1399bf215546Sopenharmony_ci /* For ALU and SFU instructions, if the src reg is avail to pick, use it. 1400bf215546Sopenharmony_ci * Because this doesn't introduce unnecessary dependencies, and it 1401bf215546Sopenharmony_ci * potentially avoids needing (ss) syncs for write after read hazards for 1402bf215546Sopenharmony_ci * SFU instructions: 1403bf215546Sopenharmony_ci */ 1404bf215546Sopenharmony_ci if (is_sfu(reg->instr) || is_alu(reg->instr)) { 1405bf215546Sopenharmony_ci for (unsigned i = 0; i < reg->instr->srcs_count; i++) { 1406bf215546Sopenharmony_ci struct ir3_register *src = reg->instr->srcs[i]; 1407bf215546Sopenharmony_ci if (!ra_reg_is_src(src)) 1408bf215546Sopenharmony_ci continue; 1409bf215546Sopenharmony_ci if (ra_get_file(ctx, src) == file && reg_size(src) >= size) { 1410bf215546Sopenharmony_ci struct ra_interval *src_interval = &ctx->intervals[src->def->name]; 1411bf215546Sopenharmony_ci physreg_t src_physreg = ra_interval_get_physreg(src_interval); 1412bf215546Sopenharmony_ci if (src_physreg % reg_elem_size(reg) == 0 && 1413bf215546Sopenharmony_ci src_physreg + size <= file_size && 1414bf215546Sopenharmony_ci get_reg_specified(ctx, file, reg, src_physreg, false)) 1415bf215546Sopenharmony_ci return src_physreg; 1416bf215546Sopenharmony_ci } 1417bf215546Sopenharmony_ci } 1418bf215546Sopenharmony_ci } 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci physreg_t best_reg = 1421bf215546Sopenharmony_ci find_best_gap(ctx, file, reg, file_size, size, reg_elem_size(reg)); 1422bf215546Sopenharmony_ci if (best_reg != (physreg_t)~0u) { 1423bf215546Sopenharmony_ci return best_reg; 1424bf215546Sopenharmony_ci } 1425bf215546Sopenharmony_ci 1426bf215546Sopenharmony_ci /* Ok, we couldn't find anything that fits. Here is where we have to start 1427bf215546Sopenharmony_ci * moving things around to make stuff fit. First try solely evicting 1428bf215546Sopenharmony_ci * registers in the way. 1429bf215546Sopenharmony_ci */ 1430bf215546Sopenharmony_ci unsigned best_eviction_count = ~0; 1431bf215546Sopenharmony_ci for (physreg_t i = 0; i + size <= file_size; i += reg_elem_size(reg)) { 1432bf215546Sopenharmony_ci unsigned eviction_count; 1433bf215546Sopenharmony_ci if (try_evict_regs(ctx, file, reg, i, &eviction_count, false, true)) { 1434bf215546Sopenharmony_ci if (eviction_count < best_eviction_count) { 1435bf215546Sopenharmony_ci best_eviction_count = eviction_count; 1436bf215546Sopenharmony_ci best_reg = i; 1437bf215546Sopenharmony_ci } 1438bf215546Sopenharmony_ci } 1439bf215546Sopenharmony_ci } 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_ci if (best_eviction_count != ~0) { 1442bf215546Sopenharmony_ci ASSERTED bool result = try_evict_regs( 1443bf215546Sopenharmony_ci ctx, file, reg, best_reg, &best_eviction_count, false, false); 1444bf215546Sopenharmony_ci assert(result); 1445bf215546Sopenharmony_ci return best_reg; 1446bf215546Sopenharmony_ci } 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_ci /* Use the dumb fallback only if try_evict_regs() fails. */ 1449bf215546Sopenharmony_ci return compress_regs_left(ctx, file, reg); 1450bf215546Sopenharmony_ci} 1451bf215546Sopenharmony_ci 1452bf215546Sopenharmony_cistatic void 1453bf215546Sopenharmony_ciassign_reg(struct ir3_instruction *instr, struct ir3_register *reg, 1454bf215546Sopenharmony_ci unsigned num) 1455bf215546Sopenharmony_ci{ 1456bf215546Sopenharmony_ci if (reg->flags & IR3_REG_ARRAY) { 1457bf215546Sopenharmony_ci reg->array.base = num; 1458bf215546Sopenharmony_ci if (reg->flags & IR3_REG_RELATIV) 1459bf215546Sopenharmony_ci reg->array.offset += num; 1460bf215546Sopenharmony_ci else 1461bf215546Sopenharmony_ci reg->num = num + reg->array.offset; 1462bf215546Sopenharmony_ci } else { 1463bf215546Sopenharmony_ci reg->num = num; 1464bf215546Sopenharmony_ci } 1465bf215546Sopenharmony_ci} 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_cistatic void 1468bf215546Sopenharmony_cimark_src_killed(struct ra_ctx *ctx, struct ir3_register *src) 1469bf215546Sopenharmony_ci{ 1470bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[src->def->name]; 1471bf215546Sopenharmony_ci 1472bf215546Sopenharmony_ci if (!(src->flags & IR3_REG_FIRST_KILL) || interval->is_killed || 1473bf215546Sopenharmony_ci interval->interval.parent || 1474bf215546Sopenharmony_ci !rb_tree_is_empty(&interval->interval.children)) 1475bf215546Sopenharmony_ci return; 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci ra_file_mark_killed(ra_get_file(ctx, src), interval); 1478bf215546Sopenharmony_ci} 1479bf215546Sopenharmony_ci 1480bf215546Sopenharmony_cistatic void 1481bf215546Sopenharmony_ciinsert_dst(struct ra_ctx *ctx, struct ir3_register *dst) 1482bf215546Sopenharmony_ci{ 1483bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, dst); 1484bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[dst->name]; 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci d("insert dst %u physreg %u", dst->name, ra_interval_get_physreg(interval)); 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_ci if (!(dst->flags & IR3_REG_UNUSED)) 1489bf215546Sopenharmony_ci ra_file_insert(file, interval); 1490bf215546Sopenharmony_ci 1491bf215546Sopenharmony_ci assign_reg(dst->instr, dst, ra_interval_get_num(interval)); 1492bf215546Sopenharmony_ci} 1493bf215546Sopenharmony_ci 1494bf215546Sopenharmony_cistatic void 1495bf215546Sopenharmony_ciallocate_dst_fixed(struct ra_ctx *ctx, struct ir3_register *dst, 1496bf215546Sopenharmony_ci physreg_t physreg) 1497bf215546Sopenharmony_ci{ 1498bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, dst); 1499bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[dst->name]; 1500bf215546Sopenharmony_ci update_affinity(file, dst, physreg); 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci ra_interval_init(interval, dst); 1503bf215546Sopenharmony_ci interval->physreg_start = physreg; 1504bf215546Sopenharmony_ci interval->physreg_end = physreg + reg_size(dst); 1505bf215546Sopenharmony_ci} 1506bf215546Sopenharmony_ci 1507bf215546Sopenharmony_ci/* If a tied destination interferes with its source register, we have to insert 1508bf215546Sopenharmony_ci * a copy beforehand to copy the source to the destination. Because we are using 1509bf215546Sopenharmony_ci * the parallel_copies array and not creating a separate copy, this copy will 1510bf215546Sopenharmony_ci * happen in parallel with any shuffling around of the tied source, so we have 1511bf215546Sopenharmony_ci * to copy the source *as it exists before it is shuffled around*. We do this by 1512bf215546Sopenharmony_ci * inserting the copy early, before any other copies are inserted. We don't 1513bf215546Sopenharmony_ci * actually know the destination of the copy, but that's ok because the 1514bf215546Sopenharmony_ci * dst_interval will be filled out later. 1515bf215546Sopenharmony_ci */ 1516bf215546Sopenharmony_cistatic void 1517bf215546Sopenharmony_ciinsert_tied_dst_copy(struct ra_ctx *ctx, struct ir3_register *dst) 1518bf215546Sopenharmony_ci{ 1519bf215546Sopenharmony_ci struct ir3_register *tied = dst->tied; 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci if (!tied) 1522bf215546Sopenharmony_ci return; 1523bf215546Sopenharmony_ci 1524bf215546Sopenharmony_ci struct ra_interval *tied_interval = &ctx->intervals[tied->def->name]; 1525bf215546Sopenharmony_ci struct ra_interval *dst_interval = &ctx->intervals[dst->name]; 1526bf215546Sopenharmony_ci 1527bf215546Sopenharmony_ci if (tied_interval->is_killed) 1528bf215546Sopenharmony_ci return; 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci physreg_t tied_physreg = ra_interval_get_physreg(tied_interval); 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci array_insert(ctx, ctx->parallel_copies, 1533bf215546Sopenharmony_ci (struct ra_parallel_copy){ 1534bf215546Sopenharmony_ci .interval = dst_interval, 1535bf215546Sopenharmony_ci .src = tied_physreg, 1536bf215546Sopenharmony_ci }); 1537bf215546Sopenharmony_ci} 1538bf215546Sopenharmony_ci 1539bf215546Sopenharmony_cistatic void 1540bf215546Sopenharmony_ciallocate_dst(struct ra_ctx *ctx, struct ir3_register *dst) 1541bf215546Sopenharmony_ci{ 1542bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, dst); 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci struct ir3_register *tied = dst->tied; 1545bf215546Sopenharmony_ci if (tied) { 1546bf215546Sopenharmony_ci struct ra_interval *tied_interval = &ctx->intervals[tied->def->name]; 1547bf215546Sopenharmony_ci if (tied_interval->is_killed) { 1548bf215546Sopenharmony_ci /* The easy case: the source is killed, so we can just reuse it 1549bf215546Sopenharmony_ci * for the destination. 1550bf215546Sopenharmony_ci */ 1551bf215546Sopenharmony_ci allocate_dst_fixed(ctx, dst, ra_interval_get_physreg(tied_interval)); 1552bf215546Sopenharmony_ci return; 1553bf215546Sopenharmony_ci } 1554bf215546Sopenharmony_ci } 1555bf215546Sopenharmony_ci 1556bf215546Sopenharmony_ci /* All the hard work is done by get_reg here. */ 1557bf215546Sopenharmony_ci physreg_t physreg = get_reg(ctx, file, dst); 1558bf215546Sopenharmony_ci 1559bf215546Sopenharmony_ci allocate_dst_fixed(ctx, dst, physreg); 1560bf215546Sopenharmony_ci} 1561bf215546Sopenharmony_ci 1562bf215546Sopenharmony_cistatic void 1563bf215546Sopenharmony_ciassign_src(struct ra_ctx *ctx, struct ir3_instruction *instr, 1564bf215546Sopenharmony_ci struct ir3_register *src) 1565bf215546Sopenharmony_ci{ 1566bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[src->def->name]; 1567bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, src); 1568bf215546Sopenharmony_ci 1569bf215546Sopenharmony_ci struct ir3_register *tied = src->tied; 1570bf215546Sopenharmony_ci physreg_t physreg; 1571bf215546Sopenharmony_ci if (tied) { 1572bf215546Sopenharmony_ci struct ra_interval *tied_interval = &ctx->intervals[tied->name]; 1573bf215546Sopenharmony_ci physreg = ra_interval_get_physreg(tied_interval); 1574bf215546Sopenharmony_ci } else { 1575bf215546Sopenharmony_ci physreg = ra_interval_get_physreg(interval); 1576bf215546Sopenharmony_ci } 1577bf215546Sopenharmony_ci 1578bf215546Sopenharmony_ci assign_reg(instr, src, ra_physreg_to_num(physreg, src->flags)); 1579bf215546Sopenharmony_ci 1580bf215546Sopenharmony_ci if (src->flags & IR3_REG_FIRST_KILL) 1581bf215546Sopenharmony_ci ra_file_remove(file, interval); 1582bf215546Sopenharmony_ci} 1583bf215546Sopenharmony_ci 1584bf215546Sopenharmony_ci/* Insert a parallel copy instruction before the instruction with the parallel 1585bf215546Sopenharmony_ci * copy entries we've built up. 1586bf215546Sopenharmony_ci */ 1587bf215546Sopenharmony_cistatic void 1588bf215546Sopenharmony_ciinsert_parallel_copy_instr(struct ra_ctx *ctx, struct ir3_instruction *instr) 1589bf215546Sopenharmony_ci{ 1590bf215546Sopenharmony_ci if (ctx->parallel_copies_count == 0) 1591bf215546Sopenharmony_ci return; 1592bf215546Sopenharmony_ci 1593bf215546Sopenharmony_ci struct ir3_instruction *pcopy = 1594bf215546Sopenharmony_ci ir3_instr_create(instr->block, OPC_META_PARALLEL_COPY, 1595bf215546Sopenharmony_ci ctx->parallel_copies_count, ctx->parallel_copies_count); 1596bf215546Sopenharmony_ci 1597bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->parallel_copies_count; i++) { 1598bf215546Sopenharmony_ci struct ra_parallel_copy *entry = &ctx->parallel_copies[i]; 1599bf215546Sopenharmony_ci struct ir3_register *reg = 1600bf215546Sopenharmony_ci ir3_dst_create(pcopy, INVALID_REG, 1601bf215546Sopenharmony_ci entry->interval->interval.reg->flags & 1602bf215546Sopenharmony_ci (IR3_REG_HALF | IR3_REG_ARRAY)); 1603bf215546Sopenharmony_ci reg->size = entry->interval->interval.reg->size; 1604bf215546Sopenharmony_ci reg->wrmask = entry->interval->interval.reg->wrmask; 1605bf215546Sopenharmony_ci assign_reg(pcopy, reg, ra_interval_get_num(entry->interval)); 1606bf215546Sopenharmony_ci } 1607bf215546Sopenharmony_ci 1608bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->parallel_copies_count; i++) { 1609bf215546Sopenharmony_ci struct ra_parallel_copy *entry = &ctx->parallel_copies[i]; 1610bf215546Sopenharmony_ci struct ir3_register *reg = 1611bf215546Sopenharmony_ci ir3_src_create(pcopy, INVALID_REG, 1612bf215546Sopenharmony_ci entry->interval->interval.reg->flags & 1613bf215546Sopenharmony_ci (IR3_REG_HALF | IR3_REG_ARRAY)); 1614bf215546Sopenharmony_ci reg->size = entry->interval->interval.reg->size; 1615bf215546Sopenharmony_ci reg->wrmask = entry->interval->interval.reg->wrmask; 1616bf215546Sopenharmony_ci assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags)); 1617bf215546Sopenharmony_ci } 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci list_del(&pcopy->node); 1620bf215546Sopenharmony_ci list_addtail(&pcopy->node, &instr->node); 1621bf215546Sopenharmony_ci ctx->parallel_copies_count = 0; 1622bf215546Sopenharmony_ci} 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_cistatic void 1625bf215546Sopenharmony_cihandle_normal_instr(struct ra_ctx *ctx, struct ir3_instruction *instr) 1626bf215546Sopenharmony_ci{ 1627bf215546Sopenharmony_ci /* First, mark sources as going-to-be-killed while allocating the dest. */ 1628bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1629bf215546Sopenharmony_ci mark_src_killed(ctx, src); 1630bf215546Sopenharmony_ci } 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_ci /* Pre-insert tied dst copies. */ 1633bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 1634bf215546Sopenharmony_ci insert_tied_dst_copy(ctx, dst); 1635bf215546Sopenharmony_ci } 1636bf215546Sopenharmony_ci 1637bf215546Sopenharmony_ci /* Allocate the destination. */ 1638bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 1639bf215546Sopenharmony_ci allocate_dst(ctx, dst); 1640bf215546Sopenharmony_ci } 1641bf215546Sopenharmony_ci 1642bf215546Sopenharmony_ci /* Now handle sources. Go backward so that in case there are multiple 1643bf215546Sopenharmony_ci * sources with the same def and that def is killed we only remove it at 1644bf215546Sopenharmony_ci * the end. 1645bf215546Sopenharmony_ci */ 1646bf215546Sopenharmony_ci ra_foreach_src_rev (src, instr) { 1647bf215546Sopenharmony_ci assign_src(ctx, instr, src); 1648bf215546Sopenharmony_ci } 1649bf215546Sopenharmony_ci 1650bf215546Sopenharmony_ci /* Now finally insert the destination into the map. */ 1651bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 1652bf215546Sopenharmony_ci insert_dst(ctx, dst); 1653bf215546Sopenharmony_ci } 1654bf215546Sopenharmony_ci 1655bf215546Sopenharmony_ci insert_parallel_copy_instr(ctx, instr); 1656bf215546Sopenharmony_ci} 1657bf215546Sopenharmony_ci 1658bf215546Sopenharmony_cistatic void 1659bf215546Sopenharmony_cihandle_split(struct ra_ctx *ctx, struct ir3_instruction *instr) 1660bf215546Sopenharmony_ci{ 1661bf215546Sopenharmony_ci struct ir3_register *dst = instr->dsts[0]; 1662bf215546Sopenharmony_ci struct ir3_register *src = instr->srcs[0]; 1663bf215546Sopenharmony_ci 1664bf215546Sopenharmony_ci if (dst->merge_set == NULL || src->def->merge_set != dst->merge_set) { 1665bf215546Sopenharmony_ci handle_normal_instr(ctx, instr); 1666bf215546Sopenharmony_ci return; 1667bf215546Sopenharmony_ci } 1668bf215546Sopenharmony_ci 1669bf215546Sopenharmony_ci struct ra_interval *src_interval = &ctx->intervals[src->def->name]; 1670bf215546Sopenharmony_ci 1671bf215546Sopenharmony_ci physreg_t physreg = ra_interval_get_physreg(src_interval); 1672bf215546Sopenharmony_ci assign_src(ctx, instr, src); 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_ci allocate_dst_fixed( 1675bf215546Sopenharmony_ci ctx, dst, physreg - src->def->merge_set_offset + dst->merge_set_offset); 1676bf215546Sopenharmony_ci insert_dst(ctx, dst); 1677bf215546Sopenharmony_ci} 1678bf215546Sopenharmony_ci 1679bf215546Sopenharmony_cistatic void 1680bf215546Sopenharmony_cihandle_collect(struct ra_ctx *ctx, struct ir3_instruction *instr) 1681bf215546Sopenharmony_ci{ 1682bf215546Sopenharmony_ci struct ir3_merge_set *dst_set = instr->dsts[0]->merge_set; 1683bf215546Sopenharmony_ci unsigned dst_offset = instr->dsts[0]->merge_set_offset; 1684bf215546Sopenharmony_ci 1685bf215546Sopenharmony_ci if (!dst_set || dst_set->regs_count == 1) { 1686bf215546Sopenharmony_ci handle_normal_instr(ctx, instr); 1687bf215546Sopenharmony_ci return; 1688bf215546Sopenharmony_ci } 1689bf215546Sopenharmony_ci 1690bf215546Sopenharmony_ci /* We need to check if any of the sources are contained in an interval 1691bf215546Sopenharmony_ci * that is at least as large as the vector. In this case, we should put 1692bf215546Sopenharmony_ci * the vector inside that larger interval. (There should be one 1693bf215546Sopenharmony_ci * unambiguous place to put it, because values sharing the same merge set 1694bf215546Sopenharmony_ci * should be allocated together.) This can happen in a case like: 1695bf215546Sopenharmony_ci * 1696bf215546Sopenharmony_ci * ssa_1 (wrmask=0xf) = ... 1697bf215546Sopenharmony_ci * ssa_2 = split ssa_1 off:0 1698bf215546Sopenharmony_ci * ssa_3 = split ssa_1 off:1 1699bf215546Sopenharmony_ci * ssa_4 (wrmask=0x3) = collect (kill)ssa_2, (kill)ssa_3 1700bf215546Sopenharmony_ci * ... = (kill)ssa_1 1701bf215546Sopenharmony_ci * ... = (kill)ssa_4 1702bf215546Sopenharmony_ci * 1703bf215546Sopenharmony_ci * ssa_4 will be coalesced with ssa_1 and needs to be allocated inside it. 1704bf215546Sopenharmony_ci */ 1705bf215546Sopenharmony_ci physreg_t dst_fixed = (physreg_t)~0u; 1706bf215546Sopenharmony_ci 1707bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1708bf215546Sopenharmony_ci if (src->flags & IR3_REG_FIRST_KILL) { 1709bf215546Sopenharmony_ci mark_src_killed(ctx, src); 1710bf215546Sopenharmony_ci } 1711bf215546Sopenharmony_ci 1712bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[src->def->name]; 1713bf215546Sopenharmony_ci 1714bf215546Sopenharmony_ci if (src->def->merge_set != dst_set || interval->is_killed) 1715bf215546Sopenharmony_ci continue; 1716bf215546Sopenharmony_ci while (interval->interval.parent != NULL) { 1717bf215546Sopenharmony_ci interval = ir3_reg_interval_to_ra_interval(interval->interval.parent); 1718bf215546Sopenharmony_ci } 1719bf215546Sopenharmony_ci if (reg_size(interval->interval.reg) >= reg_size(instr->dsts[0])) { 1720bf215546Sopenharmony_ci dst_fixed = interval->physreg_start - 1721bf215546Sopenharmony_ci interval->interval.reg->merge_set_offset + dst_offset; 1722bf215546Sopenharmony_ci } else { 1723bf215546Sopenharmony_ci /* For sources whose root interval is smaller than the 1724bf215546Sopenharmony_ci * destination (i.e. the normal case), we will shuffle them 1725bf215546Sopenharmony_ci * around after allocating the destination. Mark them killed so 1726bf215546Sopenharmony_ci * that the destination can be allocated over them, even if they 1727bf215546Sopenharmony_ci * aren't actually killed. 1728bf215546Sopenharmony_ci */ 1729bf215546Sopenharmony_ci ra_file_mark_killed(ra_get_file(ctx, src), interval); 1730bf215546Sopenharmony_ci } 1731bf215546Sopenharmony_ci } 1732bf215546Sopenharmony_ci 1733bf215546Sopenharmony_ci if (dst_fixed != (physreg_t)~0u) 1734bf215546Sopenharmony_ci allocate_dst_fixed(ctx, instr->dsts[0], dst_fixed); 1735bf215546Sopenharmony_ci else 1736bf215546Sopenharmony_ci allocate_dst(ctx, instr->dsts[0]); 1737bf215546Sopenharmony_ci 1738bf215546Sopenharmony_ci /* Remove the temporary is_killed we added */ 1739bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1740bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[src->def->name]; 1741bf215546Sopenharmony_ci while (interval->interval.parent != NULL) { 1742bf215546Sopenharmony_ci interval = ir3_reg_interval_to_ra_interval(interval->interval.parent); 1743bf215546Sopenharmony_ci } 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci /* Filter out cases where it actually should be killed */ 1746bf215546Sopenharmony_ci if (interval != &ctx->intervals[src->def->name] || 1747bf215546Sopenharmony_ci !(src->flags & IR3_REG_KILL)) { 1748bf215546Sopenharmony_ci ra_file_unmark_killed(ra_get_file(ctx, src), interval); 1749bf215546Sopenharmony_ci } 1750bf215546Sopenharmony_ci } 1751bf215546Sopenharmony_ci 1752bf215546Sopenharmony_ci ra_foreach_src_rev (src, instr) { 1753bf215546Sopenharmony_ci assign_src(ctx, instr, src); 1754bf215546Sopenharmony_ci } 1755bf215546Sopenharmony_ci 1756bf215546Sopenharmony_ci /* We need to do this before insert_dst(), so that children of the 1757bf215546Sopenharmony_ci * destination which got marked as killed and then shuffled around to make 1758bf215546Sopenharmony_ci * space for the destination have the correct pcopy destination that 1759bf215546Sopenharmony_ci * matches what we assign the source of the collect to in assign_src(). 1760bf215546Sopenharmony_ci * 1761bf215546Sopenharmony_ci * TODO: In this case we'll wind up copying the value in the pcopy and 1762bf215546Sopenharmony_ci * then again in the collect. We could avoid one of those by updating the 1763bf215546Sopenharmony_ci * pcopy destination to match up with the final location of the source 1764bf215546Sopenharmony_ci * after the collect and making the collect a no-op. However this doesn't 1765bf215546Sopenharmony_ci * seem to happen often. 1766bf215546Sopenharmony_ci */ 1767bf215546Sopenharmony_ci insert_parallel_copy_instr(ctx, instr); 1768bf215546Sopenharmony_ci 1769bf215546Sopenharmony_ci /* Note: insert_dst will automatically shuffle around any intervals that 1770bf215546Sopenharmony_ci * are a child of the collect by making them children of the collect. 1771bf215546Sopenharmony_ci */ 1772bf215546Sopenharmony_ci 1773bf215546Sopenharmony_ci insert_dst(ctx, instr->dsts[0]); 1774bf215546Sopenharmony_ci} 1775bf215546Sopenharmony_ci 1776bf215546Sopenharmony_ci/* Parallel copies before RA should only be at the end of the block, for 1777bf215546Sopenharmony_ci * phi's. For these we only need to fill in the sources, and then we fill in 1778bf215546Sopenharmony_ci * the destinations in the successor block. 1779bf215546Sopenharmony_ci */ 1780bf215546Sopenharmony_cistatic void 1781bf215546Sopenharmony_cihandle_pcopy(struct ra_ctx *ctx, struct ir3_instruction *instr) 1782bf215546Sopenharmony_ci{ 1783bf215546Sopenharmony_ci ra_foreach_src_rev (src, instr) { 1784bf215546Sopenharmony_ci assign_src(ctx, instr, src); 1785bf215546Sopenharmony_ci } 1786bf215546Sopenharmony_ci} 1787bf215546Sopenharmony_ci 1788bf215546Sopenharmony_ci/* Some inputs may need to be precolored. We need to handle those first, so 1789bf215546Sopenharmony_ci * that other non-precolored inputs don't accidentally get allocated over 1790bf215546Sopenharmony_ci * them. Inputs are the very first thing in the shader, so it shouldn't be a 1791bf215546Sopenharmony_ci * problem to allocate them to a specific physreg. 1792bf215546Sopenharmony_ci */ 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_cistatic void 1795bf215546Sopenharmony_cihandle_precolored_input(struct ra_ctx *ctx, struct ir3_instruction *instr) 1796bf215546Sopenharmony_ci{ 1797bf215546Sopenharmony_ci if (instr->dsts[0]->num == INVALID_REG) 1798bf215546Sopenharmony_ci return; 1799bf215546Sopenharmony_ci 1800bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, instr->dsts[0]); 1801bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name]; 1802bf215546Sopenharmony_ci physreg_t physreg = ra_reg_get_physreg(instr->dsts[0]); 1803bf215546Sopenharmony_ci allocate_dst_fixed(ctx, instr->dsts[0], physreg); 1804bf215546Sopenharmony_ci 1805bf215546Sopenharmony_ci d("insert precolored dst %u physreg %u", instr->dsts[0]->name, 1806bf215546Sopenharmony_ci ra_interval_get_physreg(interval)); 1807bf215546Sopenharmony_ci 1808bf215546Sopenharmony_ci ra_file_insert(file, interval); 1809bf215546Sopenharmony_ci interval->frozen = true; 1810bf215546Sopenharmony_ci} 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_cistatic void 1813bf215546Sopenharmony_cihandle_input(struct ra_ctx *ctx, struct ir3_instruction *instr) 1814bf215546Sopenharmony_ci{ 1815bf215546Sopenharmony_ci if (instr->dsts[0]->num != INVALID_REG) 1816bf215546Sopenharmony_ci return; 1817bf215546Sopenharmony_ci 1818bf215546Sopenharmony_ci allocate_dst(ctx, instr->dsts[0]); 1819bf215546Sopenharmony_ci 1820bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, instr->dsts[0]); 1821bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name]; 1822bf215546Sopenharmony_ci ra_file_insert(file, interval); 1823bf215546Sopenharmony_ci} 1824bf215546Sopenharmony_ci 1825bf215546Sopenharmony_cistatic void 1826bf215546Sopenharmony_ciassign_input(struct ra_ctx *ctx, struct ir3_instruction *instr) 1827bf215546Sopenharmony_ci{ 1828bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[instr->dsts[0]->name]; 1829bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, instr->dsts[0]); 1830bf215546Sopenharmony_ci 1831bf215546Sopenharmony_ci if (instr->dsts[0]->num == INVALID_REG) { 1832bf215546Sopenharmony_ci assign_reg(instr, instr->dsts[0], ra_interval_get_num(interval)); 1833bf215546Sopenharmony_ci } else { 1834bf215546Sopenharmony_ci interval->frozen = false; 1835bf215546Sopenharmony_ci } 1836bf215546Sopenharmony_ci 1837bf215546Sopenharmony_ci if (instr->dsts[0]->flags & IR3_REG_UNUSED) 1838bf215546Sopenharmony_ci ra_file_remove(file, interval); 1839bf215546Sopenharmony_ci 1840bf215546Sopenharmony_ci ra_foreach_src_rev (src, instr) 1841bf215546Sopenharmony_ci assign_src(ctx, instr, src); 1842bf215546Sopenharmony_ci} 1843bf215546Sopenharmony_ci 1844bf215546Sopenharmony_ci/* chmask is a bit weird, because it has pre-colored sources due to the need 1845bf215546Sopenharmony_ci * to pass some registers to the next stage. Fortunately there are only at 1846bf215546Sopenharmony_ci * most two, and there should be no other live values by the time we get to 1847bf215546Sopenharmony_ci * this instruction, so we only have to do the minimum and don't need any 1848bf215546Sopenharmony_ci * fancy fallbacks. 1849bf215546Sopenharmony_ci * 1850bf215546Sopenharmony_ci * TODO: Add more complete handling of precolored sources, e.g. for function 1851bf215546Sopenharmony_ci * argument handling. We'd need a way to mark sources as fixed so that they 1852bf215546Sopenharmony_ci * don't get moved around when placing other sources in the fallback case, and 1853bf215546Sopenharmony_ci * a duplication of much of the logic in get_reg(). This also opens another 1854bf215546Sopenharmony_ci * can of worms, e.g. what if the precolored source is a split of a vector 1855bf215546Sopenharmony_ci * which is still live -- this breaks our assumption that splits don't incur 1856bf215546Sopenharmony_ci * any "extra" register requirements and we'd have to break it out of the 1857bf215546Sopenharmony_ci * parent ra_interval. 1858bf215546Sopenharmony_ci */ 1859bf215546Sopenharmony_ci 1860bf215546Sopenharmony_cistatic void 1861bf215546Sopenharmony_cihandle_precolored_source(struct ra_ctx *ctx, struct ir3_register *src) 1862bf215546Sopenharmony_ci{ 1863bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, src); 1864bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[src->def->name]; 1865bf215546Sopenharmony_ci physreg_t physreg = ra_reg_get_physreg(src); 1866bf215546Sopenharmony_ci 1867bf215546Sopenharmony_ci if (ra_interval_get_num(interval) == src->num) 1868bf215546Sopenharmony_ci return; 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci /* Try evicting stuff in our way if it isn't free. This won't move 1871bf215546Sopenharmony_ci * anything unless it overlaps with our precolored physreg, so we don't 1872bf215546Sopenharmony_ci * have to worry about evicting other precolored sources. 1873bf215546Sopenharmony_ci */ 1874bf215546Sopenharmony_ci if (!get_reg_specified(ctx, file, src, physreg, true)) { 1875bf215546Sopenharmony_ci unsigned eviction_count; 1876bf215546Sopenharmony_ci if (!try_evict_regs(ctx, file, src, physreg, &eviction_count, true, 1877bf215546Sopenharmony_ci false)) { 1878bf215546Sopenharmony_ci unreachable("failed to evict for precolored source!"); 1879bf215546Sopenharmony_ci return; 1880bf215546Sopenharmony_ci } 1881bf215546Sopenharmony_ci } 1882bf215546Sopenharmony_ci 1883bf215546Sopenharmony_ci ra_move_interval(ctx, file, interval, physreg); 1884bf215546Sopenharmony_ci} 1885bf215546Sopenharmony_ci 1886bf215546Sopenharmony_cistatic void 1887bf215546Sopenharmony_cihandle_chmask(struct ra_ctx *ctx, struct ir3_instruction *instr) 1888bf215546Sopenharmony_ci{ 1889bf215546Sopenharmony_ci /* Note: we purposely don't mark sources as killed, so that we can reuse 1890bf215546Sopenharmony_ci * some of the get_reg() machinery as-if the source is a destination. 1891bf215546Sopenharmony_ci * Marking it as killed would make e.g. get_reg_specified() wouldn't work 1892bf215546Sopenharmony_ci * correctly. 1893bf215546Sopenharmony_ci */ 1894bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1895bf215546Sopenharmony_ci assert(src->num != INVALID_REG); 1896bf215546Sopenharmony_ci handle_precolored_source(ctx, src); 1897bf215546Sopenharmony_ci } 1898bf215546Sopenharmony_ci 1899bf215546Sopenharmony_ci ra_foreach_src (src, instr) { 1900bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, src); 1901bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[src->def->name]; 1902bf215546Sopenharmony_ci if (src->flags & IR3_REG_FIRST_KILL) 1903bf215546Sopenharmony_ci ra_file_remove(file, interval); 1904bf215546Sopenharmony_ci } 1905bf215546Sopenharmony_ci 1906bf215546Sopenharmony_ci insert_parallel_copy_instr(ctx, instr); 1907bf215546Sopenharmony_ci} 1908bf215546Sopenharmony_ci 1909bf215546Sopenharmony_cistatic physreg_t 1910bf215546Sopenharmony_ciread_register(struct ra_ctx *ctx, struct ir3_block *block, 1911bf215546Sopenharmony_ci struct ir3_register *def) 1912bf215546Sopenharmony_ci{ 1913bf215546Sopenharmony_ci struct ra_block_state *state = &ctx->blocks[block->index]; 1914bf215546Sopenharmony_ci if (state->renames) { 1915bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search(state->renames, def); 1916bf215546Sopenharmony_ci if (entry) { 1917bf215546Sopenharmony_ci return (physreg_t)(uintptr_t)entry->data; 1918bf215546Sopenharmony_ci } 1919bf215546Sopenharmony_ci } 1920bf215546Sopenharmony_ci 1921bf215546Sopenharmony_ci return ra_reg_get_physreg(def); 1922bf215546Sopenharmony_ci} 1923bf215546Sopenharmony_ci 1924bf215546Sopenharmony_cistatic void 1925bf215546Sopenharmony_cihandle_live_in(struct ra_ctx *ctx, struct ir3_register *def) 1926bf215546Sopenharmony_ci{ 1927bf215546Sopenharmony_ci physreg_t physreg = ~0; 1928bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->block->predecessors_count; i++) { 1929bf215546Sopenharmony_ci struct ir3_block *pred = ctx->block->predecessors[i]; 1930bf215546Sopenharmony_ci struct ra_block_state *pred_state = &ctx->blocks[pred->index]; 1931bf215546Sopenharmony_ci 1932bf215546Sopenharmony_ci if (!pred_state->visited) 1933bf215546Sopenharmony_ci continue; 1934bf215546Sopenharmony_ci 1935bf215546Sopenharmony_ci physreg = read_register(ctx, pred, def); 1936bf215546Sopenharmony_ci break; 1937bf215546Sopenharmony_ci } 1938bf215546Sopenharmony_ci 1939bf215546Sopenharmony_ci assert(physreg != (physreg_t)~0); 1940bf215546Sopenharmony_ci 1941bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[def->name]; 1942bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, def); 1943bf215546Sopenharmony_ci ra_interval_init(interval, def); 1944bf215546Sopenharmony_ci interval->physreg_start = physreg; 1945bf215546Sopenharmony_ci interval->physreg_end = physreg + reg_size(def); 1946bf215546Sopenharmony_ci ra_file_insert(file, interval); 1947bf215546Sopenharmony_ci} 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_cistatic void 1950bf215546Sopenharmony_cihandle_live_out(struct ra_ctx *ctx, struct ir3_register *def) 1951bf215546Sopenharmony_ci{ 1952bf215546Sopenharmony_ci /* Skip parallelcopy's which in the original program are only used as phi 1953bf215546Sopenharmony_ci * arguments. Even though phi arguments are live out, they are only 1954bf215546Sopenharmony_ci * assigned when the phi is. 1955bf215546Sopenharmony_ci */ 1956bf215546Sopenharmony_ci if (def->instr->opc == OPC_META_PARALLEL_COPY) 1957bf215546Sopenharmony_ci return; 1958bf215546Sopenharmony_ci 1959bf215546Sopenharmony_ci struct ra_block_state *state = &ctx->blocks[ctx->block->index]; 1960bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[def->name]; 1961bf215546Sopenharmony_ci physreg_t physreg = ra_interval_get_physreg(interval); 1962bf215546Sopenharmony_ci if (physreg != ra_reg_get_physreg(def)) { 1963bf215546Sopenharmony_ci if (!state->renames) 1964bf215546Sopenharmony_ci state->renames = _mesa_pointer_hash_table_create(ctx); 1965bf215546Sopenharmony_ci _mesa_hash_table_insert(state->renames, def, (void *)(uintptr_t)physreg); 1966bf215546Sopenharmony_ci } 1967bf215546Sopenharmony_ci} 1968bf215546Sopenharmony_ci 1969bf215546Sopenharmony_cistatic void 1970bf215546Sopenharmony_cihandle_phi(struct ra_ctx *ctx, struct ir3_register *def) 1971bf215546Sopenharmony_ci{ 1972bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, def); 1973bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[def->name]; 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci /* phis are always scalar, so they should already be the smallest possible 1976bf215546Sopenharmony_ci * size. However they may be coalesced with other live-in values/phi 1977bf215546Sopenharmony_ci * nodes, so check for that here. 1978bf215546Sopenharmony_ci */ 1979bf215546Sopenharmony_ci struct ir3_reg_interval *parent_ir3 = 1980bf215546Sopenharmony_ci ir3_reg_interval_search(&file->reg_ctx.intervals, def->interval_start); 1981bf215546Sopenharmony_ci physreg_t physreg; 1982bf215546Sopenharmony_ci if (parent_ir3) { 1983bf215546Sopenharmony_ci struct ra_interval *parent = ir3_reg_interval_to_ra_interval(parent_ir3); 1984bf215546Sopenharmony_ci physreg = ra_interval_get_physreg(parent) + 1985bf215546Sopenharmony_ci (def->interval_start - parent_ir3->reg->interval_start); 1986bf215546Sopenharmony_ci } else { 1987bf215546Sopenharmony_ci physreg = get_reg(ctx, file, def); 1988bf215546Sopenharmony_ci } 1989bf215546Sopenharmony_ci 1990bf215546Sopenharmony_ci allocate_dst_fixed(ctx, def, physreg); 1991bf215546Sopenharmony_ci 1992bf215546Sopenharmony_ci ra_file_insert(file, interval); 1993bf215546Sopenharmony_ci} 1994bf215546Sopenharmony_ci 1995bf215546Sopenharmony_cistatic void 1996bf215546Sopenharmony_ciassign_phi(struct ra_ctx *ctx, struct ir3_instruction *phi) 1997bf215546Sopenharmony_ci{ 1998bf215546Sopenharmony_ci struct ra_file *file = ra_get_file(ctx, phi->dsts[0]); 1999bf215546Sopenharmony_ci struct ra_interval *interval = &ctx->intervals[phi->dsts[0]->name]; 2000bf215546Sopenharmony_ci assert(!interval->interval.parent); 2001bf215546Sopenharmony_ci unsigned num = ra_interval_get_num(interval); 2002bf215546Sopenharmony_ci assign_reg(phi, phi->dsts[0], num); 2003bf215546Sopenharmony_ci 2004bf215546Sopenharmony_ci /* Assign the parallelcopy sources of this phi */ 2005bf215546Sopenharmony_ci for (unsigned i = 0; i < phi->srcs_count; i++) { 2006bf215546Sopenharmony_ci if (phi->srcs[i]->def) { 2007bf215546Sopenharmony_ci assign_reg(phi, phi->srcs[i], num); 2008bf215546Sopenharmony_ci assign_reg(phi, phi->srcs[i]->def, num); 2009bf215546Sopenharmony_ci } 2010bf215546Sopenharmony_ci } 2011bf215546Sopenharmony_ci 2012bf215546Sopenharmony_ci if (phi->dsts[0]->flags & IR3_REG_UNUSED) 2013bf215546Sopenharmony_ci ra_file_remove(file, interval); 2014bf215546Sopenharmony_ci} 2015bf215546Sopenharmony_ci 2016bf215546Sopenharmony_ci/* When we split a live range, we sometimes need to emit fixup code at the end 2017bf215546Sopenharmony_ci * of a block. For example, something like: 2018bf215546Sopenharmony_ci * 2019bf215546Sopenharmony_ci * a = ... 2020bf215546Sopenharmony_ci * if (...) { 2021bf215546Sopenharmony_ci * ... 2022bf215546Sopenharmony_ci * a' = a 2023bf215546Sopenharmony_ci * b = ... // a evicted to make room for b 2024bf215546Sopenharmony_ci * ... 2025bf215546Sopenharmony_ci * } 2026bf215546Sopenharmony_ci * ... = a 2027bf215546Sopenharmony_ci * 2028bf215546Sopenharmony_ci * When we insert the copy to a' in insert_parallel_copy_instr(), this forces 2029bf215546Sopenharmony_ci * to insert another copy "a = a'" at the end of the if. Normally this would 2030bf215546Sopenharmony_ci * also entail adding a phi node, but since we're about to go out of SSA 2031bf215546Sopenharmony_ci * anyway we just insert an extra move. Note, however, that "b" might be used 2032bf215546Sopenharmony_ci * in a phi node at the end of the if and share registers with "a", so we 2033bf215546Sopenharmony_ci * have to be careful to extend any preexisting parallelcopy instruction 2034bf215546Sopenharmony_ci * instead of creating our own in order to guarantee that they properly get 2035bf215546Sopenharmony_ci * swapped. 2036bf215546Sopenharmony_ci */ 2037bf215546Sopenharmony_ci 2038bf215546Sopenharmony_cistatic void 2039bf215546Sopenharmony_ciinsert_liveout_copy(struct ir3_block *block, physreg_t dst, physreg_t src, 2040bf215546Sopenharmony_ci struct ir3_register *reg) 2041bf215546Sopenharmony_ci{ 2042bf215546Sopenharmony_ci struct ir3_instruction *old_pcopy = NULL; 2043bf215546Sopenharmony_ci if (!list_is_empty(&block->instr_list)) { 2044bf215546Sopenharmony_ci struct ir3_instruction *last = 2045bf215546Sopenharmony_ci list_entry(block->instr_list.prev, struct ir3_instruction, node); 2046bf215546Sopenharmony_ci if (last->opc == OPC_META_PARALLEL_COPY) 2047bf215546Sopenharmony_ci old_pcopy = last; 2048bf215546Sopenharmony_ci } 2049bf215546Sopenharmony_ci 2050bf215546Sopenharmony_ci unsigned old_pcopy_srcs = old_pcopy ? old_pcopy->srcs_count : 0; 2051bf215546Sopenharmony_ci struct ir3_instruction *pcopy = ir3_instr_create( 2052bf215546Sopenharmony_ci block, OPC_META_PARALLEL_COPY, old_pcopy_srcs + 1, old_pcopy_srcs + 1); 2053bf215546Sopenharmony_ci 2054bf215546Sopenharmony_ci for (unsigned i = 0; i < old_pcopy_srcs; i++) { 2055bf215546Sopenharmony_ci old_pcopy->dsts[i]->instr = pcopy; 2056bf215546Sopenharmony_ci pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i]; 2057bf215546Sopenharmony_ci } 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci unsigned flags = reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY); 2060bf215546Sopenharmony_ci 2061bf215546Sopenharmony_ci struct ir3_register *dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags); 2062bf215546Sopenharmony_ci dst_reg->wrmask = reg->wrmask; 2063bf215546Sopenharmony_ci dst_reg->size = reg->size; 2064bf215546Sopenharmony_ci assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags)); 2065bf215546Sopenharmony_ci 2066bf215546Sopenharmony_ci for (unsigned i = 0; i < old_pcopy_srcs; i++) { 2067bf215546Sopenharmony_ci pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i]; 2068bf215546Sopenharmony_ci } 2069bf215546Sopenharmony_ci 2070bf215546Sopenharmony_ci struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags); 2071bf215546Sopenharmony_ci src_reg->wrmask = reg->wrmask; 2072bf215546Sopenharmony_ci src_reg->size = reg->size; 2073bf215546Sopenharmony_ci assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags)); 2074bf215546Sopenharmony_ci 2075bf215546Sopenharmony_ci if (old_pcopy) 2076bf215546Sopenharmony_ci list_del(&old_pcopy->node); 2077bf215546Sopenharmony_ci} 2078bf215546Sopenharmony_ci 2079bf215546Sopenharmony_cistatic void 2080bf215546Sopenharmony_ciinsert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval) 2081bf215546Sopenharmony_ci{ 2082bf215546Sopenharmony_ci physreg_t physreg = ra_interval_get_physreg(interval); 2083bf215546Sopenharmony_ci 2084bf215546Sopenharmony_ci bool shared = interval->interval.reg->flags & IR3_REG_SHARED; 2085bf215546Sopenharmony_ci struct ir3_block **predecessors = 2086bf215546Sopenharmony_ci shared ? ctx->block->physical_predecessors : ctx->block->predecessors; 2087bf215546Sopenharmony_ci unsigned predecessors_count = shared 2088bf215546Sopenharmony_ci ? ctx->block->physical_predecessors_count 2089bf215546Sopenharmony_ci : ctx->block->predecessors_count; 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_ci for (unsigned i = 0; i < predecessors_count; i++) { 2092bf215546Sopenharmony_ci struct ir3_block *pred = predecessors[i]; 2093bf215546Sopenharmony_ci struct ra_block_state *pred_state = &ctx->blocks[pred->index]; 2094bf215546Sopenharmony_ci 2095bf215546Sopenharmony_ci if (!pred_state->visited) 2096bf215546Sopenharmony_ci continue; 2097bf215546Sopenharmony_ci 2098bf215546Sopenharmony_ci physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg); 2099bf215546Sopenharmony_ci if (pred_reg != physreg) { 2100bf215546Sopenharmony_ci insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg); 2101bf215546Sopenharmony_ci 2102bf215546Sopenharmony_ci /* This is a bit tricky, but when visiting the destination of a 2103bf215546Sopenharmony_ci * physical-only edge, we have two predecessors (the if and the 2104bf215546Sopenharmony_ci * header block) and both have multiple successors. We pick the 2105bf215546Sopenharmony_ci * register for all live-ins from the normal edge, which should 2106bf215546Sopenharmony_ci * guarantee that there's no need for shuffling things around in 2107bf215546Sopenharmony_ci * the normal predecessor as long as there are no phi nodes, but 2108bf215546Sopenharmony_ci * we still may need to insert fixup code in the physical 2109bf215546Sopenharmony_ci * predecessor (i.e. the last block of the if) and that has 2110bf215546Sopenharmony_ci * another successor (the block after the if) so we need to update 2111bf215546Sopenharmony_ci * the renames state for when we process the other successor. This 2112bf215546Sopenharmony_ci * crucially depends on the other successor getting processed 2113bf215546Sopenharmony_ci * after this. 2114bf215546Sopenharmony_ci * 2115bf215546Sopenharmony_ci * For normal (non-physical) edges we disallow critical edges so 2116bf215546Sopenharmony_ci * that hacks like this aren't necessary. 2117bf215546Sopenharmony_ci */ 2118bf215546Sopenharmony_ci if (!pred_state->renames) 2119bf215546Sopenharmony_ci pred_state->renames = _mesa_pointer_hash_table_create(ctx); 2120bf215546Sopenharmony_ci _mesa_hash_table_insert(pred_state->renames, interval->interval.reg, 2121bf215546Sopenharmony_ci (void *)(uintptr_t)physreg); 2122bf215546Sopenharmony_ci } 2123bf215546Sopenharmony_ci } 2124bf215546Sopenharmony_ci} 2125bf215546Sopenharmony_ci 2126bf215546Sopenharmony_cistatic void 2127bf215546Sopenharmony_ciinsert_file_live_in_moves(struct ra_ctx *ctx, struct ra_file *file) 2128bf215546Sopenharmony_ci{ 2129bf215546Sopenharmony_ci BITSET_WORD *live_in = ctx->live->live_in[ctx->block->index]; 2130bf215546Sopenharmony_ci rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals, 2131bf215546Sopenharmony_ci physreg_node) { 2132bf215546Sopenharmony_ci /* Skip phi nodes. This needs to happen after phi nodes are allocated, 2133bf215546Sopenharmony_ci * because we may have to move live-ins around to make space for phi 2134bf215546Sopenharmony_ci * nodes, but we shouldn't be handling phi nodes here. 2135bf215546Sopenharmony_ci */ 2136bf215546Sopenharmony_ci if (BITSET_TEST(live_in, interval->interval.reg->name)) 2137bf215546Sopenharmony_ci insert_live_in_move(ctx, interval); 2138bf215546Sopenharmony_ci } 2139bf215546Sopenharmony_ci} 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_cistatic void 2142bf215546Sopenharmony_ciinsert_entry_regs(struct ra_block_state *state, struct ra_file *file) 2143bf215546Sopenharmony_ci{ 2144bf215546Sopenharmony_ci rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals, 2145bf215546Sopenharmony_ci physreg_node) { 2146bf215546Sopenharmony_ci _mesa_hash_table_insert(state->entry_regs, interval->interval.reg, 2147bf215546Sopenharmony_ci (void *)(uintptr_t)interval->physreg_start); 2148bf215546Sopenharmony_ci } 2149bf215546Sopenharmony_ci} 2150bf215546Sopenharmony_ci 2151bf215546Sopenharmony_cistatic void 2152bf215546Sopenharmony_ciinsert_live_in_moves(struct ra_ctx *ctx) 2153bf215546Sopenharmony_ci{ 2154bf215546Sopenharmony_ci insert_file_live_in_moves(ctx, &ctx->full); 2155bf215546Sopenharmony_ci insert_file_live_in_moves(ctx, &ctx->half); 2156bf215546Sopenharmony_ci insert_file_live_in_moves(ctx, &ctx->shared); 2157bf215546Sopenharmony_ci 2158bf215546Sopenharmony_ci /* If not all predecessors are visited, insert live-in regs so that 2159bf215546Sopenharmony_ci * insert_live_out_moves() will work. 2160bf215546Sopenharmony_ci */ 2161bf215546Sopenharmony_ci bool all_preds_visited = true; 2162bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->block->predecessors_count; i++) { 2163bf215546Sopenharmony_ci if (!ctx->blocks[ctx->block->predecessors[i]->index].visited) { 2164bf215546Sopenharmony_ci all_preds_visited = false; 2165bf215546Sopenharmony_ci break; 2166bf215546Sopenharmony_ci } 2167bf215546Sopenharmony_ci } 2168bf215546Sopenharmony_ci 2169bf215546Sopenharmony_ci if (!all_preds_visited) { 2170bf215546Sopenharmony_ci struct ra_block_state *state = &ctx->blocks[ctx->block->index]; 2171bf215546Sopenharmony_ci state->entry_regs = _mesa_pointer_hash_table_create(ctx); 2172bf215546Sopenharmony_ci 2173bf215546Sopenharmony_ci insert_entry_regs(state, &ctx->full); 2174bf215546Sopenharmony_ci insert_entry_regs(state, &ctx->half); 2175bf215546Sopenharmony_ci insert_entry_regs(state, &ctx->shared); 2176bf215546Sopenharmony_ci } 2177bf215546Sopenharmony_ci} 2178bf215546Sopenharmony_ci 2179bf215546Sopenharmony_cistatic void 2180bf215546Sopenharmony_ciinsert_live_out_move(struct ra_ctx *ctx, struct ra_interval *interval) 2181bf215546Sopenharmony_ci{ 2182bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 2183bf215546Sopenharmony_ci if (!ctx->block->successors[i]) 2184bf215546Sopenharmony_ci continue; 2185bf215546Sopenharmony_ci 2186bf215546Sopenharmony_ci struct ir3_block *succ = ctx->block->successors[i]; 2187bf215546Sopenharmony_ci struct ra_block_state *succ_state = &ctx->blocks[succ->index]; 2188bf215546Sopenharmony_ci 2189bf215546Sopenharmony_ci if (!succ_state->visited) 2190bf215546Sopenharmony_ci continue; 2191bf215546Sopenharmony_ci 2192bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search( 2193bf215546Sopenharmony_ci succ_state->entry_regs, interval->interval.reg); 2194bf215546Sopenharmony_ci if (!entry) 2195bf215546Sopenharmony_ci continue; 2196bf215546Sopenharmony_ci 2197bf215546Sopenharmony_ci physreg_t new_reg = (physreg_t)(uintptr_t)entry->data; 2198bf215546Sopenharmony_ci if (new_reg != interval->physreg_start) { 2199bf215546Sopenharmony_ci insert_liveout_copy(ctx->block, new_reg, interval->physreg_start, 2200bf215546Sopenharmony_ci interval->interval.reg); 2201bf215546Sopenharmony_ci } 2202bf215546Sopenharmony_ci } 2203bf215546Sopenharmony_ci} 2204bf215546Sopenharmony_ci 2205bf215546Sopenharmony_cistatic void 2206bf215546Sopenharmony_ciinsert_file_live_out_moves(struct ra_ctx *ctx, struct ra_file *file) 2207bf215546Sopenharmony_ci{ 2208bf215546Sopenharmony_ci rb_tree_foreach (struct ra_interval, interval, &file->physreg_intervals, 2209bf215546Sopenharmony_ci physreg_node) { 2210bf215546Sopenharmony_ci insert_live_out_move(ctx, interval); 2211bf215546Sopenharmony_ci } 2212bf215546Sopenharmony_ci} 2213bf215546Sopenharmony_ci 2214bf215546Sopenharmony_cistatic void 2215bf215546Sopenharmony_ciinsert_live_out_moves(struct ra_ctx *ctx) 2216bf215546Sopenharmony_ci{ 2217bf215546Sopenharmony_ci insert_file_live_out_moves(ctx, &ctx->full); 2218bf215546Sopenharmony_ci insert_file_live_out_moves(ctx, &ctx->half); 2219bf215546Sopenharmony_ci insert_file_live_out_moves(ctx, &ctx->shared); 2220bf215546Sopenharmony_ci} 2221bf215546Sopenharmony_ci 2222bf215546Sopenharmony_cistatic void 2223bf215546Sopenharmony_cihandle_block(struct ra_ctx *ctx, struct ir3_block *block) 2224bf215546Sopenharmony_ci{ 2225bf215546Sopenharmony_ci ctx->block = block; 2226bf215546Sopenharmony_ci 2227bf215546Sopenharmony_ci /* Reset the register files from the last block */ 2228bf215546Sopenharmony_ci ra_file_init(&ctx->full); 2229bf215546Sopenharmony_ci ra_file_init(&ctx->half); 2230bf215546Sopenharmony_ci ra_file_init(&ctx->shared); 2231bf215546Sopenharmony_ci 2232bf215546Sopenharmony_ci /* Handle live-ins, phis, and input meta-instructions. These all appear 2233bf215546Sopenharmony_ci * live at the beginning of the block, and interfere with each other 2234bf215546Sopenharmony_ci * therefore need to be allocated "in parallel". This means that we 2235bf215546Sopenharmony_ci * have to allocate all of them, inserting them into the file, and then 2236bf215546Sopenharmony_ci * delay updating the IR until all of them are allocated. 2237bf215546Sopenharmony_ci * 2238bf215546Sopenharmony_ci * Handle precolored inputs first, because we need to make sure that other 2239bf215546Sopenharmony_ci * inputs don't overwrite them. We shouldn't have both live-ins/phi nodes 2240bf215546Sopenharmony_ci * and inputs at the same time, because the first block doesn't have 2241bf215546Sopenharmony_ci * predecessors. Therefore handle_live_in doesn't have to worry about 2242bf215546Sopenharmony_ci * them. 2243bf215546Sopenharmony_ci */ 2244bf215546Sopenharmony_ci 2245bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 2246bf215546Sopenharmony_ci if (instr->opc == OPC_META_INPUT) 2247bf215546Sopenharmony_ci handle_precolored_input(ctx, instr); 2248bf215546Sopenharmony_ci else 2249bf215546Sopenharmony_ci break; 2250bf215546Sopenharmony_ci } 2251bf215546Sopenharmony_ci 2252bf215546Sopenharmony_ci unsigned name; 2253bf215546Sopenharmony_ci BITSET_FOREACH_SET (name, ctx->live->live_in[block->index], 2254bf215546Sopenharmony_ci ctx->live->definitions_count) { 2255bf215546Sopenharmony_ci struct ir3_register *reg = ctx->live->definitions[name]; 2256bf215546Sopenharmony_ci handle_live_in(ctx, reg); 2257bf215546Sopenharmony_ci } 2258bf215546Sopenharmony_ci 2259bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 2260bf215546Sopenharmony_ci if (instr->opc == OPC_META_PHI) 2261bf215546Sopenharmony_ci handle_phi(ctx, instr->dsts[0]); 2262bf215546Sopenharmony_ci else if (instr->opc == OPC_META_INPUT || 2263bf215546Sopenharmony_ci instr->opc == OPC_META_TEX_PREFETCH) 2264bf215546Sopenharmony_ci handle_input(ctx, instr); 2265bf215546Sopenharmony_ci else 2266bf215546Sopenharmony_ci break; 2267bf215546Sopenharmony_ci } 2268bf215546Sopenharmony_ci 2269bf215546Sopenharmony_ci /* After this point, every live-in/phi/input has an interval assigned to 2270bf215546Sopenharmony_ci * it. We delay actually assigning values until everything has been 2271bf215546Sopenharmony_ci * allocated, so we can simply ignore any parallel copy entries created 2272bf215546Sopenharmony_ci * when shuffling them around. 2273bf215546Sopenharmony_ci */ 2274bf215546Sopenharmony_ci ctx->parallel_copies_count = 0; 2275bf215546Sopenharmony_ci 2276bf215546Sopenharmony_ci insert_live_in_moves(ctx); 2277bf215546Sopenharmony_ci 2278bf215546Sopenharmony_ci if (RA_DEBUG) { 2279bf215546Sopenharmony_ci d("after live-in block %u:\n", block->index); 2280bf215546Sopenharmony_ci ra_ctx_dump(ctx); 2281bf215546Sopenharmony_ci } 2282bf215546Sopenharmony_ci 2283bf215546Sopenharmony_ci /* Now we're done with processing live-ins, and can handle the body of the 2284bf215546Sopenharmony_ci * block. 2285bf215546Sopenharmony_ci */ 2286bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 2287bf215546Sopenharmony_ci di(instr, "processing"); 2288bf215546Sopenharmony_ci 2289bf215546Sopenharmony_ci if (instr->opc == OPC_META_PHI) 2290bf215546Sopenharmony_ci assign_phi(ctx, instr); 2291bf215546Sopenharmony_ci else if (instr->opc == OPC_META_INPUT || 2292bf215546Sopenharmony_ci instr->opc == OPC_META_TEX_PREFETCH) 2293bf215546Sopenharmony_ci assign_input(ctx, instr); 2294bf215546Sopenharmony_ci else if (instr->opc == OPC_META_SPLIT) 2295bf215546Sopenharmony_ci handle_split(ctx, instr); 2296bf215546Sopenharmony_ci else if (instr->opc == OPC_META_COLLECT) 2297bf215546Sopenharmony_ci handle_collect(ctx, instr); 2298bf215546Sopenharmony_ci else if (instr->opc == OPC_META_PARALLEL_COPY) 2299bf215546Sopenharmony_ci handle_pcopy(ctx, instr); 2300bf215546Sopenharmony_ci else if (instr->opc == OPC_CHMASK) 2301bf215546Sopenharmony_ci handle_chmask(ctx, instr); 2302bf215546Sopenharmony_ci else 2303bf215546Sopenharmony_ci handle_normal_instr(ctx, instr); 2304bf215546Sopenharmony_ci 2305bf215546Sopenharmony_ci if (RA_DEBUG) 2306bf215546Sopenharmony_ci ra_ctx_dump(ctx); 2307bf215546Sopenharmony_ci } 2308bf215546Sopenharmony_ci 2309bf215546Sopenharmony_ci insert_live_out_moves(ctx); 2310bf215546Sopenharmony_ci 2311bf215546Sopenharmony_ci BITSET_FOREACH_SET (name, ctx->live->live_out[block->index], 2312bf215546Sopenharmony_ci ctx->live->definitions_count) { 2313bf215546Sopenharmony_ci struct ir3_register *reg = ctx->live->definitions[name]; 2314bf215546Sopenharmony_ci handle_live_out(ctx, reg); 2315bf215546Sopenharmony_ci } 2316bf215546Sopenharmony_ci 2317bf215546Sopenharmony_ci ctx->blocks[block->index].visited = true; 2318bf215546Sopenharmony_ci} 2319bf215546Sopenharmony_ci 2320bf215546Sopenharmony_cistatic unsigned 2321bf215546Sopenharmony_cicalc_target_full_pressure(struct ir3_shader_variant *v, unsigned pressure) 2322bf215546Sopenharmony_ci{ 2323bf215546Sopenharmony_ci /* Registers are allocated in units of vec4, so switch from units of 2324bf215546Sopenharmony_ci * half-regs to vec4. 2325bf215546Sopenharmony_ci */ 2326bf215546Sopenharmony_ci unsigned reg_count = DIV_ROUND_UP(pressure, 2 * 4); 2327bf215546Sopenharmony_ci 2328bf215546Sopenharmony_ci bool double_threadsize = ir3_should_double_threadsize(v, reg_count); 2329bf215546Sopenharmony_ci 2330bf215546Sopenharmony_ci unsigned target = reg_count; 2331bf215546Sopenharmony_ci unsigned reg_independent_max_waves = 2332bf215546Sopenharmony_ci ir3_get_reg_independent_max_waves(v, double_threadsize); 2333bf215546Sopenharmony_ci unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves( 2334bf215546Sopenharmony_ci v->compiler, reg_count, double_threadsize); 2335bf215546Sopenharmony_ci unsigned target_waves = 2336bf215546Sopenharmony_ci MIN2(reg_independent_max_waves, reg_dependent_max_waves); 2337bf215546Sopenharmony_ci 2338bf215546Sopenharmony_ci while (target <= RA_FULL_SIZE / (2 * 4) && 2339bf215546Sopenharmony_ci ir3_should_double_threadsize(v, target) == double_threadsize && 2340bf215546Sopenharmony_ci ir3_get_reg_dependent_max_waves(v->compiler, target, 2341bf215546Sopenharmony_ci double_threadsize) >= target_waves) 2342bf215546Sopenharmony_ci target++; 2343bf215546Sopenharmony_ci 2344bf215546Sopenharmony_ci return (target - 1) * 2 * 4; 2345bf215546Sopenharmony_ci} 2346bf215546Sopenharmony_ci 2347bf215546Sopenharmony_cistatic void 2348bf215546Sopenharmony_ciadd_pressure(struct ir3_pressure *pressure, struct ir3_register *reg, 2349bf215546Sopenharmony_ci bool merged_regs) 2350bf215546Sopenharmony_ci{ 2351bf215546Sopenharmony_ci unsigned size = reg_size(reg); 2352bf215546Sopenharmony_ci if (reg->flags & IR3_REG_HALF) 2353bf215546Sopenharmony_ci pressure->half += size; 2354bf215546Sopenharmony_ci if (!(reg->flags & IR3_REG_HALF) || merged_regs) 2355bf215546Sopenharmony_ci pressure->full += size; 2356bf215546Sopenharmony_ci} 2357bf215546Sopenharmony_ci 2358bf215546Sopenharmony_cistatic void 2359bf215546Sopenharmony_cidummy_interval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval) 2360bf215546Sopenharmony_ci{ 2361bf215546Sopenharmony_ci} 2362bf215546Sopenharmony_ci 2363bf215546Sopenharmony_cistatic void 2364bf215546Sopenharmony_cidummy_interval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval) 2365bf215546Sopenharmony_ci{ 2366bf215546Sopenharmony_ci} 2367bf215546Sopenharmony_ci 2368bf215546Sopenharmony_cistatic void 2369bf215546Sopenharmony_cidummy_interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *parent, 2370bf215546Sopenharmony_ci struct ir3_reg_interval *child) 2371bf215546Sopenharmony_ci{ 2372bf215546Sopenharmony_ci} 2373bf215546Sopenharmony_ci 2374bf215546Sopenharmony_ci/* Calculate the minimum possible limit on register pressure so that spilling 2375bf215546Sopenharmony_ci * still succeeds. Used to implement IR3_SHADER_DEBUG=spillall. 2376bf215546Sopenharmony_ci */ 2377bf215546Sopenharmony_ci 2378bf215546Sopenharmony_cistatic void 2379bf215546Sopenharmony_cicalc_min_limit_pressure(struct ir3_shader_variant *v, 2380bf215546Sopenharmony_ci struct ir3_liveness *live, 2381bf215546Sopenharmony_ci struct ir3_pressure *limit) 2382bf215546Sopenharmony_ci{ 2383bf215546Sopenharmony_ci struct ir3_block *start = ir3_start_block(v->ir); 2384bf215546Sopenharmony_ci struct ir3_reg_ctx *ctx = ralloc(NULL, struct ir3_reg_ctx); 2385bf215546Sopenharmony_ci struct ir3_reg_interval *intervals = 2386bf215546Sopenharmony_ci rzalloc_array(ctx, struct ir3_reg_interval, live->definitions_count); 2387bf215546Sopenharmony_ci 2388bf215546Sopenharmony_ci ctx->interval_add = dummy_interval_add; 2389bf215546Sopenharmony_ci ctx->interval_delete = dummy_interval_delete; 2390bf215546Sopenharmony_ci ctx->interval_readd = dummy_interval_readd; 2391bf215546Sopenharmony_ci 2392bf215546Sopenharmony_ci limit->full = limit->half = 0; 2393bf215546Sopenharmony_ci 2394bf215546Sopenharmony_ci struct ir3_pressure cur_pressure = {0}; 2395bf215546Sopenharmony_ci foreach_instr (input, &start->instr_list) { 2396bf215546Sopenharmony_ci if (input->opc != OPC_META_INPUT && 2397bf215546Sopenharmony_ci input->opc != OPC_META_TEX_PREFETCH) 2398bf215546Sopenharmony_ci break; 2399bf215546Sopenharmony_ci 2400bf215546Sopenharmony_ci add_pressure(&cur_pressure, input->dsts[0], v->mergedregs); 2401bf215546Sopenharmony_ci } 2402bf215546Sopenharmony_ci 2403bf215546Sopenharmony_ci limit->full = MAX2(limit->full, cur_pressure.full); 2404bf215546Sopenharmony_ci limit->half = MAX2(limit->half, cur_pressure.half); 2405bf215546Sopenharmony_ci 2406bf215546Sopenharmony_ci foreach_instr (input, &start->instr_list) { 2407bf215546Sopenharmony_ci if (input->opc != OPC_META_INPUT && 2408bf215546Sopenharmony_ci input->opc != OPC_META_TEX_PREFETCH) 2409bf215546Sopenharmony_ci break; 2410bf215546Sopenharmony_ci 2411bf215546Sopenharmony_ci /* pre-colored inputs may have holes, which increases the pressure. */ 2412bf215546Sopenharmony_ci struct ir3_register *dst = input->dsts[0]; 2413bf215546Sopenharmony_ci if (dst->num != INVALID_REG) { 2414bf215546Sopenharmony_ci unsigned physreg = ra_reg_get_physreg(dst) + reg_size(dst); 2415bf215546Sopenharmony_ci if (dst->flags & IR3_REG_HALF) 2416bf215546Sopenharmony_ci limit->half = MAX2(limit->half, physreg); 2417bf215546Sopenharmony_ci if (!(dst->flags & IR3_REG_HALF) || v->mergedregs) 2418bf215546Sopenharmony_ci limit->full = MAX2(limit->full, physreg); 2419bf215546Sopenharmony_ci } 2420bf215546Sopenharmony_ci } 2421bf215546Sopenharmony_ci 2422bf215546Sopenharmony_ci foreach_block (block, &v->ir->block_list) { 2423bf215546Sopenharmony_ci rb_tree_init(&ctx->intervals); 2424bf215546Sopenharmony_ci 2425bf215546Sopenharmony_ci unsigned name; 2426bf215546Sopenharmony_ci BITSET_FOREACH_SET (name, live->live_in[block->index], 2427bf215546Sopenharmony_ci live->definitions_count) { 2428bf215546Sopenharmony_ci struct ir3_register *reg = live->definitions[name]; 2429bf215546Sopenharmony_ci ir3_reg_interval_init(&intervals[reg->name], reg); 2430bf215546Sopenharmony_ci ir3_reg_interval_insert(ctx, &intervals[reg->name]); 2431bf215546Sopenharmony_ci } 2432bf215546Sopenharmony_ci 2433bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 2434bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 2435bf215546Sopenharmony_ci ir3_reg_interval_init(&intervals[dst->name], dst); 2436bf215546Sopenharmony_ci } 2437bf215546Sopenharmony_ci /* phis and parallel copies can be deleted via spilling */ 2438bf215546Sopenharmony_ci 2439bf215546Sopenharmony_ci if (instr->opc == OPC_META_PHI) { 2440bf215546Sopenharmony_ci ir3_reg_interval_insert(ctx, &intervals[instr->dsts[0]->name]); 2441bf215546Sopenharmony_ci continue; 2442bf215546Sopenharmony_ci } 2443bf215546Sopenharmony_ci 2444bf215546Sopenharmony_ci if (instr->opc == OPC_META_PARALLEL_COPY) 2445bf215546Sopenharmony_ci continue; 2446bf215546Sopenharmony_ci 2447bf215546Sopenharmony_ci cur_pressure = (struct ir3_pressure) {0}; 2448bf215546Sopenharmony_ci 2449bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 2450bf215546Sopenharmony_ci if (dst->tied && !(dst->tied->flags & IR3_REG_KILL)) 2451bf215546Sopenharmony_ci add_pressure(&cur_pressure, dst, v->mergedregs); 2452bf215546Sopenharmony_ci } 2453bf215546Sopenharmony_ci 2454bf215546Sopenharmony_ci ra_foreach_src_rev (src, instr) { 2455bf215546Sopenharmony_ci /* We currently don't support spilling the parent of a source when 2456bf215546Sopenharmony_ci * making space for sources, so we have to keep track of the 2457bf215546Sopenharmony_ci * intervals and figure out the root of the tree to figure out how 2458bf215546Sopenharmony_ci * much space we need. 2459bf215546Sopenharmony_ci * 2460bf215546Sopenharmony_ci * TODO: We should probably support this in the spiller. 2461bf215546Sopenharmony_ci */ 2462bf215546Sopenharmony_ci struct ir3_reg_interval *interval = &intervals[src->def->name]; 2463bf215546Sopenharmony_ci while (interval->parent) 2464bf215546Sopenharmony_ci interval = interval->parent; 2465bf215546Sopenharmony_ci add_pressure(&cur_pressure, interval->reg, v->mergedregs); 2466bf215546Sopenharmony_ci 2467bf215546Sopenharmony_ci if (src->flags & IR3_REG_FIRST_KILL) 2468bf215546Sopenharmony_ci ir3_reg_interval_remove(ctx, &intervals[src->def->name]); 2469bf215546Sopenharmony_ci } 2470bf215546Sopenharmony_ci 2471bf215546Sopenharmony_ci limit->full = MAX2(limit->full, cur_pressure.full); 2472bf215546Sopenharmony_ci limit->half = MAX2(limit->half, cur_pressure.half); 2473bf215546Sopenharmony_ci 2474bf215546Sopenharmony_ci cur_pressure = (struct ir3_pressure) {0}; 2475bf215546Sopenharmony_ci 2476bf215546Sopenharmony_ci ra_foreach_dst (dst, instr) { 2477bf215546Sopenharmony_ci ir3_reg_interval_init(&intervals[dst->name], dst); 2478bf215546Sopenharmony_ci ir3_reg_interval_insert(ctx, &intervals[dst->name]); 2479bf215546Sopenharmony_ci add_pressure(&cur_pressure, dst, v->mergedregs); 2480bf215546Sopenharmony_ci } 2481bf215546Sopenharmony_ci 2482bf215546Sopenharmony_ci limit->full = MAX2(limit->full, cur_pressure.full); 2483bf215546Sopenharmony_ci limit->half = MAX2(limit->half, cur_pressure.half); 2484bf215546Sopenharmony_ci } 2485bf215546Sopenharmony_ci } 2486bf215546Sopenharmony_ci 2487bf215546Sopenharmony_ci /* Account for the base register, which needs to be available everywhere. */ 2488bf215546Sopenharmony_ci limit->full += 2; 2489bf215546Sopenharmony_ci 2490bf215546Sopenharmony_ci ralloc_free(ctx); 2491bf215546Sopenharmony_ci} 2492bf215546Sopenharmony_ci 2493bf215546Sopenharmony_ci/* 2494bf215546Sopenharmony_ci * If barriers are used, it must be possible for all waves in the workgroup 2495bf215546Sopenharmony_ci * to execute concurrently. Thus we may have to reduce the registers limit. 2496bf215546Sopenharmony_ci */ 2497bf215546Sopenharmony_cistatic void 2498bf215546Sopenharmony_cicalc_limit_pressure_for_cs_with_barrier(struct ir3_shader_variant *v, 2499bf215546Sopenharmony_ci struct ir3_pressure *limit_pressure) 2500bf215546Sopenharmony_ci{ 2501bf215546Sopenharmony_ci const struct ir3_compiler *compiler = v->compiler; 2502bf215546Sopenharmony_ci 2503bf215546Sopenharmony_ci unsigned threads_per_wg; 2504bf215546Sopenharmony_ci if (v->local_size_variable) { 2505bf215546Sopenharmony_ci /* We have to expect the worst case. */ 2506bf215546Sopenharmony_ci threads_per_wg = compiler->max_variable_workgroup_size; 2507bf215546Sopenharmony_ci } else { 2508bf215546Sopenharmony_ci threads_per_wg = v->local_size[0] * v->local_size[1] * v->local_size[2]; 2509bf215546Sopenharmony_ci } 2510bf215546Sopenharmony_ci 2511bf215546Sopenharmony_ci /* The register file is grouped into reg_size_vec4 number of parts. 2512bf215546Sopenharmony_ci * Each part has enough registers to add a single vec4 register to 2513bf215546Sopenharmony_ci * each thread of a single-sized wave-pair. With double threadsize 2514bf215546Sopenharmony_ci * each wave-pair would consume two parts of the register file to get 2515bf215546Sopenharmony_ci * a single vec4 for a thread. The more active wave-pairs the less 2516bf215546Sopenharmony_ci * parts each could get. 2517bf215546Sopenharmony_ci */ 2518bf215546Sopenharmony_ci 2519bf215546Sopenharmony_ci bool double_threadsize = ir3_should_double_threadsize(v, 0); 2520bf215546Sopenharmony_ci unsigned waves_per_wg = DIV_ROUND_UP( 2521bf215546Sopenharmony_ci threads_per_wg, compiler->threadsize_base * (double_threadsize ? 2 : 1) * 2522bf215546Sopenharmony_ci compiler->wave_granularity); 2523bf215546Sopenharmony_ci 2524bf215546Sopenharmony_ci uint32_t vec4_regs_per_thread = 2525bf215546Sopenharmony_ci compiler->reg_size_vec4 / (waves_per_wg * (double_threadsize ? 2 : 1)); 2526bf215546Sopenharmony_ci assert(vec4_regs_per_thread > 0); 2527bf215546Sopenharmony_ci 2528bf215546Sopenharmony_ci uint32_t half_regs_per_thread = vec4_regs_per_thread * 4 * 2; 2529bf215546Sopenharmony_ci 2530bf215546Sopenharmony_ci if (limit_pressure->full > half_regs_per_thread) { 2531bf215546Sopenharmony_ci if (v->mergedregs) { 2532bf215546Sopenharmony_ci limit_pressure->full = half_regs_per_thread; 2533bf215546Sopenharmony_ci } else { 2534bf215546Sopenharmony_ci /* TODO: Handle !mergedregs case, probably we would have to do this 2535bf215546Sopenharmony_ci * after the first register pressure pass. 2536bf215546Sopenharmony_ci */ 2537bf215546Sopenharmony_ci } 2538bf215546Sopenharmony_ci } 2539bf215546Sopenharmony_ci} 2540bf215546Sopenharmony_ci 2541bf215546Sopenharmony_ciint 2542bf215546Sopenharmony_ciir3_ra(struct ir3_shader_variant *v) 2543bf215546Sopenharmony_ci{ 2544bf215546Sopenharmony_ci ir3_calc_dominance(v->ir); 2545bf215546Sopenharmony_ci 2546bf215546Sopenharmony_ci ir3_create_parallel_copies(v->ir); 2547bf215546Sopenharmony_ci 2548bf215546Sopenharmony_ci struct ra_ctx *ctx = rzalloc(NULL, struct ra_ctx); 2549bf215546Sopenharmony_ci 2550bf215546Sopenharmony_ci ctx->merged_regs = v->mergedregs; 2551bf215546Sopenharmony_ci ctx->compiler = v->compiler; 2552bf215546Sopenharmony_ci ctx->stage = v->type; 2553bf215546Sopenharmony_ci 2554bf215546Sopenharmony_ci struct ir3_liveness *live = ir3_calc_liveness(ctx, v->ir); 2555bf215546Sopenharmony_ci 2556bf215546Sopenharmony_ci ir3_debug_print(v->ir, "AFTER: create_parallel_copies"); 2557bf215546Sopenharmony_ci 2558bf215546Sopenharmony_ci ir3_merge_regs(live, v->ir); 2559bf215546Sopenharmony_ci 2560bf215546Sopenharmony_ci struct ir3_pressure max_pressure; 2561bf215546Sopenharmony_ci ir3_calc_pressure(v, live, &max_pressure); 2562bf215546Sopenharmony_ci d("max pressure:"); 2563bf215546Sopenharmony_ci d("\tfull: %u", max_pressure.full); 2564bf215546Sopenharmony_ci d("\thalf: %u", max_pressure.half); 2565bf215546Sopenharmony_ci d("\tshared: %u", max_pressure.shared); 2566bf215546Sopenharmony_ci 2567bf215546Sopenharmony_ci struct ir3_pressure limit_pressure; 2568bf215546Sopenharmony_ci limit_pressure.full = RA_FULL_SIZE; 2569bf215546Sopenharmony_ci limit_pressure.half = RA_HALF_SIZE; 2570bf215546Sopenharmony_ci limit_pressure.shared = RA_SHARED_SIZE; 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci if (gl_shader_stage_is_compute(v->type) && v->has_barrier) { 2573bf215546Sopenharmony_ci calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure); 2574bf215546Sopenharmony_ci } 2575bf215546Sopenharmony_ci 2576bf215546Sopenharmony_ci /* If the user forces a doubled threadsize, we may have to lower the limit 2577bf215546Sopenharmony_ci * because on some gens the register file is not big enough to hold a 2578bf215546Sopenharmony_ci * double-size wave with all 48 registers in use. 2579bf215546Sopenharmony_ci */ 2580bf215546Sopenharmony_ci if (v->real_wavesize == IR3_DOUBLE_ONLY) { 2581bf215546Sopenharmony_ci limit_pressure.full = 2582bf215546Sopenharmony_ci MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16); 2583bf215546Sopenharmony_ci } 2584bf215546Sopenharmony_ci 2585bf215546Sopenharmony_ci /* If requested, lower the limit so that spilling happens more often. */ 2586bf215546Sopenharmony_ci if (ir3_shader_debug & IR3_DBG_SPILLALL) 2587bf215546Sopenharmony_ci calc_min_limit_pressure(v, live, &limit_pressure); 2588bf215546Sopenharmony_ci 2589bf215546Sopenharmony_ci if (max_pressure.shared > limit_pressure.shared) { 2590bf215546Sopenharmony_ci /* TODO shared reg -> normal reg spilling */ 2591bf215546Sopenharmony_ci d("shared max pressure exceeded!"); 2592bf215546Sopenharmony_ci goto fail; 2593bf215546Sopenharmony_ci } 2594bf215546Sopenharmony_ci 2595bf215546Sopenharmony_ci bool spilled = false; 2596bf215546Sopenharmony_ci if (max_pressure.full > limit_pressure.full || 2597bf215546Sopenharmony_ci max_pressure.half > limit_pressure.half) { 2598bf215546Sopenharmony_ci if (!v->compiler->has_pvtmem) { 2599bf215546Sopenharmony_ci d("max pressure exceeded!"); 2600bf215546Sopenharmony_ci goto fail; 2601bf215546Sopenharmony_ci } 2602bf215546Sopenharmony_ci d("max pressure exceeded, spilling!"); 2603bf215546Sopenharmony_ci IR3_PASS(v->ir, ir3_spill, v, &live, &limit_pressure); 2604bf215546Sopenharmony_ci ir3_calc_pressure(v, live, &max_pressure); 2605bf215546Sopenharmony_ci assert(max_pressure.full <= limit_pressure.full && 2606bf215546Sopenharmony_ci max_pressure.half <= limit_pressure.half); 2607bf215546Sopenharmony_ci spilled = true; 2608bf215546Sopenharmony_ci } 2609bf215546Sopenharmony_ci 2610bf215546Sopenharmony_ci ctx->live = live; 2611bf215546Sopenharmony_ci ctx->intervals = 2612bf215546Sopenharmony_ci rzalloc_array(ctx, struct ra_interval, live->definitions_count); 2613bf215546Sopenharmony_ci ctx->blocks = rzalloc_array(ctx, struct ra_block_state, live->block_count); 2614bf215546Sopenharmony_ci 2615bf215546Sopenharmony_ci ctx->full.size = calc_target_full_pressure(v, max_pressure.full); 2616bf215546Sopenharmony_ci d("full size: %u", ctx->full.size); 2617bf215546Sopenharmony_ci 2618bf215546Sopenharmony_ci if (!v->mergedregs) 2619bf215546Sopenharmony_ci ctx->half.size = RA_HALF_SIZE; 2620bf215546Sopenharmony_ci 2621bf215546Sopenharmony_ci ctx->shared.size = RA_SHARED_SIZE; 2622bf215546Sopenharmony_ci 2623bf215546Sopenharmony_ci ctx->full.start = ctx->half.start = ctx->shared.start = 0; 2624bf215546Sopenharmony_ci 2625bf215546Sopenharmony_ci foreach_block (block, &v->ir->block_list) 2626bf215546Sopenharmony_ci handle_block(ctx, block); 2627bf215546Sopenharmony_ci 2628bf215546Sopenharmony_ci ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count); 2629bf215546Sopenharmony_ci 2630bf215546Sopenharmony_ci /* Strip array-ness and SSA-ness at the end, because various helpers still 2631bf215546Sopenharmony_ci * need to work even on definitions that have already been assigned. For 2632bf215546Sopenharmony_ci * example, we need to preserve array-ness so that array live-ins have the 2633bf215546Sopenharmony_ci * right size. 2634bf215546Sopenharmony_ci */ 2635bf215546Sopenharmony_ci foreach_block (block, &v->ir->block_list) { 2636bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 2637bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->dsts_count; i++) { 2638bf215546Sopenharmony_ci instr->dsts[i]->flags &= ~IR3_REG_SSA; 2639bf215546Sopenharmony_ci 2640bf215546Sopenharmony_ci /* Parallel copies of array registers copy the whole register, and 2641bf215546Sopenharmony_ci * we need some way to let the parallel copy code know that this was 2642bf215546Sopenharmony_ci * an array whose size is determined by reg->size. So keep the array 2643bf215546Sopenharmony_ci * flag on those. spill/reload also need to work on the entire 2644bf215546Sopenharmony_ci * array. 2645bf215546Sopenharmony_ci */ 2646bf215546Sopenharmony_ci if (!is_meta(instr) && instr->opc != OPC_RELOAD_MACRO) 2647bf215546Sopenharmony_ci instr->dsts[i]->flags &= ~IR3_REG_ARRAY; 2648bf215546Sopenharmony_ci } 2649bf215546Sopenharmony_ci 2650bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->srcs_count; i++) { 2651bf215546Sopenharmony_ci instr->srcs[i]->flags &= ~IR3_REG_SSA; 2652bf215546Sopenharmony_ci 2653bf215546Sopenharmony_ci if (!is_meta(instr) && instr->opc != OPC_SPILL_MACRO) 2654bf215546Sopenharmony_ci instr->srcs[i]->flags &= ~IR3_REG_ARRAY; 2655bf215546Sopenharmony_ci } 2656bf215546Sopenharmony_ci } 2657bf215546Sopenharmony_ci } 2658bf215546Sopenharmony_ci 2659bf215546Sopenharmony_ci ir3_debug_print(v->ir, "AFTER: register allocation"); 2660bf215546Sopenharmony_ci 2661bf215546Sopenharmony_ci if (spilled) { 2662bf215546Sopenharmony_ci IR3_PASS(v->ir, ir3_lower_spill); 2663bf215546Sopenharmony_ci } 2664bf215546Sopenharmony_ci 2665bf215546Sopenharmony_ci ir3_lower_copies(v); 2666bf215546Sopenharmony_ci 2667bf215546Sopenharmony_ci ir3_debug_print(v->ir, "AFTER: ir3_lower_copies"); 2668bf215546Sopenharmony_ci 2669bf215546Sopenharmony_ci ralloc_free(ctx); 2670bf215546Sopenharmony_ci 2671bf215546Sopenharmony_ci return 0; 2672bf215546Sopenharmony_cifail: 2673bf215546Sopenharmony_ci ralloc_free(ctx); 2674bf215546Sopenharmony_ci return -1; 2675bf215546Sopenharmony_ci} 2676