1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2022 Alyssa Rosenzweig <alyssa@rosenzweig.io> 3bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22bf215546Sopenharmony_ci * SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "agx_compiler.h" 26bf215546Sopenharmony_ci#include "agx_builder.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* 29bf215546Sopenharmony_ci * Emits code for 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * for (int i = 0; i < n; ++i) 32bf215546Sopenharmony_ci * registers[dests[i]] = registers[srcs[i]]; 33bf215546Sopenharmony_ci * 34bf215546Sopenharmony_ci * ...with all copies happening in parallel. 35bf215546Sopenharmony_ci * 36bf215546Sopenharmony_ci * That is, emit machine instructions equivalent to a parallel copy. This is 37bf215546Sopenharmony_ci * used to lower not only parallel copies but also collects and splits, which 38bf215546Sopenharmony_ci * also have parallel copy semantics. 39bf215546Sopenharmony_ci * 40bf215546Sopenharmony_ci * We only handles register-register copies, not general agx_index sources. This 41bf215546Sopenharmony_ci * suffices for its internal use for register allocation. 42bf215546Sopenharmony_ci */ 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_cistatic void 45bf215546Sopenharmony_cido_copy(agx_builder *b, const struct agx_copy *copy) 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_ci agx_mov_to(b, agx_register(copy->dest, copy->size), 48bf215546Sopenharmony_ci agx_register(copy->src, copy->size)); 49bf215546Sopenharmony_ci} 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_cistatic void 52bf215546Sopenharmony_cido_swap(agx_builder *b, const struct agx_copy *copy) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci if (copy->dest == copy->src) 55bf215546Sopenharmony_ci return; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci agx_index x = agx_register(copy->dest, copy->size); 58bf215546Sopenharmony_ci agx_index y = agx_register(copy->src, copy->size); 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci agx_xor_to(b, x, x, y); 61bf215546Sopenharmony_ci agx_xor_to(b, y, x, y); 62bf215546Sopenharmony_ci agx_xor_to(b, x, x, y); 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_cistruct copy_ctx { 66bf215546Sopenharmony_ci /* Number of copies being processed */ 67bf215546Sopenharmony_ci unsigned entry_count; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci /* For each physreg, the number of pending copy entries that use it as a 70bf215546Sopenharmony_ci * source. Once this drops to zero, then the physreg is unblocked and can 71bf215546Sopenharmony_ci * be moved to. 72bf215546Sopenharmony_ci */ 73bf215546Sopenharmony_ci unsigned physreg_use_count[AGX_NUM_REGS]; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* For each physreg, the pending copy_entry that uses it as a dest. */ 76bf215546Sopenharmony_ci struct agx_copy *physreg_dest[AGX_NUM_REGS]; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci struct agx_copy entries[AGX_NUM_REGS]; 79bf215546Sopenharmony_ci}; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_cistatic bool 82bf215546Sopenharmony_cientry_blocked(struct agx_copy *entry, struct copy_ctx *ctx) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci for (unsigned i = 0; i < agx_size_align_16(entry->size); i++) { 85bf215546Sopenharmony_ci if (ctx->physreg_use_count[entry->dest + i] != 0) 86bf215546Sopenharmony_ci return true; 87bf215546Sopenharmony_ci } 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci return false; 90bf215546Sopenharmony_ci} 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_cistatic bool 93bf215546Sopenharmony_ciis_real(struct agx_copy *entry) 94bf215546Sopenharmony_ci{ 95bf215546Sopenharmony_ci /* TODO: Allow immediates in agx_copy */ 96bf215546Sopenharmony_ci return true; 97bf215546Sopenharmony_ci} 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci/* TODO: Generalize to other bit sizes */ 100bf215546Sopenharmony_cistatic void 101bf215546Sopenharmony_cisplit_32bit_copy(struct copy_ctx *ctx, struct agx_copy *entry) 102bf215546Sopenharmony_ci{ 103bf215546Sopenharmony_ci assert(!entry->done); 104bf215546Sopenharmony_ci assert(is_real(entry)); 105bf215546Sopenharmony_ci assert(agx_size_align_16(entry->size) == 2); 106bf215546Sopenharmony_ci struct agx_copy *new_entry = &ctx->entries[ctx->entry_count++]; 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci new_entry->dest = entry->dest + 1; 109bf215546Sopenharmony_ci new_entry->src = entry->src + 1; 110bf215546Sopenharmony_ci new_entry->done = false; 111bf215546Sopenharmony_ci entry->size = AGX_SIZE_16; 112bf215546Sopenharmony_ci new_entry->size = AGX_SIZE_16; 113bf215546Sopenharmony_ci ctx->physreg_dest[entry->dest + 1] = new_entry; 114bf215546Sopenharmony_ci} 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_civoid 117bf215546Sopenharmony_ciagx_emit_parallel_copies(agx_builder *b, 118bf215546Sopenharmony_ci struct agx_copy *copies, 119bf215546Sopenharmony_ci unsigned num_copies) 120bf215546Sopenharmony_ci{ 121bf215546Sopenharmony_ci struct copy_ctx _ctx = { 122bf215546Sopenharmony_ci .entry_count = num_copies 123bf215546Sopenharmony_ci }; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci struct copy_ctx *ctx = &_ctx; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci /* Set up the bookkeeping */ 128bf215546Sopenharmony_ci memset(ctx->physreg_dest, 0, sizeof(ctx->physreg_dest)); 129bf215546Sopenharmony_ci memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count)); 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->entry_count; i++) { 132bf215546Sopenharmony_ci struct agx_copy *entry = &copies[i]; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci ctx->entries[i] = *entry; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) { 137bf215546Sopenharmony_ci if (is_real(entry)) 138bf215546Sopenharmony_ci ctx->physreg_use_count[entry->src + j]++; 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci /* Copies should not have overlapping destinations. */ 141bf215546Sopenharmony_ci assert(!ctx->physreg_dest[entry->dest + j]); 142bf215546Sopenharmony_ci ctx->physreg_dest[entry->dest + j] = entry; 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci bool progress = true; 147bf215546Sopenharmony_ci while (progress) { 148bf215546Sopenharmony_ci progress = false; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci /* Step 1: resolve paths in the transfer graph. This means finding 151bf215546Sopenharmony_ci * copies whose destination aren't blocked by something else and then 152bf215546Sopenharmony_ci * emitting them, continuing this process until every copy is blocked 153bf215546Sopenharmony_ci * and there are only cycles left. 154bf215546Sopenharmony_ci * 155bf215546Sopenharmony_ci * TODO: We should note that src is also available in dest to unblock 156bf215546Sopenharmony_ci * cycles that src is involved in. 157bf215546Sopenharmony_ci */ 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->entry_count; i++) { 160bf215546Sopenharmony_ci struct agx_copy *entry = &ctx->entries[i]; 161bf215546Sopenharmony_ci if (!entry->done && !entry_blocked(entry, ctx)) { 162bf215546Sopenharmony_ci entry->done = true; 163bf215546Sopenharmony_ci progress = true; 164bf215546Sopenharmony_ci do_copy(b, entry); 165bf215546Sopenharmony_ci for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) { 166bf215546Sopenharmony_ci if (is_real(entry)) 167bf215546Sopenharmony_ci ctx->physreg_use_count[entry->src + j]--; 168bf215546Sopenharmony_ci ctx->physreg_dest[entry->dest + j] = NULL; 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci } 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci if (progress) 174bf215546Sopenharmony_ci continue; 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci /* Step 2: Find partially blocked copies and split them. In the 177bf215546Sopenharmony_ci * mergedregs case, we can 32-bit copies which are only blocked on one 178bf215546Sopenharmony_ci * 16-bit half, and splitting them helps get things moving. 179bf215546Sopenharmony_ci * 180bf215546Sopenharmony_ci * We can skip splitting copies if the source isn't a register, 181bf215546Sopenharmony_ci * however, because it does not unblock anything and therefore doesn't 182bf215546Sopenharmony_ci * contribute to making forward progress with step 1. These copies 183bf215546Sopenharmony_ci * should still be resolved eventually in step 1 because they can't be 184bf215546Sopenharmony_ci * part of a cycle. 185bf215546Sopenharmony_ci */ 186bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->entry_count; i++) { 187bf215546Sopenharmony_ci struct agx_copy *entry = &ctx->entries[i]; 188bf215546Sopenharmony_ci if (entry->done || (agx_size_align_16(entry->size) != 2)) 189bf215546Sopenharmony_ci continue; 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci if (((ctx->physreg_use_count[entry->dest] == 0 || 192bf215546Sopenharmony_ci ctx->physreg_use_count[entry->dest + 1] == 0)) && 193bf215546Sopenharmony_ci is_real(entry)) { 194bf215546Sopenharmony_ci split_32bit_copy(ctx, entry); 195bf215546Sopenharmony_ci progress = true; 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci /* Step 3: resolve cycles through swapping. 201bf215546Sopenharmony_ci * 202bf215546Sopenharmony_ci * At this point, the transfer graph should consist of only cycles. 203bf215546Sopenharmony_ci * The reason is that, given any physreg n_1 that's the source of a 204bf215546Sopenharmony_ci * remaining entry, it has a destination n_2, which (because every 205bf215546Sopenharmony_ci * copy is blocked) is the source of some other copy whose destination 206bf215546Sopenharmony_ci * is n_3, and so we can follow the chain until we get a cycle. If we 207bf215546Sopenharmony_ci * reached some other node than n_1: 208bf215546Sopenharmony_ci * 209bf215546Sopenharmony_ci * n_1 -> n_2 -> ... -> n_i 210bf215546Sopenharmony_ci * ^ | 211bf215546Sopenharmony_ci * |-------------| 212bf215546Sopenharmony_ci * 213bf215546Sopenharmony_ci * then n_2 would be the destination of 2 copies, which is illegal 214bf215546Sopenharmony_ci * (checked above in an assert). So n_1 must be part of a cycle: 215bf215546Sopenharmony_ci * 216bf215546Sopenharmony_ci * n_1 -> n_2 -> ... -> n_i 217bf215546Sopenharmony_ci * ^ | 218bf215546Sopenharmony_ci * |---------------------| 219bf215546Sopenharmony_ci * 220bf215546Sopenharmony_ci * and this must be only cycle n_1 is involved in, because any other 221bf215546Sopenharmony_ci * path starting from n_1 would also have to end in n_1, resulting in 222bf215546Sopenharmony_ci * a node somewhere along the way being the destination of 2 copies 223bf215546Sopenharmony_ci * when the 2 paths merge. 224bf215546Sopenharmony_ci * 225bf215546Sopenharmony_ci * The way we resolve the cycle is through picking a copy (n_1, n_2) 226bf215546Sopenharmony_ci * and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken 227bf215546Sopenharmony_ci * out of the cycle: 228bf215546Sopenharmony_ci * 229bf215546Sopenharmony_ci * n_1 -> ... -> n_i 230bf215546Sopenharmony_ci * ^ | 231bf215546Sopenharmony_ci * |--------------| 232bf215546Sopenharmony_ci * 233bf215546Sopenharmony_ci * and we can keep repeating this until the cycle is empty. 234bf215546Sopenharmony_ci */ 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->entry_count; i++) { 237bf215546Sopenharmony_ci struct agx_copy *entry = &ctx->entries[i]; 238bf215546Sopenharmony_ci if (entry->done) 239bf215546Sopenharmony_ci continue; 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci assert(is_real(entry)); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci /* catch trivial copies */ 244bf215546Sopenharmony_ci if (entry->dest == entry->src) { 245bf215546Sopenharmony_ci entry->done = true; 246bf215546Sopenharmony_ci continue; 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci do_swap(b, entry); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci /* Split any blocking copies whose sources are only partially 252bf215546Sopenharmony_ci * contained within our destination. 253bf215546Sopenharmony_ci */ 254bf215546Sopenharmony_ci if (agx_size_align_16(entry->size) == 1) { 255bf215546Sopenharmony_ci for (unsigned j = 0; j < ctx->entry_count; j++) { 256bf215546Sopenharmony_ci struct agx_copy *blocking = &ctx->entries[j]; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci if (blocking->done) 259bf215546Sopenharmony_ci continue; 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci if (blocking->src <= entry->dest && 262bf215546Sopenharmony_ci blocking->src + 1 >= entry->dest && 263bf215546Sopenharmony_ci agx_size_align_16(blocking->size) == 2) { 264bf215546Sopenharmony_ci split_32bit_copy(ctx, blocking); 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci } 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci /* Update sources of blocking copies. 270bf215546Sopenharmony_ci * 271bf215546Sopenharmony_ci * Note: at this point, every blocking copy's source should be 272bf215546Sopenharmony_ci * contained within our destination. 273bf215546Sopenharmony_ci */ 274bf215546Sopenharmony_ci for (unsigned j = 0; j < ctx->entry_count; j++) { 275bf215546Sopenharmony_ci struct agx_copy *blocking = &ctx->entries[j]; 276bf215546Sopenharmony_ci if (blocking->src >= entry->dest && 277bf215546Sopenharmony_ci blocking->src < entry->dest + agx_size_align_16(entry->size)) { 278bf215546Sopenharmony_ci blocking->src = entry->src + (blocking->src - entry->dest); 279bf215546Sopenharmony_ci } 280bf215546Sopenharmony_ci } 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci entry->done = true; 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci} 285